B
    P?ð[ñ%  ã               @   s.  d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZ e d	¡Ze d
¡Ze d¡Ze dej¡Ze dej¡Ze dej¡Ze d¡ZG dd„ deƒZeG dd„ deƒƒZdd„ Zdd„ Zdd„ Zdd„ Z dd„ Z!d'dd„Z"d(d!d"„Z#ed#ƒd$d%„ ƒZ$e#d&ƒZ%dS ))z
CCG Lexicons
é    )Úunicode_literalsN)Údefaultdict)ÚPrimitiveCategoryÚ	DirectionÚCCGVarÚFunctionalCategory)Úpython_2_unicode_compatible)Ú
deprecated)Ú
Expressionz([A-Za-z]+)(\[[A-Za-z,]+\])?z"([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)z([\\/])([.,]?)([.,]?)(.*)z([\S_]+)\s*(::|[-=]+>)\s*(.+)z([^{}]*[^ {}])\s*(\{[^}]+\})?z\{([^}]+)\}z([^#]*)(?:#.*)?c               @   s:   e Zd ZdZddd„Zdd„ Zdd„ Zd	d
„ Zdd„ ZdS )ÚTokenzÄ
    Class representing a token.

    token => category {semantics}
    e.g. eat => S\var[pl]/var {\x y.eat(x,y)}

    * `token` (string)
    * `categ` (string)
    * `semantics` (Expression)
    Nc             C   s   || _ || _|| _d S )N)Z_tokenÚ_categÚ
_semantics)ÚselfÚtokenÚcategÚ	semantics© r   ú/lib/python3.7/site-packages/nltk/ccg/lexicon.pyÚ__init__=   s    zToken.__init__c             C   s   | j S )N)r   )r   r   r   r   r   B   s    zToken.categc             C   s   | j S )N)r   )r   r   r   r   r   E   s    zToken.semanticsc             C   s2   d}| j d k	r dt| j ƒ d }dt| jƒ | S )NÚ z {Ú})r   Ústrr   )r   Úsemantics_strr   r   r   Ú__str__H   s    
zToken.__str__c             C   s*   t |tƒsdS t| j| jf| ¡ | ¡ ƒS )Néÿÿÿÿ)Ú
isinstancer   Zcmpr   r   r   r   )r   Úotherr   r   r   Ú__cmp__N   s    
zToken.__cmp__)N)	Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   r   r   r   r   r   1   s   

r   c               @   s0   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
S )Ú
CCGLexiconzâ
    Class representing a lexicon for CCG grammars.

    * `primitives`: The list of primitive categories for the lexicon
    * `families`: Families of categories
    * `entries`: A mapping of words to possible categories
    c             C   s    t |ƒ| _|| _|| _|| _d S )N)r   Ú_startZ_primitivesZ	_familiesÚ_entries)r   ÚstartÚ
primitivesÚfamiliesÚentriesr   r   r   r   ^   s    
zCCGLexicon.__init__c             C   s
   | j | S )z@
        Returns all the possible categories for a word
        )r$   )r   Zwordr   r   r   Ú
categoriesd   s    zCCGLexicon.categoriesc             C   s   | j S )z;
        Return the target category for the parser
        )r#   )r   r   r   r   r%   j   s    zCCGLexicon.startc             C   sn   d}d}x`t | jƒD ]R}|s$|d }|| d }d}x0| j| D ]"}|sR|d }nd}|d|  }q@W qW |S )zK
        String representation of the lexicon. Used for debugging.
        r   TÚ
z => z | Fz%s)Úsortedr$   )r   ÚstringÚfirstÚidentÚcatr   r   r   r   p   s    
zCCGLexicon.__str__N)r   r   r    r!   r   r)   r%   r   r   r   r   r   r"   T   s
   r"   c             C   s’   | dd… }d}xN|dkr^|  d¡s^|  d¡rDt|ƒ\}}|| }q||d  }|dd… }qW |  d¡r~|d |dd… fS td|  d ƒ‚dS )	zb
    Separate the contents matching the first set of brackets from the rest of
    the input.
    é   Nú(r   ú)r   zUnmatched bracket in string 'ú')Ú
startswithÚmatchBracketsÚAssertionError)r,   ÚrestZinsideÚpartr   r   r   r5   Š   s    


r5   c             C   s    |   d¡rt| ƒS t | ¡ ¡ S )zb
    Separate the string for the next portion of the category from the rest
    of the string
    r1   )r4   r5   ÚNEXTPRIM_REÚmatchÚgroups)r,   r   r   r   ÚnextCategoryž   s    
r<   c             C   s   t | d | dd… ƒS )z'
    Parse an application operator
    r   r0   N)r   )Úappr   r   r   ÚparseApplication¨   s    r>   c             C   s   | r| dd…   d¡S g S )z7
    Parse the subscripts for a primitive category
    r0   r   ú,)Úsplit)Zsubscrr   r   r   ÚparseSubscripts¯   s    rA   c             C   s¦   | d dkr.| d dkr.|dkr&t ƒ }||fS | d }||krp|| \}}|dkrX|}n| ||fg¡}||fS ||kr’t| d ƒ}t||ƒ|fS td| d ƒ‚dS )zƒ
    Parse a primitive category

    If the primitive is the special category 'var', replace it with the
    correct `CCGVar`.
    r   Úvarr0   NzString 'z-' is neither a family nor primitive category.)r   Z
substituterA   r   r6   )Zchunksr&   r'   rB   Úcatstrr/   ZcvarZsubscrsr   r   r   ÚparsePrimitiveCategory¸   s"    rD   c       
      C   sä   t | ƒ\}}| d¡r2t|dd… |||ƒ\}}ntt |¡ ¡ |||ƒ\}}xŒ|dkrÚt |¡ ¡ }t|dd… ƒ}|d }t |ƒ\}}| d¡r°t|dd… |||ƒ\}	}ntt |¡ ¡ |||ƒ\}	}t	||	|ƒ}qPW ||fS )z{
    Parse a string representing a category, and returns a tuple with
    (possibly) the CCG variable for the category
    r1   r0   r   r   r   é   )
r<   r4   ÚaugParseCategoryrD   ÚPRIM_REr:   r;   ÚAPP_REr>   r   )
Úliner&   r'   rB   Z
cat_stringr7   Zresr=   Ú	directionÚargr   r   r   rF   Ö   s     


rF   Fc             C   s(  t  ¡  g }i }ttƒ}xü|  ¡ D ]ð}t |¡ ¡ d  ¡ }|dkrFq"| 	d¡rv|dd„ |dd…  ¡  
d¡D ƒ }q"t |¡ ¡ \}}}t |¡ ¡ \}	}
t|	||ƒ\}}|d	krÂ||f||< q"d}|d
krü|
dkrät|d ƒ‚nt t |
¡ ¡ d ¡}||  t|||ƒ¡ q"W t|d |||ƒS )z@
    Convert string representation into a lexicon for CCGs.
    r   r   z:-c             S   s   g | ]}|  ¡ ‘qS r   )Ústrip)Ú.0Zprimr   r   r   ú
<listcomp>
  s    zfromstring.<locals>.<listcomp>é   Nr?   z::Tz@ must contain semantics because include_semantics is set to True)r   Zreset_idr   ÚlistÚ
splitlinesÚCOMMENTS_REr:   r;   rL   r4   r@   ÚLEX_REÚRHS_RErF   r6   r
   Ú
fromstringÚSEMANTICS_REÚappendr   r"   )Úlex_strZinclude_semanticsr&   r'   r(   rI   r.   ÚsepZrhsrC   r   r/   rB   r   r   r   r   rU   ÷   s4    
$
rU   zUse fromstring() instead.c             C   s   t | ƒS )N)rU   )rX   r   r   r   ÚparseLexicon(  s    rZ   a¡  
    # Rather minimal lexicon based on the openccg `tinytiny' grammar.
    # Only incorporates a subset of the morphological subcategories, however.
    :- S,NP,N                    # Primitive categories
    Det :: NP/N                  # Determiners
    Pro :: NP
    IntransVsg :: S\NP[sg]    # Tensed intransitive verbs (singular)
    IntransVpl :: S\NP[pl]    # Plural
    TransVsg :: S\NP[sg]/NP   # Tensed transitive verbs (singular)
    TransVpl :: S\NP[pl]/NP   # Plural

    the => NP[sg]/N[sg]
    the => NP[pl]/N[pl]

    I => Pro
    me => Pro
    we => Pro
    us => Pro

    book => N[sg]
    books => N[pl]

    peach => N[sg]
    peaches => N[pl]

    policeman => N[sg]
    policemen => N[pl]

    boy => N[sg]
    boys => N[pl]

    sleep => IntransVsg
    sleep => IntransVpl

    eat => IntransVpl
    eat => TransVpl
    eats => IntransVsg
    eats => TransVsg

    see => TransVpl
    sees => TransVsg
    )N)F)&r!   Z
__future__r   ÚreÚcollectionsr   Znltk.ccg.apir   r   r   r   Znltk.compatr   Znltk.internalsr	   Znltk.sem.logicr
   ÚcompilerG   r9   rH   ÚUNICODErS   rT   rV   rR   Úobjectr   r"   r5   r<   r>   rA   rD   rF   rU   rZ   Zopenccg_tinytinyr   r   r   r   Ú<module>	   s6   



#5
	
!
1*