ó
ů`]c           @  s  d  Z  d d l m Z d d l Z d d l m Z d d l m Z m Z m	 Z	 m
 Z
 d d l m Z d d l m Z d d l m Z e j d	  Z e j d
  Z e j d  Z e j d e j  Z e j d e j  Z e j d e j  Z e j d  Z d e f d     YZ e d e f d     Y Z d   Z d   Z d   Z d   Z  d   Z! d d  Z# e$ d  Z% e d  d    Z& e% d  Z' d S(   u   
CCG Lexicons
i˙˙˙˙(   t   unicode_literalsN(   t   defaultdict(   t   PrimitiveCategoryt	   Directiont   CCGVart   FunctionalCategory(   t   python_2_unicode_compatible(   t
   deprecated(   t
   Expressionu   ([A-Za-z]+)(\[[A-Za-z,]+\])?u"   ([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)u   ([\\/])([.,]?)([.,]?)(.*)u   ([\S_]+)\s*(::|[-=]+>)\s*(.+)u   ([^{}]*[^ {}])\s*(\{[^}]+\})?u   \{([^}]+)\}u   ([^#]*)(?:#.*)?t   Tokenc           B  s>   e  Z d  Z d d  Z d   Z d   Z d   Z d   Z RS(   uÄ   
    Class representing a token.

    token => category {semantics}
    e.g. eat => S\var[pl]/var {\x y.eat(x,y)}

    * `token` (string)
    * `categ` (string)
    * `semantics` (Expression)
    c         C  s   | |  _  | |  _ | |  _ d  S(   N(   t   _tokent   _categt
   _semantics(   t   selft   tokent   categt	   semantics(    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyt   __init__=   s    		c         C  s   |  j  S(   N(   R   (   R   (    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyR   B   s    c         C  s   |  j  S(   N(   R   (   R   (    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyR   E   s    c         C  sD   d } |  j  d  k	 r/ d t |  j   d } n  d t |  j  | S(   Nu    u    {u   }(   R   t   Nonet   strR   (   R   t   semantics_str(    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyt   __str__H   s    c         C  s;   t  | t  s d St |  j |  j f | j   | j    S(   Ni˙˙˙˙(   t
   isinstanceR	   t   cmpR   R   R   R   (   R   t   other(    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyt   __cmp__N   s    N(	   t   __name__t
   __module__t   __doc__R   R   R   R   R   R   (    (    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyR	   1   s   
			t
   CCGLexiconc           B  s2   e  Z d  Z d   Z d   Z d   Z d   Z RS(   uâ   
    Class representing a lexicon for CCG grammars.

    * `primitives`: The list of primitive categories for the lexicon
    * `families`: Families of categories
    * `entries`: A mapping of words to possible categories
    c         C  s.   t  |  |  _ | |  _ | |  _ | |  _ d  S(   N(   R   t   _startt   _primitivest	   _familiest   _entries(   R   t   startt
   primitivest   familiest   entries(    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyR   ^   s    		c         C  s   |  j  | S(   u@   
        Returns all the possible categories for a word
        (   R!   (   R   t   word(    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyt
   categoriesd   s    c         C  s   |  j  S(   u;   
        Return the target category for the parser
        (   R   (   R   (    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyR"   j   s    c         C  s   d } t  } x} t |  j  D]l } | s5 | d } n  | | d } t  } x< |  j | D]- } | sp | d } n t } | d | } qW Wq W| S(   uK   
        String representation of the lexicon. Used for debugging.
        u    u   
u    => u    | u   %s(   t   Truet   sortedR!   t   False(   R   t   stringt   firstt   identt   cat(    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyR   p   s    (   R   R   R   R   R'   R"   R   (    (    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyR   T   s
   			c         C  s˛   |  d } d } xf | d k rx | j  d  rx | j  d  r] t |  \ } } | | } q | | d } | d } q W| j  d  r | d | d f St d |  d   d S(	   ub   
    Separate the contents matching the first set of brackets from the rest of
    the input.
    i   u   (u    u   )i    u   Unmatched bracket in string 'u   'N(   t
   startswitht   matchBracketst   AssertionError(   R+   t   restt   insidet   part(    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyR0      s    
c         C  s,   |  j  d  r t |   St j |   j   S(   ub   
    Separate the string for the next portion of the category from the rest
    of the string
    u   ((   R/   R0   t   NEXTPRIM_REt   matcht   groups(   R+   (    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyt   nextCategory   s    
c         C  s   t  |  d |  d  S(   u'   
    Parse an application operator
    i    i   (   R   (   t   app(    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyt   parseApplication¨   s    c         C  s   |  r |  d d !j  d  Sg  S(   u7   
    Parse the subscripts for a primitive category
    i   i˙˙˙˙u   ,(   t   split(   t   subscr(    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyt   parseSubscriptsŻ   s    c         C  sé   |  d d k rE |  d d k rE | d k r8 t   } n  | | f Sn  |  d } | | k r˘ | | \ } } | d k r | } n | j | | f g  } | | f S| | k rŃ t |  d  } t | |  | f St d | d   d S(   u   
    Parse a primitive category

    If the primitive is the special category 'var', replace it with the
    correct `CCGVar`.
    i    u   vari   u   String 'u-   ' is neither a family nor primitive category.N(   R   R   t
   substituteR=   R   R1   (   t   chunksR#   R$   t   vart   catstrR.   t   cvart   subscrs(    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyt   parsePrimitiveCategory¸   s"    
	
c   
      C  sA  t  |   \ } } | j d  rF t | d d !| | |  \ } } n* t t j |  j   | | |  \ } } xÄ | d k r6t j |  j   } t | d d ! } | d } t  |  \ } } | j d  r÷ t | d d !| | |  \ }	 } n* t t j |  j   | | |  \ }	 } t	 | |	 |  } qs W| | f S(   u{   
    Parse a string representing a category, and returns a tuple with
    (possibly) the CCG variable for the category
    u   (i   i˙˙˙˙u    i    i   (
   R8   R/   t   augParseCategoryRD   t   PRIM_RER6   R7   t   APP_RER:   R   (
   t   lineR#   R$   R@   t
   cat_stringR2   t   resR9   t	   directiont   arg(    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyRE   Ö   s     %'
%'c         C  sŹ  t  j   g  } i  } t t  } xp|  j   D]b} t j |  j   d j   } | d k rf q/ n  | j	 d  rŽ | g  | d j   j
 d  D] } | j   ^ q } q/ t j |  j   \ } } }	 t j |	  j   \ }
 } t |
 | |  \ } } | d k r| | f | | <q/ d } | t k rt| d k rOt | d   qtt j t j |  j   d  } n  | | j t | | |   q/ Wt | d | | |  S(	   u@   
    Convert string representation into a lexicon for CCGs.
    i    u    u   :-i   u   ,u   ::u@    must contain semantics because include_semantics is set to TrueN(   R   t   reset_idR   t   listt
   splitlinest   COMMENTS_RER6   R7   t   stripR/   R;   t   LEX_REt   RHS_RERE   R   R(   R1   R   t
   fromstringt   SEMANTICS_REt   appendR	   R   (   t   lex_strt   include_semanticsR#   R$   R%   RH   t   primR-   t   sept   rhsRA   R   R.   R@   R   (    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyRT   ÷   s4    
6!u   Use fromstring() instead.c         C  s
   t  |   S(   N(   RT   (   RW   (    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyt   parseLexicon(  s    uĄ  
    # Rather minimal lexicon based on the openccg `tinytiny' grammar.
    # Only incorporates a subset of the morphological subcategories, however.
    :- S,NP,N                    # Primitive categories
    Det :: NP/N                  # Determiners
    Pro :: NP
    IntransVsg :: S\NP[sg]    # Tensed intransitive verbs (singular)
    IntransVpl :: S\NP[pl]    # Plural
    TransVsg :: S\NP[sg]/NP   # Tensed transitive verbs (singular)
    TransVpl :: S\NP[pl]/NP   # Plural

    the => NP[sg]/N[sg]
    the => NP[pl]/N[pl]

    I => Pro
    me => Pro
    we => Pro
    us => Pro

    book => N[sg]
    books => N[pl]

    peach => N[sg]
    peaches => N[pl]

    policeman => N[sg]
    policemen => N[pl]

    boy => N[sg]
    boys => N[pl]

    sleep => IntransVsg
    sleep => IntransVpl

    eat => IntransVpl
    eat => TransVpl
    eats => IntransVsg
    eats => TransVsg

    see => TransVpl
    sees => TransVsg
    ((   R   t
   __future__R    t   ret   collectionsR   t   nltk.ccg.apiR   R   R   R   t   nltk.compatR   t   nltk.internalsR   t   nltk.sem.logicR   t   compileRF   R5   RG   t   UNICODERR   RS   RU   RP   t   objectR	   R   R0   R8   R:   R=   RD   R   RE   R*   RT   R\   t   openccg_tinytiny(    (    (    s/   lib/python2.7/site-packages/nltk/ccg/lexicon.pyt   <module>	   s6   "#5		
				!1*