ó
ù`]c           @  s  d  d l  m Z m Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l m	 Z	 d  d l
 m Z d  d l m Z m Z m Z d  d l m Z d  d l m Z d  d l m Z d	 „  Z d
 „  Z d „  Z d e f d „  ƒ  YZ e d k rd  d l Z e j ƒ  n  d S(   iÿÿÿÿ(   t   print_functiont   unicode_literalsN(   t	   text_type(   t   ZipFilePathPointer(   t   find_dirt	   find_filet   find_jars_within_path(   t   ParserI(   t   DependencyGraph(   t   taggedsents_to_conllc          C  sw   d d l  m }  |  d1 d2 d3 d4 d5 d6 d7 d8 d9 d: d; d< d= d> d? d@ dA dB dC dD dE dF dG dH dI dJ dK dL g ƒ } | j S(M   Niÿÿÿÿ(   t   RegexpTaggeru   \.$u   .u   \,$u   ,u   \?$u   ?u   \($u   (u   \)$u   )u   \[$u   [u   \]$u   ]u   ^-?[0-9]+(.[0-9]+)?$u   CDu   (The|the|A|a|An|an)$u   DTu&   (He|he|She|she|It|it|I|me|Me|You|you)$u   PRPu   (His|his|Her|her|Its|its)$u   PRP$u   (my|Your|your|Yours|yours)$u    (on|On|in|In|at|At|since|Since)$u   INu    (for|For|ago|Ago|before|Before)$u   (till|Till|until|Until)$u   (by|By|beside|Beside)$u   (under|Under|below|Below)$u   (over|Over|above|Above)$u    (across|Across|through|Through)$u   (into|Into|towards|Towards)$u   (onto|Onto|from|From)$u   .*able$u   JJu   .*ness$u   NNu   .*ly$u   RBu   .*s$u   NNSu   .*ing$u   VBGu   .*ed$u   VBDu   .*(   u   \.$u   .(   u   \,$u   ,(   u   \?$u   ?(   u   \($u   ((   u   \)$u   )(   u   \[$u   [(   u   \]$u   ](   u   ^-?[0-9]+(.[0-9]+)?$u   CD(   u   (The|the|A|a|An|an)$u   DT(   u&   (He|he|She|she|It|it|I|me|Me|You|you)$u   PRP(   u   (His|his|Her|her|Its|its)$u   PRP$(   u   (my|Your|your|Yours|yours)$u   PRP$(   u    (on|On|in|In|at|At|since|Since)$u   IN(   u    (for|For|ago|Ago|before|Before)$u   IN(   u   (till|Till|until|Until)$u   IN(   u   (by|By|beside|Beside)$u   IN(   u   (under|Under|below|Below)$u   IN(   u   (over|Over|above|Above)$u   IN(   u    (across|Across|through|Through)$u   IN(   u   (into|Into|towards|Towards)$u   IN(   u   (onto|Onto|from|From)$u   IN(   u   .*able$u   JJ(   u   .*ness$u   NN(   u   .*ly$u   RB(   u   .*s$u   NNS(   u   .*ing$u   VBG(   u   .*ed$u   VBD(   u   .*u   NN(   t   nltk.tagR
   t   tag(   R
   t   _tagger(    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyt   malt_regex_tagger   s>    c         C  s¶   t  j j |  ƒ r |  } n t |  d d	 ƒ} d d d g } t t | ƒ ƒ } t d „  | Dƒ ƒ } t d d d g ƒ } | j | ƒ sŽ t ‚ t t	 d „  | ƒ ƒ s¬ t ‚ t
 | ƒ S(
   uE   
    A module to find MaltParser .jar file and its dependencies.
    t   env_varsu   MALT_PARSERu    c         s  s%   |  ] } t  j j | ƒ d  Vq d S(   i   N(   t   ost   patht   split(   t   .0t   jar(    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pys	   <genexpr>N   s    u	   log4j.jaru
   libsvm.jaru   liblinear-1.8.jarc         S  s   |  j  d ƒ o |  j d ƒ S(   Nu   maltparser-u   .jar(   t
   startswitht   endswith(   t   i(    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyt   <lambda>S   t    (   u   MALT_PARSER(   R   R   t   existsR   t   setR   t   issubsett   AssertionErrort   anyt   filtert   list(   t   parser_dirnamet	   _malt_dirt   malt_dependenciest
   _malt_jarst   _jars(    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyt   find_maltparserC   s    	c         C  s@   |  d k r d St j j |  ƒ r& |  St |  d d d t ƒSd S(   u8   
    A module to find pre-trained MaltParser model.
    u   malt_temp.mcoR   u
   MALT_MODELt   verboseN(   u
   MALT_MODEL(   t   NoneR   R   R   R   t   False(   t   model_filename(    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyt   find_malt_modelX   s
    t
   MaltParserc           B  sw   e  Z d  Z d	 d	 d	 d „ Z e d d „ Z e d d „ Z d	 d	 d „ Z e	 e d „ ƒ Z
 e d „ Z e d „ Z RS(
   uØ  
    A class for dependency parsing with MaltParser. The input is the paths to:
    - a maltparser directory
    - (optionally) the path to a pre-trained MaltParser .mco model file
    - (optionally) the tagger to use for POS tagging before parsing
    - (optionally) additional Java arguments

    Example:
        >>> from nltk.parse import malt
        >>> # With MALT_PARSER and MALT_MODEL environment set.
        >>> mp = malt.MaltParser('maltparser-1.7.2', 'engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
        >>> # Without MALT_PARSER and MALT_MODEL environment.
        >>> mp = malt.MaltParser('/home/user/maltparser-1.7.2/', '/home/user/engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
    c         C  s|   t  | ƒ |  _ | d k	 r! | n g  |  _ t | ƒ |  _ |  j d k |  _ t j ƒ  |  _	 | d k	 rl | n t
 ƒ  |  _ d S(   u¿  
        An interface for parsing with the Malt Parser.

        :param parser_dirname: The path to the maltparser directory that
        contains the maltparser-1.x.jar
        :type parser_dirname: str
        :param model_filename: The name of the pre-trained model with .mco file
        extension. If provided, training will not be required.
        (see http://www.maltparser.org/mco/mco.html and
        see http://www.patful.com/chalk/node/185)
        :type model_filename: str
        :param tagger: The tagger used to POS tag the raw string before
        formatting to CONLL format. It should behave like `nltk.pos_tag`
        :type tagger: function
        :param additional_java_args: This is the additional Java arguments that
        one can use when calling Maltparser, usually this is the heapsize
        limits, e.g. `additional_java_args=['-Xmx1024m']`
        (see http://goo.gl/mpDBvQ)
        :type additional_java_args: list
        u   malt_temp.mcoN(   R&   t	   malt_jarsR(   t   additional_java_argsR+   t   modelt   _trainedt   tempfilet
   gettempdirt   working_dirR   t   tagger(   t   selfR!   R*   R4   R.   (    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyt   __init__x   s    u   nullc         c  sÇ  |  j  s t d ƒ ‚ n  t j d d d |  j d d d t ƒ b} t j d d d |  j d d d t ƒ 2} x' t | ƒ D] } | j t | ƒ ƒ qy W| j	 ƒ  |  j
 | j | j d d	 ƒ} t j ƒ  } y$ t j t j j |  j ƒ d
 ƒ Wn n X|  j | | ƒ }	 t j | ƒ |	 d
 k	 rEt d d j | ƒ |	 f ƒ ‚ n  t | j ƒ A }
 x7 |
 j ƒ  j d ƒ D]  } t t | d | ƒg ƒ VqmWWd QXWd QXWd QXt j | j ƒ t j | j ƒ d S(   u·  
        Use MaltParser to parse multiple POS tagged sentences. Takes multiple
        sentences where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        :param sentences: Input sentences to parse
        :type sentence: list(list(tuple(str, str)))
        :return: iter(iter(``DependencyGraph``)) the dependency graph
        representation of each sentence
        u0   Parser has not been trained. Call train() first.t   prefixu   malt_input.conll.t   dirt   modeu   wt   deleteu   malt_output.conll.u   parsei    u0   MaltParser parsing (%s) failed with exit code %du    u   

t   top_relation_labelN(   R0   t	   ExceptionR1   t   NamedTemporaryFileR3   R)   R	   t   writeR   t   closet   generate_malt_commandt   nameR   t   getcwdt   chdirR   R   R/   t   _executet   joint   opent   readt   iterR   t   remove(   R5   t	   sentencesR'   R;   t
   input_filet   output_filet   linet   cmdt   _current_patht   rett   infilet   tree_str(    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyt   parse_tagged_sents¢   sD    					
$*c           s,   ‡  f d †  | Dƒ } ˆ  j  | | d | ƒS(   un  
        Use MaltParser to parse multiple sentences.
        Takes a list of sentences, where each sentence is a list of words.
        Each sentence will be automatically tagged with this
        MaltParser instance's tagger.

        :param sentences: Input sentences to parse
        :type sentence: list(list(str))
        :return: iter(DependencyGraph)
        c         3  s   |  ] } ˆ  j  | ƒ Vq d  S(   N(   R4   (   R   t   sentence(   R5   (    s.   lib/python2.7/site-packages/nltk/parse/malt.pys	   <genexpr>ï   s    R;   (   RS   (   R5   RJ   R'   R;   t   tagged_sentences(    (   R5   s.   lib/python2.7/site-packages/nltk/parse/malt.pyt   parse_sentsä   s    c         C  sî   d g } | |  j  7} t j j d ƒ r. d n d } | d | j |  j ƒ g 7} | d g 7} t j j |  j	 ƒ r˜ | d t j j
 |  j	 ƒ d g 7} n | d |  j	 g 7} | d	 | g 7} | d
 k rÚ | d | g 7} n  | d | g 7} | S(   u  
        This function generates the maltparser command use at the terminal.

        :param inputfilename: path to the input file
        :type inputfilename: str
        :param outputfilename: path to the output file
        :type outputfilename: str
        u   javau   winu   ;u   :u   -cpu   org.maltparser.Maltu   -ciÿÿÿÿu   -iu   parseu   -ou   -m(   R.   t   syst   platformR   RE   R-   R   R   R   R/   R   (   R5   t   inputfilenamet   outputfilenameR9   RN   t   classpaths_separator(    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyR@   ô   s    
	&c         C  s:   | r d  n t j } t j |  d | d | ƒ} | j ƒ  S(   Nt   stdoutt   stderr(   R(   t
   subprocesst   PIPEt   Popent   wait(   RN   R'   t   outputt   p(    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyRD     s    c      	   C  s†   t  j d d d |  j d d d t ƒ 3 } d j d „  | Dƒ ƒ } | j t | ƒ ƒ Wd	 QX|  j | j d
 | ƒt	 j
 | j ƒ d	 S(   uÍ   
        Train MaltParser from a list of ``DependencyGraph`` objects

        :param depgraphs: list of ``DependencyGraph`` objects for training input data
        :type depgraphs: DependencyGraph
        R7   u   malt_train.conll.R8   R9   u   wR:   u   
c         s  s   |  ] } | j  d  ƒ Vq d S(   i
   N(   t   to_conll(   R   t   dg(    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pys	   <genexpr>&  s    NR'   (   R1   R=   R3   R)   RE   R>   R   t   train_from_fileRA   R   RI   (   R5   t	   depgraphsR'   RK   t	   input_str(    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyt   train  s    		c         C  sî   t  | t ƒ rŒ t j d d d |  j d d d t ƒ Q } | j ƒ  & } | j ƒ  } | j t	 | ƒ ƒ Wd QX|  j
 | j d | ƒSWd QXn  |  j | d d	 ƒ} |  j | | ƒ } | d
 k rá t d d j | ƒ | f ƒ ‚ n  t |  _ d S(   u—   
        Train MaltParser from a file
        :param conll_file: str for the filename of the training input data
        :type conll_file: str
        R7   u   malt_train.conll.R8   R9   u   wR:   NR'   u   learni    u1   MaltParser training (%s) failed with exit code %du    (   t
   isinstanceR   R1   R=   R3   R)   RF   RG   R>   R   Rf   RA   R@   RD   R<   RE   t   TrueR0   (   R5   t
   conll_fileR'   RK   t   conll_input_filet	   conll_strRN   RP   (    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyRf   -  s    		N(   t   __name__t
   __module__t   __doc__R(   R6   R)   RS   RV   R@   t   staticmethodRD   Ri   Rf   (    (    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyR,   d   s   %B u   __main__(   t
   __future__R    R   R   RW   R1   R^   t   inspectt   sixR   t	   nltk.dataR   t   nltk.internalsR   R   R   t   nltk.parse.apiR   t   nltk.parse.dependencygraphR   t   nltk.parse.utilR	   R   R&   R+   R,   Ro   t   doctestt   testmod(    (    (    s.   lib/python2.7/site-packages/nltk/parse/malt.pyt   <module>   s$   	&		æC