ó
ù`]c           @  s3  d  d l  m Z d  d l Z d  d l Z d  d l Z d  d l m Z d  d l m Z d  d l	 m
 Z
 d  d l m Z m Z m Z m Z m Z d  d l m Z d  d l m Z d  d	 l m Z d
 Z d e f d „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ e d ƒ d „  ƒ Z d S(   iÿÿÿÿ(   t   unicode_literalsN(   t   skip(   t   PIPE(   t	   text_type(   t   find_jar_itert   config_javat   javat   _java_optionst   find_jars_within_path(   t   ParserI(   t   DependencyGraph(   t   Treeu2   https://nlp.stanford.edu/software/lex-parser.shtmlt   GenericStanfordParserc           B  s›   e  Z d  Z d Z d Z d Z e Z e Z d d d d e d d d „ Z
 d	 „  Z e d
 „ Z e d „ Z e d „ Z e d „ Z e d „ Z e d „ Z RS(   u    Interface to the Stanford Parseru+   stanford-parser-(\d+)(\.(\d+))+-models\.jaru   stanford-parser\.jaru3   edu.stanford.nlp.parser.lexparser.LexicalizedParseru4   edu/stanford/nlp/models/lexparser/englishPCFG.ser.gzu   utf8u   -mx4gu    c         C  sØ   t  t |  j | d d d d d t d | d t ƒd d	 „  ƒ} t  t |  j | d d d d d t d | d t ƒd d „  ƒ}	 t j j | ƒ d }
 t	 |	 g t
 |
 ƒ ƒ |  _ | |  _ | |  _ | |  _ | |  _ d  S(   Nt   env_varsu   STANFORD_PARSERu   STANFORD_CORENLPt
   searchpatht   urlt   verboset   is_regext   keyc         S  s   t  j j |  ƒ S(   N(   t   ost   patht   dirname(   t
   model_path(    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyt   <lambda>C   t    u   STANFORD_MODELSc         S  s   t  j j |  ƒ S(   N(   R   R   R   (   R   (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyR   P   R   i    (   u   STANFORD_PARSERu   STANFORD_CORENLP(    (   u   STANFORD_MODELSu   STANFORD_CORENLP(    (   t   maxR   t   _JARt   _stanford_urlt   Truet   _MODEL_JAR_PATTERNR   R   t   splitt   tupleR   t
   _classpathR   t	   _encodingt   corenlp_optionst   java_options(   t   selft   path_to_jart   path_to_models_jarR   t   encodingR   R#   R"   t   stanford_jart	   model_jart   stanford_dir(    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyt   __init__-   s4    					c         C  së   g  } g  } g  } t  } xÆ | j t  ƒ D]µ } | d k rÊ | rb | j t | ƒ ƒ g  } t  } qÝ |  j r™ | j |  j d j | ƒ ƒ ƒ g  } t } qÝ | j t |  j d j | ƒ ƒ g ƒ ƒ g  } q( | j | ƒ t  } q( Wt | ƒ S(   Nu    u   
(   t   Falset
   splitlinest   appendt   itert   _DOUBLE_SPACED_OUTPUTt
   _make_treet   joinR   (   R$   t   output_t   rest	   cur_linest	   cur_treest   blankt   line(    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyt   _parse_trees_output^   s&    			(	
c      
   C  s\   |  j  d |  j d d d |  j d d d g
 } |  j |  j | d j d	 „  | Dƒ ƒ | ƒ ƒ S(
   uâ  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
        list where each sentence is a list of words.
        Each sentence will be automatically tagged with this StanfordParser instance's
        tagger.
        If whitespaces exists inside a token, then the token will be treated as
        separate tokens.

        :param sentences: Input sentences to parse
        :type sentences: list(list(str))
        :rtype: iter(iter(Tree))
        u   -modelu
   -sentencesu   newlineu   -outputFormatu
   -tokenizedu   -escaperu-   edu.stanford.nlp.process.PTBEscapingProcessoru   
c         s  s   |  ] } d  j  | ƒ Vq d S(   u    N(   R2   (   t   .0t   sentence(    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pys	   <genexpr>   s    (   t   _MAIN_CLASSR   t   _OUTPUT_FORMATR9   t   _executeR2   (   R$   t	   sentencesR   t   cmd(    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyt   parse_sentsu   s    	c         C  s   t  |  j | g | ƒ ƒ S(   u&  
        Use StanfordParser to parse a sentence. Takes a sentence as a string;
        before parsing, it will be automatically tokenized and tagged by
        the Stanford Parser.

        :param sentence: Input sentence to parse
        :type sentence: str
        :rtype: iter(Tree)
        (   t   nextt   raw_parse_sents(   R$   R;   R   (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyt	   raw_parse”   s    
c         C  sI   |  j  d |  j d d d |  j g } |  j |  j | d j | ƒ | ƒ ƒ S(   uI  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
        list of strings.
        Each sentence will be automatically tokenized and tagged by the Stanford Parser.

        :param sentences: Input sentences to parse
        :type sentences: list(str)
        :rtype: iter(iter(Tree))
        u   -modelu
   -sentencesu   newlineu   -outputFormatu   
(   R<   R   R=   R9   R>   R2   (   R$   R?   R   R@   (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyRC       s    c         C  s   t  |  j | g | ƒ ƒ S(   u0  
        Use StanfordParser to parse a sentence. Takes a sentence as a list of
        (word, tag) tuples; the sentence must have already been tokenized and
        tagged.

        :param sentence: Input sentence to parse
        :type sentence: list(tuple(str, str))
        :rtype: iter(Tree)
        (   RB   t   tagged_parse_sents(   R$   R;   R   (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyt   tagged_parse·   s    
c           st   d ‰  |  j  d |  j d d d |  j d d ˆ  d d	 d
 d g } |  j |  j | d j ‡  f d †  | Dƒ ƒ | ƒ ƒ S(   ud  
        Use StanfordParser to parse multiple sentences. Takes multiple sentences
        where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        :param sentences: Input sentences to parse
        :type sentences: list(list(tuple(str, str)))
        :rtype: iter(iter(Tree))
        u   /u   -modelu
   -sentencesu   newlineu   -outputFormatu
   -tokenizedu   -tagSeparatoru   -tokenizerFactoryu,   edu.stanford.nlp.process.WhitespaceTokenizeru   -tokenizerMethodu   newCoreLabelTokenizerFactoryu   
c         3  s.   |  ]$ } d  j  ‡  f d †  | Dƒ ƒ Vq d S(   u    c         3  s   |  ] } ˆ  j  | ƒ Vq d  S(   N(   R2   (   R:   t   tagged(   t   tag_separator(    s2   lib/python2.7/site-packages/nltk/parse/stanford.pys	   <genexpr>ã   s    N(   R2   (   R:   R;   (   RH   (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pys	   <genexpr>ã   s   (   R<   R   R=   R9   R>   R2   (   R$   R?   R   R@   (    (   RH   s2   lib/python2.7/site-packages/nltk/parse/stanford.pyRE   Ã   s,    
	
c   	      C  s“  |  j  } | j d | g ƒ |  j r8 | j |  j ƒ n  d j t ƒ } t d |  j d | ƒ t j	 d d d t
 ƒ õ } t | t ƒ rŸ | rŸ | j | ƒ } n  | j | ƒ | j ƒ  |  j rü | j d ƒ t | d	 |  j d
 | d t d t ƒ\ } } n7 | j | j ƒ t | d	 |  j d t d t ƒ\ } } | j d d ƒ } | j d d ƒ } | j | ƒ } Wd  QXt j | j ƒ t d | d t
 ƒ | S(   Nu	   -encodingu    t   optionsR   t   modeu   wbt   deletei    t	   classpatht   stdint   stdoutt   stderrs   Â t    s     (   R!   t   extendR"   R.   R2   R   R   R#   t   tempfilet   NamedTemporaryFileR,   t
   isinstanceR   t   encodet   writet   flusht
   _USE_STDINt   seekR   R    R   t   namet   replacet   decodeR   t   unlink(	   R$   R@   t   input_R   R'   t   default_optionst
   input_fileRN   RO   (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyR>   ê   s8    		
		$N(   t   __name__t
   __module__t   __doc__R   R   R<   R,   RX   R0   t   NoneR+   R9   RA   RD   RC   RF   RE   R>   (    (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyR   #   s(   )	't   StanfordParserc           B  s&   e  Z d  Z d Z d „  Z d „  Z RS(   u{  
    >>> parser=StanfordParser(
    ...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    ... )

    >>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) # doctest: +NORMALIZE_WHITESPACE
    [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
    Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents((
    ...     "the quick brown fox jumps over the lazy dog",
    ...     "the quick grey wolf jumps over the lazy fox"
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE
    [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
    Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
    [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['grey']), Tree('NN', ['wolf'])]), Tree('NP',
    [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
    Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE
    [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
    Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
    [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
    Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', [Tree('', []),
    Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', [])])])])])])])]

    >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents((
    ...     (
    ...         ("The", "DT"),
    ...         ("quick", "JJ"),
    ...         ("brown", "JJ"),
    ...         ("fox", "NN"),
    ...         ("jumped", "VBD"),
    ...         ("over", "IN"),
    ...         ("the", "DT"),
    ...         ("lazy", "JJ"),
    ...         ("dog", "NN"),
    ...         (".", "."),
    ...     ),
    ... ))],[]) # doctest: +NORMALIZE_WHITESPACE
    [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
    Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
    [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
    u   pennc         O  s3   t  j d t d d ƒt t |  ƒ j | | Ž  d  S(   Nuc   The StanfordParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPParser[0m instead.t
   stackleveli   (   t   warningst   warnt   DeprecationWarningt   superRe   R+   (   R$   t   argst   kwargs(    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyR+   N  s
    c         C  s   t  j | ƒ S(   N(   R   t
   fromstring(   R$   t   result(    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyR1   X  s    (   Ra   Rb   Rc   R=   R+   R1   (    (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyRe     s   1	
t   StanfordDependencyParserc           B  s&   e  Z d  Z d Z d „  Z d „  Z RS(   uT
  
    >>> dep_parser=StanfordDependencyParser(
    ...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    ... )

    >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])]

    >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE
    [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
    ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
    ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
    ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
    ...     "The quick brown fox jumps over the lazy dog.",
    ...     "The quick grey wolf jumps over the lazy fox."
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]),
    Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE
    [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])]

    >>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents((
    ...     (
    ...         ("The", "DT"),
    ...         ("quick", "JJ"),
    ...         ("brown", "JJ"),
    ...         ("fox", "NN"),
    ...         ("jumped", "VBD"),
    ...         ("over", "IN"),
    ...         ("the", "DT"),
    ...         ("lazy", "JJ"),
    ...         ("dog", "NN"),
    ...         (".", "."),
    ...     ),
    ... ))],[]) # doctest: +NORMALIZE_WHITESPACE
    [[((u'jumped', u'VBD'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
    ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
    ((u'jumped', u'VBD'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
    ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]

    u	   conll2007c         O  s3   t  j d t d d ƒt t |  ƒ j | | Ž  d  S(   Nuw   The StanfordDependencyParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPDependencyParser[0m instead.Rf   i   (   Rg   Rh   Ri   Rj   Ro   R+   (   R$   Rk   Rl   (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyR+     s
    c         C  s   t  | d d ƒS(   Nt   top_relation_labelu   root(   R
   (   R$   Rn   (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyR1   š  s    (   Ra   Rb   Rc   R=   R+   R1   (    (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyRo   \  s   0	
t   StanfordNeuralDependencyParserc           B  sP   e  Z d  Z d Z d Z d Z d Z e Z e Z	 d „  Z
 e d „ Z d „  Z RS(   u9  
    >>> from nltk.parse.stanford import StanfordNeuralDependencyParser
    >>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx4g')

    >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy']), '.'])]

    >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE
    [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det',
    (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'),
    u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')),
    ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det',
    (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ')), ((u'jumps', u'VBZ'),
    u'punct', (u'.', u'.'))]]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
    ...     "The quick brown fox jumps over the lazy dog.",
    ...     "The quick grey wolf jumps over the lazy fox."
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE
    [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over',
    'the', 'lazy']), '.']), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']),
    Tree('fox', ['over', 'the', 'lazy']), '.'])]

    >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
    ...     "I 'm a dog".split(),
    ...     "This is my friends ' cat ( the tabby )".split(),
    ... ))], []) # doctest: +NORMALIZE_WHITESPACE
    [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends',
    ['my', "'"]), Tree('tabby', ['-LRB-', 'the', '-RRB-'])])]
    u   conllu)   edu.stanford.nlp.pipeline.StanfordCoreNLPu%   stanford-corenlp-(\d+)(\.(\d+))+\.jaru,   stanford-corenlp-(\d+)(\.(\d+))+-models\.jarc         O  sB   t  j d t d d ƒt t |  ƒ j | | Ž  |  j d 7_ d  S(   Nu}   The StanfordNeuralDependencyParser will be deprecated
Please use [91mnltk.parse.corenlp.CoreNLPDependencyParser[0m instead.Rf   i   u(   -annotators tokenize,ssplit,pos,depparse(   Rg   Rh   Ri   Rj   Rq   R+   R"   (   R$   Rk   Rl   (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyR+   Å  s    c         C  s   t  d ƒ ‚ d S(   u¶   
        Currently unimplemented because the neural dependency parser (and
        the StanfordCoreNLP pipeline class) doesn't support passing in pre-
        tagged tokens.
        ux   tagged_parse[_sents] is not supported by StanfordNeuralDependencyParser; use parse[_sents] or raw_parse[_sents] instead.N(   t   NotImplementedError(   R$   R?   R   (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyRE   Ð  s    c         C  s   t  | d d ƒS(   NRp   u   ROOT(   R
   (   R$   Rn   (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyR1   Ü  s    (   Ra   Rb   Rc   R=   R<   R   R   R   RX   R0   R+   R,   RE   R1   (    (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyRq   ž  s   	uE   doctests from nltk.parse.stanford are skipped because it's deprecatedc         C  sL   d d l  m } y t d d ƒ t ƒ  Wn t k
 rG | d ƒ ‚ n Xd  S(   Niÿÿÿÿ(   t   SkipTestR   u4   edu/stanford/nlp/models/lexparser/englishPCFG.ser.gzun   doctests from nltk.parse.stanford are skipped because one of the stanford parser or CoreNLP jars doesn't exist(   t   noseRs   Re   Rq   t   LookupError(   t   moduleRs   (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyt   setup_moduleà  s    (   t
   __future__R    RR   R   Rg   t   unittestR   t
   subprocessR   t   sixR   t   nltk.internalsR   R   R   R   R   t   nltk.parse.apiR	   t   nltk.parse.dependencygraphR
   t	   nltk.treeR   R   R   Re   Ro   Rq   Rw   (    (    (    s2   lib/python2.7/site-packages/nltk/parse/stanford.pyt   <module>
   s    (öCBB