B
    P?[?                 @   s   d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZmZmZ d dlmZ d dlmZ d dlmZ d	d
 Zdd Zdd ZG dd deZedkrd dlZe  dS )    )print_functionunicode_literalsN)	text_type)ZipFilePathPointer)find_dir	find_filefind_jars_within_path)ParserI)DependencyGraph)taggedsents_to_conllc              C   sR   ddl m}  | ddddddd	d
ddddddddddddddddddddg}|jS )Nr   )RegexpTagger)z\.$.)z\,$,)z\?$?)z\($()z\)$))z\[$[)z\]$])z^-?[0-9]+(.[0-9]+)?$ZCD)z(The|the|A|a|An|an)$ZDT)z&(He|he|She|she|It|it|I|me|Me|You|you)$ZPRP)z(His|his|Her|her|Its|its)$zPRP$)z(my|Your|your|Yours|yours)$zPRP$)z (on|On|in|In|at|At|since|Since)$IN)z (for|For|ago|Ago|before|Before)$r   )z(till|Till|until|Until)$r   )z(by|By|beside|Beside)$r   )z(under|Under|below|Below)$r   )z(over|Over|above|Above)$r   )z (across|Across|through|Through)$r   )z(into|Into|towards|Towards)$r   )z(onto|Onto|from|From)$r   )z.*able$ZJJ)z.*ness$NN)z.*ly$ZRB)z.*s$ZNNS)z.*ing$ZVBG)z.*ed$ZVBD)z.*r   )Znltk.tagr   tag)r   Z_tagger r   .lib/python3.7/site-packages/nltk/parse/malt.pymalt_regex_tagger   s>    r   c             C   s   t j| r| }nt| dd}dddg}tt|}tdd |D }tdddg}||sbttt	d	d
 |sxtt
|S )zE
    A module to find MaltParser .jar file and its dependencies.
    )ZMALT_PARSER)env_vars c             s   s   | ]}t j|d  V  qdS )   N)ospathsplit).0Zjarr   r   r   	<genexpr>N   s    z"find_maltparser.<locals>.<genexpr>z	log4j.jarz
libsvm.jarzliblinear-1.8.jarc             S   s   |  do| dS )Nzmaltparser-z.jar)
startswithendswith)ir   r   r   <lambda>S   s    z!find_maltparser.<locals>.<lambda>)r   r   existsr   setr   issubsetAssertionErroranyfilterlist)parser_dirnameZ	_malt_dirZmalt_dependenciesZ
_malt_jarsZ_jarsr   r   r   find_maltparserC   s    
r.   c             C   s.   | dkrdS t j| r| S t| dddS dS )z8
    A module to find pre-trained MaltParser model.
    Nzmalt_temp.mco)Z
MALT_MODELF)r   verbose)r   r   r&   r   )model_filenamer   r   r   find_malt_modelX   s
    r1   c               @   sZ   e Zd ZdZdddZdddZdd	d
ZdddZedddZ	dddZ
dddZdS )
MaltParsera  
    A class for dependency parsing with MaltParser. The input is the paths to:
    - a maltparser directory
    - (optionally) the path to a pre-trained MaltParser .mco model file
    - (optionally) the tagger to use for POS tagging before parsing
    - (optionally) additional Java arguments

    Example:
        >>> from nltk.parse import malt
        >>> # With MALT_PARSER and MALT_MODEL environment set.
        >>> mp = malt.MaltParser('maltparser-1.7.2', 'engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
        >>> # Without MALT_PARSER and MALT_MODEL environment.
        >>> mp = malt.MaltParser('/home/user/maltparser-1.7.2/', '/home/user/engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
    Nc             C   sT   t || _|dk	r|ng | _t|| _| jdk| _t | _|dk	rH|nt	 | _
dS )a  
        An interface for parsing with the Malt Parser.

        :param parser_dirname: The path to the maltparser directory that
        contains the maltparser-1.x.jar
        :type parser_dirname: str
        :param model_filename: The name of the pre-trained model with .mco file
        extension. If provided, training will not be required.
        (see http://www.maltparser.org/mco/mco.html and
        see http://www.patful.com/chalk/node/185)
        :type model_filename: str
        :param tagger: The tagger used to POS tag the raw string before
        formatting to CONLL format. It should behave like `nltk.pos_tag`
        :type tagger: function
        :param additional_java_args: This is the additional Java arguments that
        one can use when calling Maltparser, usually this is the heapsize
        limits, e.g. `additional_java_args=['-Xmx1024m']`
        (see http://goo.gl/mpDBvQ)
        :type additional_java_args: list
        Nzmalt_temp.mco)r.   	malt_jarsadditional_java_argsr1   model_trainedtempfileZ
gettempdirworking_dirr   tagger)selfr-   r0   r9   r4   r   r   r   __init__x   s    


zMaltParser.__init__Fnullc             c   sP  | j stdtjd| jddd
}tjd| jddd}xt|D ]}|t| qFW |  | j	|j
|j
dd}t }yttj| jd	  W n   Y nX | ||}	t| |	d	k	rtd
d||	f t|j
2}
x*|
 dD ]}tt||dgV  qW W dQ R X W dQ R X W dQ R X t|j
 t|j
 dS )a  
        Use MaltParser to parse multiple POS tagged sentences. Takes multiple
        sentences where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        :param sentences: Input sentences to parse
        :type sentence: list(list(tuple(str, str)))
        :return: iter(iter(``DependencyGraph``)) the dependency graph
        representation of each sentence
        z0Parser has not been trained. Call train() first.zmalt_input.conll.wF)prefixdirmodedeletezmalt_output.conll.parse)r@   r   z0MaltParser parsing (%s) failed with exit code %d z

)top_relation_labelN)r6   	Exceptionr7   NamedTemporaryFiler8   r   writer   closegenerate_malt_commandnamer   getcwdchdirr   r   r5   _executejoinopenreaditerr
   remove)r:   	sentencesr/   rD   
input_fileZoutput_filelinecmdZ_current_pathretZinfileZtree_strr   r   r   parse_tagged_sents   sD    
2zMaltParser.parse_tagged_sentsc                s"    fdd|D } j |||dS )an  
        Use MaltParser to parse multiple sentences.
        Takes a list of sentences, where each sentence is a list of words.
        Each sentence will be automatically tagged with this
        MaltParser instance's tagger.

        :param sentences: Input sentences to parse
        :type sentence: list(list(str))
        :return: iter(DependencyGraph)
        c             3   s   | ]}  |V  qd S )N)r9   )r    Zsentence)r:   r   r   r!      s    z)MaltParser.parse_sents.<locals>.<genexpr>)rD   )rX   )r:   rS   r/   rD   Ztagged_sentencesr   )r:   r   parse_sents   s    zMaltParser.parse_sentsc             C   s   dg}|| j 7 }tjdr dnd}|d|| jg7 }|dg7 }tj| j	rl|dtj
| j	d g7 }n|d| j	g7 }|d	|g7 }|d
kr|d|g7 }|d|g7 }|S )a  
        This function generates the maltparser command use at the terminal.

        :param inputfilename: path to the input file
        :type inputfilename: str
        :param outputfilename: path to the output file
        :type outputfilename: str
        javawin;:z-cpzorg.maltparser.Maltz-cz-irB   z-oz-m)r4   sysplatformr"   rN   r3   r   r   r&   r5   r   )r:   ZinputfilenameZoutputfilenamer@   rV   Zclasspaths_separatorr   r   r   rI      s    


z MaltParser.generate_malt_commandc             C   s&   |rd nt j}t j| ||d}| S )N)stdoutstderr)
subprocessPIPEPopenwait)rV   r/   outputpr   r   r   rM     s    zMaltParser._executec          	   C   sb   t jd| jddd(}ddd |D }|t| W dQ R X | j|j|d	 t	|j dS )
z
        Train MaltParser from a list of ``DependencyGraph`` objects

        :param depgraphs: list of ``DependencyGraph`` objects for training input data
        :type depgraphs: DependencyGraph
        zmalt_train.conll.r=   F)r>   r?   r@   rA   
c             s   s   | ]}| d V  qdS )
   N)Zto_conll)r    Zdgr   r   r   r!   &  s    z#MaltParser.train.<locals>.<genexpr>N)r/   )
r7   rF   r8   rN   rG   r   train_from_filerJ   r   rR   )r:   Z	depgraphsr/   rT   Z	input_strr   r   r   train  s    	zMaltParser.trainc          
   C   s   t |tr`tjd| jddd<}| }| }|t| W dQ R X | j	|j
|dS Q R X | j|dd}| ||}|d	krtd
d||f d| _dS )z
        Train MaltParser from a file
        :param conll_file: str for the filename of the training input data
        :type conll_file: str
        zmalt_train.conll.r=   F)r>   r?   r@   rA   N)r/   Zlearn)r@   r   z1MaltParser training (%s) failed with exit code %drC   T)
isinstancer   r7   rF   r8   rO   rP   rG   r   rk   rJ   rI   rM   rE   rN   r6   )r:   Z
conll_filer/   rT   Zconll_input_fileZ	conll_strrV   rW   r   r   r   rk   -  s    	

zMaltParser.train_from_file)NNN)Fr<   )Fr<   )NN)F)F)F)__name__
__module____qualname____doc__r;   rX   rY   rI   staticmethodrM   rl   rk   r   r   r   r   r2   d   s     
%
B

 
r2   __main__)Z
__future__r   r   r   r_   r7   rc   inspectZsixr   Z	nltk.datar   Znltk.internalsr   r   r   Znltk.parse.apir	   Znltk.parse.dependencygraphr
   Znltk.parse.utilr   r   r.   r1   r2   rn   ZdoctestZtestmodr   r   r   r   <module>   s&   & gC