B
    P?ð["  ã               @   s(   d dl mZ d dlZG dd„ deƒZdS )é    )ÚdivisionNc               @   s(   e Zd ZdZdd„ Zdd„ Zdd„ ZdS )	ÚDependencyEvaluatora;  
    Class for measuring labelled and unlabelled attachment score for
    dependency parsing. Note that the evaluation ignores punctuation.

    >>> from nltk.parse import DependencyGraph, DependencyEvaluator

    >>> gold_sent = DependencyGraph("""
    ... Pierre  NNP     2       NMOD
    ... Vinken  NNP     8       SUB
    ... ,       ,       2       P
    ... 61      CD      5       NMOD
    ... years   NNS     6       AMOD
    ... old     JJ      2       NMOD
    ... ,       ,       2       P
    ... will    MD      0       ROOT
    ... join    VB      8       VC
    ... the     DT      11      NMOD
    ... board   NN      9       OBJ
    ... as      IN      9       VMOD
    ... a       DT      15      NMOD
    ... nonexecutive    JJ      15      NMOD
    ... director        NN      12      PMOD
    ... Nov.    NNP     9       VMOD
    ... 29      CD      16      NMOD
    ... .       .       9       VMOD
    ... """)

    >>> parsed_sent = DependencyGraph("""
    ... Pierre  NNP     8       NMOD
    ... Vinken  NNP     1       SUB
    ... ,       ,       3       P
    ... 61      CD      6       NMOD
    ... years   NNS     6       AMOD
    ... old     JJ      2       NMOD
    ... ,       ,       3       AMOD
    ... will    MD      0       ROOT
    ... join    VB      8       VC
    ... the     DT      11      AMOD
    ... board   NN      9       OBJECT
    ... as      IN      9       NMOD
    ... a       DT      15      NMOD
    ... nonexecutive    JJ      15      NMOD
    ... director        NN      12      PMOD
    ... Nov.    NNP     9       VMOD
    ... 29      CD      16      NMOD
    ... .       .       9       VMOD
    ... """)

    >>> de = DependencyEvaluator([parsed_sent],[gold_sent])
    >>> las, uas = de.eval()
    >>> las
    0.6...
    >>> uas
    0.8...
    >>> abs(uas - 0.8) < 0.00001
    True
    c             C   s   || _ || _dS )z‰
        :param parsed_sents: the list of parsed_sents as the output of parser
        :type parsed_sents: list(DependencyGraph)
        N)Ú_parsed_sentsÚ_gold_sents)ÚselfZparsed_sentsZ
gold_sents© r   ú2lib/python3.7/site-packages/nltk/parse/evaluate.pyÚ__init__I   s    zDependencyEvaluator.__init__c                s.   t dddddddgƒ‰ d ‡ fd	d
„|D ƒ¡S )z©
        Function to remove punctuation from Unicode string.
        :param input: the input string
        :return: Unicode string after remove all punctuation
        ZPcZPdZPsZPeZPiZPfZPoÚ c             3   s    | ]}t  |¡ˆ kr|V  qd S )N)ÚunicodedataÚcategory)Ú.0Úx)Úpunc_catr   r   ú	<genexpr>X   s    z4DependencyEvaluator._remove_punct.<locals>.<genexpr>)ÚsetÚjoin)r   ZinStrr   )r   r   Ú_remove_punctQ   s    z!DependencyEvaluator._remove_punctc       
      C   s  t | jƒt | jƒkrtdƒ‚d}d}d}xÖtt | jƒƒD ]Ä}| j| j}| j| j}t |ƒt |ƒkrltdƒ‚xŽ| ¡ D ]‚\}}|| }	|d dkr”qv|d |	d kr¬tdƒ‚|  |d ¡dkrÀqv|d7 }|d	 |	d	 krv|d7 }|d
 |	d
 krv|d7 }qvW q8W || || fS )z†
        Return the Labeled Attachment Score (LAS) and Unlabeled Attachment Score (UAS)

        :return : tuple(float,float)
        zE Number of parsed sentence is different with number of gold sentence.r   z!Sentences must have equal length.ZwordNz!Sentence sequence is not matched.r
   é   ÚheadZrel)Úlenr   r   Ú
ValueErrorÚrangeZnodesÚitemsr   )
r   ZcorrZcorrLZtotalÚiZparsed_sent_nodesZgold_sent_nodesZparsed_node_addressZparsed_nodeZ	gold_noder   r   r   ÚevalZ   s2    zDependencyEvaluator.evalN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r	   r   r   r   r   r   r   r      s   9	r   )Z
__future__r   r   Úobjectr   r   r   r   r   Ú<module>	   s   