B
    >?[                 @   sV   d dl mZmZ d dlmZ d dlmZ eG dd deZdd Z	e
dkrRe	  d	S )
    )print_functionunicode_literals)FreqDist)python_2_unicode_compatiblec               @   sD   e Zd ZdZdddZdd Zdd Zd	d
 ZdddZdd Z	dS )ConfusionMatrixa  
    The confusion matrix between a list of reference values and a
    corresponding list of test values.  Entry *[r,t]* of this
    matrix is a count of the number of times that the reference value
    *r* corresponds to the test value *t*.  E.g.:

        >>> from nltk.metrics import ConfusionMatrix
        >>> ref  = 'DET NN VB DET JJ NN NN IN DET NN'.split()
        >>> test = 'DET VB VB DET NN NN NN IN DET NN'.split()
        >>> cm = ConfusionMatrix(ref, test)
        >>> print(cm['NN', 'NN'])
        3

    Note that the diagonal entries *Ri=Tj* of this matrix
    corresponds to correct values; and the off-diagonal entries
    correspond to incorrect values.
    Fc       	         s   t |t |krtd|rPt|t|fdd}tt|| |dntt|| tdd tD }fddD  d	}xLt||D ]>\}} ||  ||   d
7  < t| ||  ||  }qW | _	|| _
 | _|| _t || _t fddtt D | _dS )a  
        Construct a new confusion matrix from a list of reference
        values and a corresponding list of test values.

        :type reference: list
        :param reference: An ordered list of reference values.
        :type test: list
        :param test: A list of values to compare against the
            corresponding reference values.
        :raise ValueError: If ``reference`` and ``length`` do not have
            the same length.
        z Lists must have the same length.c                s    |  |    S )N )v)	ref_fdist
test_fdistr   ;lib/python3.7/site-packages/nltk/metrics/confusionmatrix.pykey6   s    z%ConfusionMatrix.__init__.<locals>.key)r   c             s   s   | ]\}}||fV  qd S )Nr   ).0ivalr   r   r   	<genexpr>>   s    z+ConfusionMatrix.__init__.<locals>.<genexpr>c                s   g | ]}d d  D qS )c             S   s   g | ]}d qS )r   r   )r   r   r   r   r   
<listcomp>A   s    z7ConfusionMatrix.__init__.<locals>.<listcomp>.<listcomp>r   )r   r   )valuesr   r   r   A   s    z,ConfusionMatrix.__init__.<locals>.<listcomp>r      c             3   s   | ]} | | V  qd S )Nr   )r   r   )	confusionr   r   r   R   s    N)len
ValueErrorr   sortedsetdict	enumeratezipmax_values_indices
_confusion	_max_conf_totalsumrange_correct)	self	referencetestsort_by_countr   indicesZmax_confwgr   )r   r	   r
   r   r   __init__!   s(    
zConfusionMatrix.__init__c             C   s*   |\}}| j | }| j | }| j| | S )z
        :return: The number of times that value ``li`` was expected and
        value ``lj`` was given.
        :rtype: int
        )r   r   )r%   Zli_lj_tupleliljr   jr   r   r   __getitem__T   s    

zConfusionMatrix.__getitem__c             C   s   d| j | jf S )Nz <ConfusionMatrix: %s/%s correct>)r$   r!   )r%   r   r   r   __repr___   s    zConfusionMatrix.__repr__c             C   s   |   S )N)pretty_format)r%   r   r   r   __str__b   s    zConfusionMatrix.__str__TNc                s   j } j}|r$t| fddd}|r4|d| }|rHdd |D }ndd tt|D }tdd	 |D }d
t| d }	|rd}
d}d}n.tt j}
d
t|
 d }d|
d  d }d}x|t|D ]p}|d| d 7 }xR|D ]J}||t| kr"|||| t|  |
d 7 }q|d|
d  7 }qW |d7 }qW |dd| d|
d t|  f 7 }xt	||D ]\}} j
| }||	| 7 }x|D ]} j
| }|| | dkr||7 }n:|r||d|| |   j  7 }n|||| |  7 }||kr4|d}|d| d ||d d  d }n|d7 }qW |d7 }qrW |dd| d|
d t|  f 7 }|d7 }|s|d7 }x*t|D ]\}}|d|d |f 7 }qW |S ) a  
        :return: A multi-line string representation of this confusion matrix.
        :type truncate: int
        :param truncate: If specified, then only show the specified
            number of values.  Any sorting (e.g., sort_by_count)
            will be performed before truncation.
        :param sort_by_count: If true, then sort by the count of each
            label in the reference data.  I.e., labels that occur more
            frequently in the reference label will be towards the left
            edge of the matrix, and labels that occur less frequently
            will be towards the right edge.

        @todo: add marginals?
        c                s   t  j j|    S )N)r"   r   r   )r   )r%   r   r   <lambda>   s    z/ConfusionMatrix.pretty_format.<locals>.<lambda>)r   Nc             S   s   g | ]}d | qS )z%sr   )r   r   r   r   r   r      s    z1ConfusionMatrix.pretty_format.<locals>.<listcomp>c             S   s   g | ]}t |d  qS )r   )str)r   nr   r   r   r      s    c             s   s   | ]}t |V  qd S )N)r   )r   r   r   r   r   r      s    z0ConfusionMatrix.pretty_format.<locals>.<genexpr>%zs |    z%5.1f%%z     .d r   . z |z |
z	%s-+-%s+
-r   g      Y@<>z|
z(row = reference; col = test)
zValue key:
z%6d: %s
)r   r   r   r#   r   r   reprr    rjustr   r   r!   rfindr   )r%   Zshow_percentsZvalues_in_charttruncater(   r   r   Zvalue_stringsZvaluelenZvalue_formatZentrylenZentry_formatZzerostrsr   r   r-   r.   r/   Z	prevspacevaluer   )r%   r   r2   e   sd    
$$



 

&$zConfusionMatrix.pretty_formatc             C   s\   | j }d}ttt|d }dt| d }x(tt|D ]}||||| f 7 }q<W |S )NzValue key:
r   z  %zd: %s
)r   r   r@   r#   )r%   r   r5   ZindexlenZ
key_formatr   r   r   r   r      s    zConfusionMatrix.key)F)FTNF)
__name__
__module____qualname____doc__r,   r0   r1   r3   r2   r   r   r   r   r   r      s   
3   
Zr   c              C   sT   d  } d  }td|  td| td tt| | tt| |jdd d S )Nz DET NN VB DET JJ NN NN IN DET NNz DET VB VB DET NN NN NN IN DET NNzReference =z	Test    =zConfusion matrix:T)r(   )splitprintr   r2   )r&   r'   r   r   r   demo   s    

rL   __main__N)Z
__future__r   r   Znltk.probabilityr   Znltk.compatr   objectr   rL   rF   r   r   r   r   <module>   s    B
