B
    >?ð[¯&  ã               @   sÂ   d dl mZmZmZ d dlmZ d dlZd dlmZ d dl	m
Z
 yd dlZW n ek
r`   Y nX d dlmZ d dlmZ G dd	„ d	eƒZd
d„ Zdd„ ZG dd„ deƒZeG dd„ deƒƒZdS )é    )Úprint_functionÚunicode_literalsÚdivision)ÚabstractmethodN)Ústdout)Úsqrt)ÚClusterI)Úpython_2_unicode_compatiblec               @   sd   e Zd ZdZddd„Zddd„Zedd	„ ƒZd
d„ Zedd„ ƒZ	dd„ Z
dd„ Zdd„ Zdd„ ZdS )ÚVectorSpaceClustererz©
    Abstract clusterer which takes tokens and maps them into a vector space.
    Optionally performs singular value decomposition to reduce the
    dimensionality.
    FNc             C   s   d| _ || _|| _dS )a)  
        :param normalise:       should vectors be normalised to length 1
        :type normalise:        boolean
        :param svd_dimensions:  number of dimensions to use in reducing vector
                                dimensionsionality with SVD
        :type svd_dimensions:   int
        N)Ú_TtÚ_should_normaliseÚ_svd_dimensions)ÚselfZ	normaliseZsvd_dimensions© r   ú0lib/python3.7/site-packages/nltk/cluster/util.pyÚ__init__   s    zVectorSpaceClusterer.__init__c       
         sê   t |ƒdkst‚ˆ jr&ttˆ j|ƒƒ}ˆ jrÄˆ jt |d ƒk rÄtj 	t 
t |¡¡¡\}}}|d ˆ j… t ˆ jtj¡ }|d d …d ˆ j…f }|d ˆ j…d d …f }	t 
t ||	¡¡}t 
|¡ˆ _ˆ  ||¡ |ræ‡ fdd„|D ƒS d S )Nr   c                s   g | ]}ˆ   |¡‘qS r   )Úclassify)Ú.0Úvector)r   r   r   ú
<listcomp>B   s    z0VectorSpaceClusterer.cluster.<locals>.<listcomp>)ÚlenÚAssertionErrorr   ÚlistÚmapÚ
_normaliser   ÚnumpyZlinalgZsvdZ	transposeZarrayZidentityZfloat64Údotr   Úcluster_vectorspace)
r   ÚvectorsZassign_clustersÚtraceÚuÚdZvtÚSÚTZDtr   )r   r   Úcluster+   s    zVectorSpaceClusterer.clusterc             C   s   dS )zD
        Finds the clusters using the given set of vectors.
        Nr   )r   r   r   r   r   r   r   D   s    z(VectorSpaceClusterer.cluster_vectorspacec             C   s<   | j r|  |¡}| jd k	r(t | j|¡}|  |¡}|  |¡S )N)r   r   r   r   r   Úclassify_vectorspaceZcluster_name)r   r   r$   r   r   r   r   J   s    


zVectorSpaceClusterer.classifyc             C   s   dS )zN
        Returns the index of the appropriate cluster for the vector.
        Nr   )r   r   r   r   r   r%   R   s    z)VectorSpaceClusterer.classify_vectorspacec             C   s4   | j r|  |¡}| jd k	r(t | j|¡}|  ||¡S )N)r   r   r   r   r   Úlikelihood_vectorspace)r   r   Zlabelr   r   r   Ú
likelihoodX   s
    

zVectorSpaceClusterer.likelihoodc             C   s   |   |¡}||krdS dS )zP
        Returns the likelihood of the vector belonging to the cluster.
        g      ð?g        )r%   )r   r   r$   Z	predictedr   r   r   r&   _   s    
z+VectorSpaceClusterer.likelihood_vectorspacec             C   s,   | j r|  |¡}| jdk	r(t | j|¡}|S )zU
        Returns the vector after normalisation and dimensionality reduction
        N)r   r   r   r   r   )r   r   r   r   r   r   f   s
    

zVectorSpaceClusterer.vectorc             C   s   |t t ||¡ƒ S )z7
        Normalises the vector to unit length.
        )r   r   r   )r   r   r   r   r   r   p   s    zVectorSpaceClusterer._normalise)FN)FF)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r$   r   r   r   r%   r'   r&   r   r   r   r   r   r   r
      s   


r
   c             C   s   | | }t t ||¡ƒS )z}
    Returns the euclidean distance between vectors u and v. This is equivalent
    to the length of the vector (u - v).
    )r   r   r   )r    ÚvZdiffr   r   r   Úeuclidean_distancew   s    r-   c             C   s0   dt  | |¡tt  | | ¡ƒtt  ||¡ƒ   S )zs
    Returns 1 minus the cosine of the angle between vectors v and u. This is
    equal to 1 - (u.v / |u||v|).
    é   )r   r   r   )r    r,   r   r   r   Úcosine_distance€   s    r/   c               @   s2   e Zd ZdZdd„ Zddd„Zdd„ Zd	d
„ ZdS )Ú_DendrogramNodez Tree node of a dendrogram. c             G   s   || _ || _d S )N)Ú_valueÚ	_children)r   ÚvalueZchildrenr   r   r   r   ‹   s    z_DendrogramNode.__init__Tc             C   sD   | j r.g }x| j D ]}| | |¡¡ qW |S |r:| jgS | gS d S )N)r2   ÚextendÚleavesr1   )r   Úvaluesr5   Úchildr   r   r   r5      s    z_DendrogramNode.leavesc             C   s¦   | j | fg}xpt|ƒ|k r|| ¡ \}}|js<| ||f¡ P x4|jD ]*}|jr`| |j |f¡ qD| d|f¡ qDW | ¡  qW g }x|D ]\}}| | ¡ ¡ qˆW |S )Nr   )r1   r   Úpopr2   ÚpushÚappendÚsortr5   )r   ÚnÚqueueÚpriorityÚnoder7   Úgroupsr   r   r   r@   š   s    z_DendrogramNode.groupsc             C   s   t | j|jƒdk S )Nr   )r/   r1   )r   Z
comparatorr   r   r   Ú__lt__¯   s    z_DendrogramNode.__lt__N)T)r(   r)   r*   r+   r   r5   r@   rA   r   r   r   r   r0   ˆ   s
   
r0   c               @   s@   e Zd ZdZg fdd„Zdd„ Zdd„ Zg fdd	„Zd
d„ ZdS )Ú
Dendrograma  
    Represents a dendrogram, a tree with a specified branching order.  This
    must be initialised with the leaf items, then iteratively call merge for
    each branch. This class constructs a tree representing the order of calls
    to the merge function.
    c             C   s(   dd„ |D ƒ| _ t | j ¡| _d| _dS )zs
        :param  items: the items at the leaves of the dendrogram
        :type   items: sequence of (any)
        c             S   s   g | ]}t |ƒ‘qS r   )r0   )r   Úitemr   r   r   r   Á   s    z'Dendrogram.__init__.<locals>.<listcomp>r.   N)Ú_itemsÚcopyÚ_original_itemsÚ_merge)r   Úitemsr   r   r   r   ¼   s    zDendrogram.__init__c                sl   t |ƒdkst‚tˆ jf‡ fdd„|D ƒžŽ }ˆ  jd7  _|ˆ j|d < x|dd… D ]}ˆ j|= qXW dS )a=  
        Merges nodes at given indices in the dendrogram. The nodes will be
        combined which then replaces the first node specified. All other nodes
        involved in the merge will be removed.

        :param  indices: indices of the items to merge (at least two)
        :type   indices: seq of int
        é   c                s   g | ]}ˆ j | ‘qS r   )rD   )r   Úi)r   r   r   r   Ï   s    z$Dendrogram.merge.<locals>.<listcomp>r.   r   N)r   r   r0   rG   rD   )r   Úindicesr?   rJ   r   )r   r   ÚmergeÅ   s    	zDendrogram.mergec             C   s6   t | jƒdkr"t| jf| jžŽ }n
| jd }| |¡S )z’
        Finds the n-groups of items (leaves) reachable from a cut at depth n.
        :param  n: number of groups
        :type   n: int
        r.   r   )r   rD   r0   rG   r@   )r   r<   Úrootr   r   r   r@   Õ   s    
zDendrogram.groupsc                s@  d\}}}t | jƒdkr,t| jf| jžŽ }n
| jd }| j}|rF|}ndd„ |D ƒ}ttt |ƒƒd ‰ˆd ‰tˆˆ d ƒ‰d‡‡fdd	„	‰ d
d„ }|j|fg}	‡ fdd„|D ƒ}
x`|	r|	 	¡ \}}t
tdd„ |jƒƒ}t
t|j|ƒƒ}|rt|ƒ}t|ƒ}x´tt |ƒƒD ]¤}|| |krz||kr@|ˆ |d|ƒƒ n,||kr\|ˆ ||dƒƒ n|ˆ |||ƒƒ ˆ |ƒ|
|< n:||  kr’|kr¨n n|ˆ |||ƒƒ n||
| ƒ qW |dƒ x(|jD ]}|jrÊ|	 |j|f¡ qÊW |	 ¡  x|
D ]}||ƒ qúW |dƒ q¸W |d ‡fdd„|D ƒ¡ƒ |dƒ dS )z×
        Print the dendrogram in ASCII art to standard out.
        :param leaf_labels: an optional list of strings to use for labeling the
                            leaves
        :type leaf_labels: list
        )ú+ú-ú|r.   r   c             S   s   g | ]}d |j  ‘qS )z%s)r1   )r   Úleafr   r   r   r   ö   s    z#Dendrogram.show.<locals>.<listcomp>rI   ú c                s   dˆ | | |ˆ f S )Nz%s%s%sr   )ZcentreÚleftÚright)ÚlhalfÚrhalfr   r   Úformatþ   s    zDendrogram.show.<locals>.formatc             S   s   t  | ¡ d S )N)r   Úwrite)Ústrr   r   r   Údisplay  s    z Dendrogram.show.<locals>.displayc                s   g | ]}ˆ d ƒ‘qS )rR   r   )r   rQ   )rW   r   r   r     s    c             S   s   |   d¡d S )NFr   )r5   )Úcr   r   r   Ú<lambda>	  s    z!Dendrogram.show.<locals>.<lambda>Ú
Ú c             3   s   | ]}|  ˆ ¡V  qd S )N)Úcenter)r   rC   )Úwidthr   r   ú	<genexpr>&  s    z"Dendrogram.show.<locals>.<genexpr>N)rR   rR   )r   rD   r0   rG   rF   Úmaxr   Úintr1   r8   r   r2   ÚindexÚminÚranger:   r;   Újoin)r   Zleaf_labelsZJOINZHLINKZVLINKrM   r5   Zlast_rowrZ   r=   Z	verticalsr>   r?   Zchild_left_leafrK   Zmin_idxZmax_idxrJ   r7   Zverticalr   )rW   rU   rV   r`   r   Úshowá   sV    	





zDendrogram.showc             C   sB   t | jƒdkr"t| jf| jžŽ }n
| jd }| d¡}dt |ƒ S )Nr.   r   Fz<Dendrogram with %d leaves>)r   rD   r0   rG   r5   )r   rM   r5   r   r   r   Ú__repr__)  s
    

zDendrogram.__repr__N)	r(   r)   r*   r+   r   rL   r@   rh   ri   r   r   r   r   rB   ³   s   	HrB   )Z
__future__r   r   r   Úabcr   rE   Úsysr   Zmathr   r   ÚImportErrorZnltk.cluster.apir   Znltk.compatr	   r
   r-   r/   Úobjectr0   rB   r   r   r   r   Ú<module>   s    _	+