ó
ù`]c           @  sú   d  d l  m Z m Z m Z d  d l m Z d  d l Z d  d l m Z d  d l	 m
 Z
 y d  d l Z Wn e k
 r{ n Xd  d l m Z d  d l m Z d e f d	 „  ƒ  YZ d
 „  Z d „  Z d e f d „  ƒ  YZ e d e f d „  ƒ  Yƒ Z d S(   iÿÿÿÿ(   t   print_functiont   unicode_literalst   division(   t   abstractmethodN(   t   stdout(   t   sqrt(   t   ClusterI(   t   python_2_unicode_compatiblet   VectorSpaceClustererc           B  sw   e  Z d  Z e d
 d „ Z e e d „ Z e d „  ƒ Z d „  Z	 e d „  ƒ Z
 d „  Z d „  Z d „  Z d	 „  Z RS(   u©   
    Abstract clusterer which takes tokens and maps them into a vector space.
    Optionally performs singular value decomposition to reduce the
    dimensionality.
    c         C  s   d |  _ | |  _ | |  _ d S(   u)  
        :param normalise:       should vectors be normalised to length 1
        :type normalise:        boolean
        :param svd_dimensions:  number of dimensions to use in reducing vector
                                dimensionsionality with SVD
        :type svd_dimensions:   int
        N(   t   Nonet   _Ttt   _should_normaliset   _svd_dimensions(   t   selft	   normaliset   svd_dimensions(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   __init__   s    		c         C  sV  t  | ƒ d k s t ‚ |  j r< t t |  j | ƒ ƒ } n  |  j r|  j t  | d ƒ k  rt j j	 t j
 t j | ƒ ƒ ƒ \ } } } | |  j  t j |  j t j ƒ } | d  d  … d  |  j … f } | d  |  j … d  d  … f }	 t j
 t j | |	 ƒ ƒ } t j
 | ƒ |  _ n  |  j | | ƒ | rRg  | D] }
 |  j |
 ƒ ^ q9Sd  S(   Ni    (   t   lent   AssertionErrorR   t   listt   mapt
   _normaliseR   t   numpyt   linalgt   svdt	   transposet   arrayt   identityt   float64t   dotR
   t   cluster_vectorspacet   classify(   R   t   vectorst   assign_clusterst   tracet   ut   dt   vtt   St   Tt   Dtt   vector(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   cluster+   s    	"-c         C  s   d S(   uD   
        Finds the clusters using the given set of vectors.
        N(    (   R   R    R"   (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyR   D   t    c         C  s^   |  j  r |  j | ƒ } n  |  j d  k	 rB t j |  j | ƒ } n  |  j | ƒ } |  j | ƒ S(   N(   R   R   R
   R	   R   R   t   classify_vectorspacet   cluster_name(   R   R)   R*   (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyR   J   s    	c         C  s   d S(   uN   
        Returns the index of the appropriate cluster for the vector.
        N(    (   R   R)   (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyR,   R   R+   c         C  sR   |  j  r |  j | ƒ } n  |  j d  k	 rB t j |  j | ƒ } n  |  j | | ƒ S(   N(   R   R   R
   R	   R   R   t   likelihood_vectorspace(   R   R)   t   label(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt
   likelihoodX   s
    	c         C  s#   |  j  | ƒ } | | k r d Sd S(   uP   
        Returns the likelihood of the vector belonging to the cluster.
        g      ð?g        (   R,   (   R   R)   R*   t	   predicted(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyR.   _   s    c         C  sF   |  j  r |  j | ƒ } n  |  j d k	 rB t j |  j | ƒ } n  | S(   uU   
        Returns the vector after normalisation and dimensionality reduction
        N(   R   R   R
   R	   R   R   (   R   R)   (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyR)   f   s
    	c         C  s   | t  t j | | ƒ ƒ S(   u7   
        Normalises the vector to unit length.
        (   R   R   R   (   R   R)   (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyR   p   s    N(   t   __name__t
   __module__t   __doc__t   FalseR	   R   R*   R   R   R   R,   R0   R.   R)   R   (    (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyR      s   				
c         C  s    |  | } t  t j | | ƒ ƒ S(   u}   
    Returns the euclidean distance between vectors u and v. This is equivalent
    to the length of the vector (u - v).
    (   R   R   R   (   R#   t   vt   diff(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   euclidean_distancew   s    
c         C  s@   d t  j |  | ƒ t t  j |  |  ƒ ƒ t t  j | | ƒ ƒ S(   us   
    Returns 1 minus the cosine of the angle between vectors v and u. This is
    equal to 1 - (u.v / |u||v|).
    i   (   R   R   R   (   R#   R6   (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   cosine_distance€   s    t   _DendrogramNodec           B  s5   e  Z d  Z d „  Z e d „ Z d „  Z d „  Z RS(   u    Tree node of a dendrogram. c         G  s   | |  _  | |  _ d  S(   N(   t   _valuet	   _children(   R   t   valuet   children(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyR   ‹   s    	c         C  sX   |  j  r= g  } x' |  j  D] } | j | j | ƒ ƒ q W| S| rM |  j g S|  g Sd  S(   N(   R<   t   extendt   leavesR;   (   R   t   valuesR@   t   child(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyR@      s    	
c         C  sä   |  j  |  f g } x› t | ƒ | k  r¯ | j ƒ  \ } } | j sY | j | | f ƒ Pn  xF | j D]; } | j r‹ | j | j  | f ƒ qc | j d | f ƒ qc W| j ƒ  q Wg  } x' | D] \ } } | j | j ƒ  ƒ q½ W| S(   Ni    (   R;   R   t   popR<   t   pusht   appendt   sortR@   (   R   t   nt   queuet   priorityt   nodeRB   t   groups(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyRK   š   s    		c         C  s   t  |  j | j ƒ d k  S(   Ni    (   R9   R;   (   R   t
   comparator(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   __lt__¯   s    (   R2   R3   R4   R   t   TrueR@   RK   RM   (    (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyR:   ˆ   s
   		t
   Dendrogramc           B  sA   e  Z d  Z g  d „ Z d „  Z d „  Z g  d „ Z d „  Z RS(   u  
    Represents a dendrogram, a tree with a specified branching order.  This
    must be initialised with the leaf items, then iteratively call merge for
    each branch. This class constructs a tree representing the order of calls
    to the merge function.
    c         C  sD   g  | D] } t  | ƒ ^ q |  _ t j |  j ƒ |  _ d |  _ d S(   us   
        :param  items: the items at the leaves of the dendrogram
        :type   items: sequence of (any)
        i   N(   R:   t   _itemst   copyt   _original_itemst   _merge(   R   t   itemst   item(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyR   ¼   s    "c         G  s‡   t  | ƒ d k s t ‚ t |  j g  | D] } |  j | ^ q( Œ } |  j d 7_ | |  j | d <x | d D] } |  j | =qo Wd S(   u=  
        Merges nodes at given indices in the dendrogram. The nodes will be
        combined which then replaces the first node specified. All other nodes
        involved in the merge will be removed.

        :param  indices: indices of the items to merge (at least two)
        :type   indices: seq of int
        i   i   i    N(   R   R   R:   RS   RP   (   R   t   indicest   iRJ   (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   mergeÅ   s    	,c         C  sG   t  |  j ƒ d k r- t |  j |  j Œ } n |  j d } | j | ƒ S(   u’   
        Finds the n-groups of items (leaves) reachable from a cut at depth n.
        :param  n: number of groups
        :type   n: int
        i   i    (   R   RP   R:   RS   RK   (   R   RG   t   root(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyRK   Õ   s    c           s   d \ } } } t  |  j ƒ d k r< t |  j |  j Œ } n |  j d } |  j } | ra | } n  g  | D] } d | j ^ qh } t t t  | ƒ ƒ d ‰ ˆ d ‰  t ˆ ˆ  d ƒ ‰ d d ‡  ‡ f d	 † }	 d
 „  }
 | j | f g } g  | D] } |	 d ƒ ^ qò } xÂ| rÎ| j	 ƒ  \ } } t
 t d „  | j ƒ ƒ } t
 t | j | ƒ ƒ } | ryt | ƒ } t | ƒ } n  xà t t  | ƒ ƒ D]Ì } | | | k r| | k rÇ|
 |	 | d | ƒ ƒ n; | | k rì|
 |	 | | d ƒ ƒ n |
 |	 | | | ƒ ƒ |	 | ƒ | | <qŒ| | k o,| k n rJ|
 |	 | | | ƒ ƒ qŒ|
 | | ƒ qŒW|
 d ƒ x3 | j D]( } | j rp| j | j | f ƒ qpqpW| j ƒ  x | D] } |
 | ƒ q­W|
 d ƒ qW|
 d j ‡ f d †  | Dƒ ƒ ƒ |
 d ƒ d S(   u×   
        Print the dendrogram in ASCII art to standard out.
        :param leaf_labels: an optional list of strings to use for labeling the
                            leaves
        :type leaf_labels: list
        u   +u   -u   |i   i    u   %si   u    c           s   d ˆ  | |  | ˆ f S(   Nu   %s%s%s(    (   t   centret   leftt   right(   t   lhalft   rhalf(    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   formatþ   s    c         S  s   t  j |  ƒ d  S(   N(   R   t   write(   t   str(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   display  s    c         S  s   |  j  t ƒ d S(   Ni    (   R@   R5   (   t   c(    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   <lambda>	  R+   u   
u    c         3  s   |  ] } | j  ˆ  ƒ Vq d  S(   N(   t   center(   t   .0RU   (   t   width(    s0   lib/python2.7/site-packages/nltk/cluster/util.pys	   <genexpr>&  s    N(   u   +u   -u   |(   R   RP   R:   RS   RR   R;   t   maxR   t   intRC   R   R<   t   indext   mint   rangeRE   RF   t   join(   R   t   leaf_labelst   JOINt   HLINKt   VLINKRY   R@   t   last_rowt   leafR_   Rb   RH   t	   verticalsRI   RJ   t   child_left_leafRV   t   min_idxt   max_idxRW   RB   t   vertical(    (   R]   R^   Rg   s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   showá   sV    			 
		
	
#c         C  sW   t  |  j ƒ d k r- t |  j |  j Œ } n |  j d } | j t ƒ } d t  | ƒ S(   Ni   i    u   <Dendrogram with %d leaves>(   R   RP   R:   RS   R@   R5   (   R   RY   R@   (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   __repr__)  s
    (   R2   R3   R4   R   RX   RK   Ry   Rz   (    (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyRO   ³   s   			H(   t
   __future__R    R   R   t   abcR   RQ   t   sysR   t   mathR   R   t   ImportErrort   nltk.cluster.apiR   t   nltk.compatR   R   R8   R9   t   objectR:   RO   (    (    (    s0   lib/python2.7/site-packages/nltk/cluster/util.pyt   <module>   s    _			+