
\c        	   @   s  d  Z  d d l m Z m Z m Z m Z d d l Z d d l Z d d l	 m
 Z
 d d l m Z d d l m Z m Z d d l m Z d d	 l m Z m Z d d
 l m Z d d l m Z d d l m Z d d l m Z d d l m Z d d l m  Z  d   Z! d   Z" d d e$ d  Z% d d d d d e$ d  Z& d   Z' d   Z( d   Z) e* d e% d e' d e( d e)  Z+ d   Z, d e e f d      YZ- d! e- e f d"     YZ. d S(#   s   Hierarchical Agglomerative Clustering

These routines perform some hierarchical agglomerative clustering of some
input data.

Authors : Vincent Michel, Bertrand Thirion, Alexandre Gramfort,
          Gael Varoquaux
License: BSD 3 clause
i(   t   heapifyt   heappopt   heappusht   heappushpopN(   t   sparse(   t   connected_componentsi   (   t   BaseEstimatort   ClusterMixin(   t   six(   t   paired_distancest   pairwise_distances(   t   check_array(   t   check_memoryi   (   t   _hierarchical(   t   AgglomerationTransform(   t   IntFloatDict(   t   xrangec         C   s  |  j  d } | j  d | k s3 | j  d | k rR t d | j  |  j  f   n  | | j } t j |  s t j |  s t j |  } q | j   } n  t |  \ } } | d k rt	 j
 d | d d x t |  D] } t j | | k  d } |  | } x t |  D] }	 t j | |	 k  d }
 |  |
 } t | | d | } t j | t j |  k  \ } } | d } | d } t | | | |
 | f <t | |
 | | | f <qWq Wn  | | f S(   s   
    Fixes the connectivity matrix

        - copies it
        - makes it symmetric
        - converts it to LIL if necessary
        - completes it if necessary
    i    i   s4   Wrong shape for connectivity matrix: %s when X is %ssx   the number of connected components of the connectivity matrix is %d > 1. Completing it to avoid stopping the tree early.t
   stackleveli   t   metric(   t   shapet
   ValueErrort   TR   t   isspmatrix_lilt
   isspmatrixt
   lil_matrixt   tolilR   t   warningst   warnR   t   npt   whereR
   t   mint   True(   t   Xt   connectivityt   affinityt	   n_samplest   n_componentst   labelst   it   idx_it   Xit   jt   idx_jt   Xjt   Dt   iit   jj(    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt   _fix_connectivity!   s8    		

$

#c         C   s  d d l  m } |  j d  }  t j d |  j j  j } | |  j |  j d k <| |  j    } | j	   } d | j | j | k <t j
 | j | j | j g  j }	 |	 t j |	 j d  d d  f }	 t j |	  }
 |
 d d  d d  f j t j  } t j | d t j } xq t | |  D]` \ } \ } } | d k	 r[| | k r[Pn  | | k  rt| | | <n  | | k  r-| | | <q-q-W| r|
 d d  d f } | | | | | f S| | | | f S(   s   
    Perform single linkage clustering on sparse data via the minimum
    spanning tree from scipy.sparse.csgraph, then using union-find to label.
    The parent array is then generated by walking through the tree.
    i(   t   minimum_spanning_treet   float64t   dtypei    i   N(   t   scipy.sparse.csgraphR0   t   astypeR   t   finfot   dataR2   t   epst   tocsrt   tocoot   vstackt   rowt   colR   t   argsortR   t   _single_linkage_labelt   intt   aranget   intpt	   enumeratet   None(   R!   R#   t   n_nodest
   n_clustersR$   t   return_distanceR0   t   epsilon_valuet   mstt	   mst_arrayt   single_linkage_treet	   children_t   parentR&   t   leftt   rightt	   distances(    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt   _single_linkage_treeS   s.    $&("c   "   	   C   s  t  j |   }  |  j d k r3 t  j |  d  }  n  |  j \ } } | d	 k rd d l m } | d	 k	 r t j	 d d d n  t  j
 |  d d }  | j |   } | d	 d	  d	 d  f j t  j  } | r | d	 d	  d f }	 | d | d	 |	 f S| d | d	 f Sn  t |  | d
 d \ } }
 | d	 k rFd | d } n3 | | k rkt d | | f   n  d | | } g  } g  } g  } xv t | j  D]e \ } } | j |  g  | D] } | | k  r| ^ q} | j t |  | g  | j |  qWt  j | d t  j d d } t  j | d t  j d d } t  j | d d } d | | *t  j | | f d d } |  | | *t  j t |  d t  j d d } t j | | | | |  t t j j | | |   } t |  t  j  | d t  j } t  j! | d t" } g  } | r8t  j | |  }	 n  t  j | d t  j# d d } xt$ | |  D]} x7 t% rt& |  \ } } } | | ro| | roPqoqoW| | | | <| | <| j | | f  t' | | <| | <| r| |	 | | <n  | | | | | | <| | | | | | <g  } | j( d  d | | <t j) | | | | |  t j) | | | | |  g  | D] } | | j |  ^ q{| j |  t  j | d t  j d d } t  j | j d t  j d d } | j( |  t |  } t  j | d t  j d d } t j | | | | |  g  t$ |  D]& } t* | | | | | | f  ^ qAqfW| }  g  | D] }! |! d	 d	 d  ^ q|} t  j |  } | rt  j+ d |	  }	 | |
 |  | |	 f S| |
 |  | f Sd	 S(   s  Ward clustering based on a Feature matrix.

    Recursively merges the pair of clusters that minimally increases
    within-cluster variance.

    The inertia matrix uses a Heapq-based representation.

    This is the structured version, that takes into account some topological
    structure between samples.

    Read more in the :ref:`User Guide <hierarchical_clustering>`.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        feature matrix  representing n_samples samples to be clustered

    connectivity : sparse matrix (optional).
        connectivity matrix. Defines for each sample the neighboring samples
        following a given structure of the data. The matrix is assumed to
        be symmetric and only the upper triangular half is used.
        Default is None, i.e, the Ward algorithm is unstructured.

    n_clusters : int (optional)
        Stop early the construction of the tree at n_clusters. This is
        useful to decrease computation time if the number of clusters is
        not small compared to the number of samples. In this case, the
        complete tree is not computed, thus the 'children' output is of
        limited use, and the 'parents' output should rather be used.
        This option is valid only when specifying a connectivity matrix.

    return_distance : bool (optional)
        If True, return the distance between the clusters.

    Returns
    -------
    children : 2D array, shape (n_nodes-1, 2)
        The children of each non-leaf node. Values less than `n_samples`
        correspond to leaves of the tree which are the original samples.
        A node `i` greater than or equal to `n_samples` is a non-leaf
        node and has children `children_[i - n_samples]`. Alternatively
        at the i-th iteration, children[i][0] and children[i][1]
        are merged to form node `n_samples + i`

    n_components : int
        The number of connected components in the graph.

    n_leaves : int
        The number of leaves in the tree

    parents : 1D array, shape (n_nodes, ) or None
        The parent of each node. Only returned when a connectivity matrix
        is specified, elsewhere 'None' is returned.

    distances : 1D array, shape (n_nodes-1, )
        Only returned if return_distance is set to True (for compatibility).
        The distances between the centers of the nodes. `distances[i]`
        corresponds to a weighted euclidean distance between
        the nodes `children[i, 1]` and `children[i, 2]`. If the nodes refer to
        leaves of the tree, then `distances[i]` is their unweighted euclidean
        distance. Distances are updated in the following way
        (from scipy.hierarchy.linkage):

        The new entry :math:`d(u,v)` is computed as follows,

        .. math::

           d(u,v) = \sqrt{\frac{|v|+|s|}
                               {T}d(v,s)^2
                        + \frac{|v|+|t|}
                               {T}d(v,t)^2
                        - \frac{|v|}
                               {T}d(s,t)^2}

        where :math:`u` is the newly joined cluster consisting of
        clusters :math:`s` and :math:`t`, :math:`v` is an unused
        cluster in the forest, :math:`T=|v|+|s|+|t|`, and
        :math:`|*|` is the cardinality of its argument. This is also
        known as the incremental algorithm.
    i   i(   t	   hierarchys   Partial build of the tree is implemented only for structured clustering (i.e. with explicit connectivity). The algorithm will build the full tree and only retain the lower branches required for the specified number of clustersR   i   t   requirementst   WNR"   t	   euclideans]   Cannot provide more clusters than samples. %i n_clusters was asked, and there are %i samples.R2   t   ordert   Ci    g       @(   ii   (,   R   t   asarrayt   ndimt   reshapeR   RC   t   scipy.clusterRQ   R   R   t   requiret   wardR4   RA   R/   R   RB   t   rowst   appendt   extendt   lent   arrayt   zerost   emptyR1   R   t   compute_ward_distt   listR   t   movest   zipR    R@   t   onest   boolt   int8t   rangeR   R   t   Falset   fillt   _get_parentsR   t   sqrt("   R    R!   RE   RF   R#   t
   n_featuresRQ   t   outRK   RO   R$   RD   t	   coord_rowt	   coord_colt   At   indR;   R&   t	   moments_1t	   moments_2t   inertiaRL   t	   used_nodet   childrent   not_visitedt   kt   inertR)   t   lt   n_additionst   init   idxt   n_leavest   c(    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt	   ward_tree   s    Q
(%

$
	
$!8&t
   deprecatedt   completeRT   c   "         s(  | d k r t  j d t  n  t j |   }  |  j d k rR t j |  d  }  n  |  j \ } } i t j	 d 6t j
 d 6d d 6}	 y |	 | }
 Wn- t k
 r t d |	 j   | f   n X| d k r8d d	 l m } | d k	 r t  j d
 d d n  | d k r?t j |  j d d d \ } } |  | | f }  nw | d k rTd } nb | d k rid } nM t |  r| |   }  t j |  j d d d \ } } |  | | f }  n  | j |  d | d | } | d d  d d  f j t j  } | r(| d d  d f } | d | d | f S| d | d f St |  | d | \ } } | j   } | j | j k } | j | | _ | j | | _ | j | | _ ~ | d k r|  | j | j f j d  } n# t |  | j |  | j d | } | | _ | d k rd | d } n  | | k s-t  d | | } | d k r`t | | | | | |  S| r|t j | |  } n  t j | d t } t    } | j!   } x t" t# | j | j$   D]o \   \ } } t% t j | d t j& t j | d t j'  |   <| j(   f d   t# | |  D  qW~ t) |  t j* | d t j& } t j+ | d t j& } g  } xVt, | |  D]E} x4 t- rt. |  } | | j/ r| | j0 rPqqW| j/ } | j0 } | r| j1 | | | <n  | | | <| | <| j2 | | f  | | } | | } | | | | <t3 | | <| | <|
 | | | | | | |  } xD | D]< \ } }  | | j2 | |   t4 | t j5 |  | |   qqW| | | <d | | <| | <qW| }! t j6 |  d d  d d d  f } | r| | |! | | f S| | |! | f S(   s  Linkage agglomerative clustering based on a Feature matrix.

    The inertia matrix uses a Heapq-based representation.

    This is the structured version, that takes into account some topological
    structure between samples.

    Read more in the :ref:`User Guide <hierarchical_clustering>`.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        feature matrix representing n_samples samples to be clustered

    connectivity : sparse matrix (optional).
        connectivity matrix. Defines for each sample the neighboring samples
        following a given structure of the data. The matrix is assumed to
        be symmetric and only the upper triangular half is used.
        Default is None, i.e, the Ward algorithm is unstructured.

    n_components : int (optional)
        The number of connected components in the graph.

    n_clusters : int (optional)
        Stop early the construction of the tree at n_clusters. This is
        useful to decrease computation time if the number of clusters is
        not small compared to the number of samples. In this case, the
        complete tree is not computed, thus the 'children' output is of
        limited use, and the 'parents' output should rather be used.
        This option is valid only when specifying a connectivity matrix.

    linkage : {"average", "complete", "single"}, optional, default: "complete"
        Which linkage criteria to use. The linkage criterion determines which
        distance to use between sets of observation.
            - average uses the average of the distances of each observation of
              the two sets
            - complete or maximum linkage uses the maximum distances between
              all observations of the two sets.
            - single uses the minimum of the distances between all observations
              of the two sets.

    affinity : string or callable, optional, default: "euclidean".
        which metric to use. Can be "euclidean", "manhattan", or any
        distance know to paired distance (see metric.pairwise)

    return_distance : bool, default False
        whether or not to return the distances between the clusters.

    Returns
    -------
    children : 2D array, shape (n_nodes-1, 2)
        The children of each non-leaf node. Values less than `n_samples`
        correspond to leaves of the tree which are the original samples.
        A node `i` greater than or equal to `n_samples` is a non-leaf
        node and has children `children_[i - n_samples]`. Alternatively
        at the i-th iteration, children[i][0] and children[i][1]
        are merged to form node `n_samples + i`

    n_components : int
        The number of connected components in the graph.

    n_leaves : int
        The number of leaves in the tree.

    parents : 1D array, shape (n_nodes, ) or None
        The parent of each node. Only returned when a connectivity matrix
        is specified, elsewhere 'None' is returned.

    distances : ndarray, shape (n_nodes-1,)
        Returned when return_distance is set to True.

        distances[i] refers to the distance between children[i][0] and
        children[i][1] when they are merged.

    See also
    --------
    ward_tree : hierarchical clustering with ward linkage
    R   s:   n_components was deprecated in 0.19will be removed in 0.21i   iR   t   averaget   singlesE   Unknown linkage option, linkage should be one of %s, but %s was given(   RQ   s   Partial build of the tree is implemented only for structured clustering (i.e. with explicit connectivity). The algorithm will build the full tree and only retain the lower branches required for the specified number of clustersR   i   t   precomputedi    R|   t   l2RT   t   l1t	   manhattant	   cityblockt   methodR   NR"   R1   R2   c         3   s6   |  ], \ } } |   k  r t  j |   |  Vq d  S(   N(   R   t   WeightedEdge(   t   .0t   rt   d(   Ru   (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pys	   <genexpr>  s   (   ii   (   R   R   (7   R   R   t   DeprecationWarningR   RW   RX   RY   R   R   t	   max_merget   average_mergeRC   t   KeyErrorR   t   keysRZ   RQ   t   triu_indicest   callablet   linkageR4   R?   R/   R9   R;   R<   R6   R	   t   AssertionErrorRP   Rc   t   objectRe   R   RB   Rg   R]   R   RA   R1   R_   R    R@   Rh   R   R   R   t   at   bt   weightR^   Rl   R   R   Ra   ("   R    R!   R$   RE   R   R"   RF   R#   Rp   t   linkage_choicest	   join_funcRQ   R&   R)   Rq   RK   RO   t	   diag_maskRD   Rt   Rx   R6   R;   RL   Ry   Rz   R|   t   edget   n_it   n_jRs   R~   R   R   (    (   Ru   s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt   linkage_treeW  s    Q	



"		"("			
			

  
(c          O   s   d | d <t  |  |   S(   NR   R   (   R   (   t   argst   kwargs(    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt   _complete_linkageG  s    
c          O   s   d | d <t  |  |   S(   NR   R   (   R   (   R   R   (    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt   _average_linkageL  s    
c          O   s   d | d <t  |  |   S(   NR   R   (   R   (   R   R   (    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt   _single_linkageQ  s    
R\   R   R   c         C   s   |  | k r% t  d |  | f   n  t | d  d g } xO t |  d  D]= } | | d | } t | | d  t | | d  qN Wt j | d t j } x4 t |  D]& \ } } | | t	 j
 | | |  <q W| S(   s  Function cutting the ward tree for a given number of clusters.

    Parameters
    ----------
    n_clusters : int or ndarray
        The number of clusters to form.

    children : 2D array, shape (n_nodes-1, 2)
        The children of each non-leaf node. Values less than `n_samples`
        correspond to leaves of the tree which are the original samples.
        A node `i` greater than or equal to `n_samples` is a non-leaf
        node and has children `children_[i - n_samples]`. Alternatively
        at the i-th iteration, children[i][0] and children[i][1]
        are merged to form node `n_samples + i`

    n_leaves : int
        Number of leaves of the tree.

    Returns
    -------
    labels : array [n_samples]
        cluster labels for each point

    s]   Cannot extract more clusters than samples: %s clusters where given for a tree with %s leaves.ii   i    R2   (   R   t   maxR   R   R   R   Rb   RA   RB   R   t   _hc_get_descendent(   RE   Rz   R   t   nodesR&   t   these_childrent   labelt   node(    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt   _hc_cut`  s    t   AgglomerativeClusteringc           B   s8   e  Z d  Z d d d d d d d d  Z d d  Z RS(	   s!  
    Agglomerative Clustering

    Recursively merges the pair of clusters that minimally increases
    a given linkage distance.

    Read more in the :ref:`User Guide <hierarchical_clustering>`.

    Parameters
    ----------
    n_clusters : int, default=2
        The number of clusters to find.

    affinity : string or callable, default: "euclidean"
        Metric used to compute the linkage. Can be "euclidean", "l1", "l2",
        "manhattan", "cosine", or 'precomputed'.
        If linkage is "ward", only "euclidean" is accepted.

    memory : None, str or object with the joblib.Memory interface, optional
        Used to cache the output of the computation of the tree.
        By default, no caching is done. If a string is given, it is the
        path to the caching directory.

    connectivity : array-like or callable, optional
        Connectivity matrix. Defines for each sample the neighboring
        samples following a given structure of the data.
        This can be a connectivity matrix itself or a callable that transforms
        the data into a connectivity matrix, such as derived from
        kneighbors_graph. Default is None, i.e, the
        hierarchical clustering algorithm is unstructured.

    compute_full_tree : bool or 'auto' (optional)
        Stop early the construction of the tree at n_clusters. This is
        useful to decrease computation time if the number of clusters is
        not small compared to the number of samples. This option is
        useful only when specifying a connectivity matrix. Note also that
        when varying the number of clusters and using caching, it may
        be advantageous to compute the full tree.

    linkage : {"ward", "complete", "average", "single"}, optional             (default="ward")
        Which linkage criterion to use. The linkage criterion determines which
        distance to use between sets of observation. The algorithm will merge
        the pairs of cluster that minimize this criterion.

        - ward minimizes the variance of the clusters being merged.
        - average uses the average of the distances of each observation of
          the two sets.
        - complete or maximum linkage uses the maximum distances between
          all observations of the two sets.
        - single uses the minimum of the distances between all observations
          of the two sets.

    pooling_func : callable, default='deprecated'
        Ignored.

        .. deprecated:: 0.20
            ``pooling_func`` has been deprecated in 0.20 and will be removed
            in 0.22.

    Attributes
    ----------
    labels_ : array [n_samples]
        cluster labels for each point

    n_leaves_ : int
        Number of leaves in the hierarchical tree.

    n_components_ : int
        The estimated number of connected components in the graph.

    children_ : array-like, shape (n_samples-1, 2)
        The children of each non-leaf node. Values less than `n_samples`
        correspond to leaves of the tree which are the original samples.
        A node `i` greater than or equal to `n_samples` is a non-leaf
        node and has children `children_[i - n_samples]`. Alternatively
        at the i-th iteration, children[i][0] and children[i][1]
        are merged to form node `n_samples + i`

    Examples
    --------
    >>> from sklearn.cluster import AgglomerativeClustering
    >>> import numpy as np
    >>> X = np.array([[1, 2], [1, 4], [1, 0],
    ...               [4, 2], [4, 4], [4, 0]])
    >>> clustering = AgglomerativeClustering().fit(X)
    >>> clustering # doctest: +NORMALIZE_WHITESPACE
    AgglomerativeClustering(affinity='euclidean', compute_full_tree='auto',
                connectivity=None, linkage='ward', memory=None, n_clusters=2,
                pooling_func='deprecated')
    >>> clustering.labels_
    array([1, 1, 1, 0, 0, 0])

    i   RT   t   autoR\   R   c         C   sC   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ d  S(   N(   RE   t   memoryR!   t   compute_full_treeR   R"   t   pooling_func(   t   selfRE   R"   R   R!   R   R   R   (    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt   __init__  s    						c         C   s  |  j  d k r2 t |  t  r2 t j d t  n  t | d d d |  } t |  j  } |  j	 d k r t
 d t |  j	    n  |  j d k r |  j d	 k r t
 d
 |  j f   n  |  j t k r t
 d |  j t j   f   n  t |  j } |  j } |  j d k	 rPt |  j  r2|  j |  } n  t | d d d d g } n  t |  } |  j } |  j d k r}t } n  | d k r|  j	 t d d |  k  } n  |  j	 } | rd } n  i  }	 |  j d k r|  j |	 d <|  j |	 d <n  | j |  | | d | |	 \ |  _ |  _ |  _ }
 | rOt |  j	 |  j |  j  |  _ nF t j |
 d t } t  j! | |   } t  j" t  j# |  |  |  _ |  S(   sR  Fit the hierarchical clustering on the data

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training data. Shape [n_samples, n_features], or [n_samples,
            n_samples] if affinity=='precomputed'.

        y : Ignored

        Returns
        -------
        self
        R   su   Agglomerative "pooling_func" parameter is not used. It has been deprecated in version 0.20 and will beremoved in 0.22t   ensure_min_samplesi   t	   estimatori    s@   n_clusters should be an integer greater than 0. %s was provided.R\   RT   sI   %s was provided as affinity. Ward can only work with euclidean distances.s-   Unknown linkage type %s. Valid options are %st   accept_sparset   csrt   coot   lilR   id   g{Gz?R   R"   RE   t   copyN($   R   t
   isinstanceR   R   R   R   R   R   R   RE   R   t   strR   R"   t   _TREE_BUILDERSR   R!   RC   R   R`   R   R   R   t   cacheRK   t   n_components_t	   n_leaves_R   t   labels_R   t   hc_get_headsRl   R   R   t   searchsortedt   unique(   R   R    t   yR   t   tree_builderR!   R#   R   RE   R   t   parentsR%   (    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt   fit  s\    	
					N(   t   __name__t
   __module__t   __doc__RC   R   R   (    (    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyR     s   ^	t   FeatureAgglomerationc           B   sJ   e  Z d  Z d d d d d d e j d  Z d d  Z e d    Z	 RS(	   sw  Agglomerate features.

    Similar to AgglomerativeClustering, but recursively merges features
    instead of samples.

    Read more in the :ref:`User Guide <hierarchical_clustering>`.

    Parameters
    ----------
    n_clusters : int, default 2
        The number of clusters to find.

    affinity : string or callable, default "euclidean"
        Metric used to compute the linkage. Can be "euclidean", "l1", "l2",
        "manhattan", "cosine", or 'precomputed'.
        If linkage is "ward", only "euclidean" is accepted.

    memory : None, str or object with the joblib.Memory interface, optional
        Used to cache the output of the computation of the tree.
        By default, no caching is done. If a string is given, it is the
        path to the caching directory.

    connectivity : array-like or callable, optional
        Connectivity matrix. Defines for each feature the neighboring
        features following a given structure of the data.
        This can be a connectivity matrix itself or a callable that transforms
        the data into a connectivity matrix, such as derived from
        kneighbors_graph. Default is None, i.e, the
        hierarchical clustering algorithm is unstructured.

    compute_full_tree : bool or 'auto', optional, default "auto"
        Stop early the construction of the tree at n_clusters. This is
        useful to decrease computation time if the number of clusters is
        not small compared to the number of features. This option is
        useful only when specifying a connectivity matrix. Note also that
        when varying the number of clusters and using caching, it may
        be advantageous to compute the full tree.

    linkage : {"ward", "complete", "average", "single"}, optional            (default="ward")
        Which linkage criterion to use. The linkage criterion determines which
        distance to use between sets of features. The algorithm will merge
        the pairs of cluster that minimize this criterion.

        - ward minimizes the variance of the clusters being merged.
        - average uses the average of the distances of each feature of
          the two sets.
        - complete or maximum linkage uses the maximum distances between
          all features of the two sets.
        - single uses the minimum of the distances between all observations
          of the two sets.

    pooling_func : callable, default np.mean
        This combines the values of agglomerated features into a single
        value, and should accept an array of shape [M, N] and the keyword
        argument `axis=1`, and reduce it to an array of size [M].

    Attributes
    ----------
    labels_ : array-like, (n_features,)
        cluster labels for each feature.

    n_leaves_ : int
        Number of leaves in the hierarchical tree.

    n_components_ : int
        The estimated number of connected components in the graph.

    children_ : array-like, shape (n_nodes-1, 2)
        The children of each non-leaf node. Values less than `n_features`
        correspond to leaves of the tree which are the original samples.
        A node `i` greater than or equal to `n_features` is a non-leaf
        node and has children `children_[i - n_features]`. Alternatively
        at the i-th iteration, children[i][0] and children[i][1]
        are merged to form node `n_features + i`

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn import datasets, cluster
    >>> digits = datasets.load_digits()
    >>> images = digits.images
    >>> X = np.reshape(images, (len(images), -1))
    >>> agglo = cluster.FeatureAgglomeration(n_clusters=32)
    >>> agglo.fit(X) # doctest: +ELLIPSIS
    FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
               connectivity=None, linkage='ward', memory=None, n_clusters=32,
               pooling_func=...)
    >>> X_reduced = agglo.transform(X)
    >>> X_reduced.shape
    (1797, 32)
    i   RT   R   R\   c         C   sD   t  t |   j d | d | d | d | d | d |  | |  _ d  S(   NRE   R   R!   R   R   R"   (   t   superR   R   R   (   R   RE   R"   R   R!   R   R   R   (    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyR     s
    c         K   s=   t  | d d d d g d d d |  } t j |  | j |  S(   s   Fit the hierarchical clustering on the data

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            The data

        y : Ignored

        Returns
        -------
        self
        R   R   t   cscR   t   ensure_min_featuresi   R   (   R   R   R   R   (   R   R    R   t   params(    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyR     s    c         C   s
   t   d  S(   N(   t   AttributeError(   R   (    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt   fit_predict  s    N(
   R   R   R   RC   R   t   meanR   R   t   propertyR   (    (    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyR   M  s   \(/   R   t   heapqR    R   R   R   R   t   numpyR   t   scipyR   R3   R   t   baseR   R   t	   externalsR   t   metrics.pairwiseR	   R
   t   utilsR   t   utils.validationR   t    R   t   _feature_agglomerationR   t   utils.fast_dictR   t   externals.six.movesR   R/   RP   RC   Rl   R   R   R   R   R   t   dictR   R   R   R   (    (    (    s;   lib/python2.7/site-packages/sklearn/cluster/hierarchical.pyt   <module>	   s>   "	2	5						1