ó
‡ˆ\c           @   sy   d  d l  Z d d l m Z d d l m Z d d l m Z d d l m Z d d l	 m
 Z
 d	 e e f d
 „  ƒ  YZ d S(   iÿÿÿÿNi   (   t   BaseEstimatori   (   t   SelectorMixin(   t   check_array(   t   mean_variance_axis(   t   check_is_fittedt   VarianceThresholdc           B   s/   e  Z d  Z d d „ Z d d „ Z d „  Z RS(   s5  Feature selector that removes all low-variance features.

    This feature selection algorithm looks only at the features (X), not the
    desired outputs (y), and can thus be used for unsupervised learning.

    Read more in the :ref:`User Guide <variance_threshold>`.

    Parameters
    ----------
    threshold : float, optional
        Features with a training-set variance lower than this threshold will
        be removed. The default is to keep all features with non-zero variance,
        i.e. remove the features that have the same value in all samples.

    Attributes
    ----------
    variances_ : array, shape (n_features,)
        Variances of individual features.

    Examples
    --------
    The following dataset has integer features, two of which are the same
    in every sample. These are removed with the default setting for threshold::

        >>> X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]
        >>> selector = VarianceThreshold()
        >>> selector.fit_transform(X)
        array([[2, 0],
               [1, 4],
               [1, 1]])
    g        c         C   s   | |  _  d  S(   N(   t	   threshold(   t   selfR   (    (    sK   lib/python2.7/site-packages/sklearn/feature_selection/variance_threshold.pyt   __init__-   s    c         C   s½   t  | d
 d t j ƒ} t | d ƒ rE t | d d ƒ\ } |  _ n t j | d d ƒ|  _ t j |  j |  j k ƒ r¹ d } | j	 d d k rž | d	 7} n  t
 | j |  j ƒ ƒ ‚ n  |  S(   s  Learn empirical variances from X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Sample vectors from which to compute variances.

        y : any
            Ignored. This parameter exists only for compatibility with
            sklearn.pipeline.Pipeline.

        Returns
        -------
        self
        t   csrt   csct   dtypet   toarrayt   axisi    s4   No feature in X meets the variance threshold {0:.5f}i   s    (X contains only one sample)(   R	   R
   (   R   t   npt   float64t   hasattrR   t
   variances_t   vart   allR   t   shapet
   ValueErrort   format(   R   t   Xt   yt   _t   msg(    (    sK   lib/python2.7/site-packages/sklearn/feature_selection/variance_threshold.pyt   fit0   s    c         C   s   t  |  d ƒ |  j |  j k S(   NR   (   R   R   R   (   R   (    (    sK   lib/python2.7/site-packages/sklearn/feature_selection/variance_threshold.pyt   _get_support_maskO   s    N(   t   __name__t
   __module__t   __doc__R   t   NoneR   R   (    (    (    sK   lib/python2.7/site-packages/sklearn/feature_selection/variance_threshold.pyR      s   (   t   numpyR   t   baseR    R   t   utilsR   t   utils.sparsefuncsR   t   utils.validationR   R   (    (    (    sK   lib/python2.7/site-packages/sklearn/feature_selection/variance_threshold.pyt   <module>   s   