ó
‡ˆ\c           @   sÖ   d  Z  d d l Z d d l Z d d l m Z d d l m Z d d l m Z d d l m	 Z	 d d l
 m Z m Z d d	 l m Z m Z d d
 l m Z d d l m Z d d „ Z d e e e f d „  ƒ  YZ d S(   s„   
Soft Voting/Majority Rule classifier.

This module contains a Soft Voting/Majority Rule classifier for
classification estimators.

iÿÿÿÿNi   (   t   ClassifierMixin(   t   TransformerMixin(   t   clone(   t   LabelEncoder(   t   Parallelt   delayed(   t   has_fit_parametert   check_is_fitted(   t   _BaseComposition(   t   Bunchc         C   s9   | d k	 r% |  j | | d | ƒn |  j | | ƒ |  S(   s7   Private function used to fit an estimator within a job.t   sample_weightN(   t   Nonet   fit(   t	   estimatort   Xt   yR
   (    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyt   _parallel_fit_estimator   s    t   VotingClassifierc           B   sž   e  Z d  Z d d d d d „ Z e d „  ƒ Z d d „ Z e d „  ƒ Z d „  Z	 d „  Z
 d „  Z e d	 „  ƒ Z d
 „  Z d „  Z e d „ Z d „  Z RS(   sD  Soft Voting/Majority Rule classifier for unfitted estimators.

    .. versionadded:: 0.17

    Read more in the :ref:`User Guide <voting_classifier>`.

    Parameters
    ----------
    estimators : list of (string, estimator) tuples
        Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones
        of those original estimators that will be stored in the class attribute
        ``self.estimators_``. An estimator can be set to `None` using
        ``set_params``.

    voting : str, {'hard', 'soft'} (default='hard')
        If 'hard', uses predicted class labels for majority rule voting.
        Else if 'soft', predicts the class label based on the argmax of
        the sums of the predicted probabilities, which is recommended for
        an ensemble of well-calibrated classifiers.

    weights : array-like, shape = [n_classifiers], optional (default=`None`)
        Sequence of weights (`float` or `int`) to weight the occurrences of
        predicted class labels (`hard` voting) or class probabilities
        before averaging (`soft` voting). Uses uniform weights if `None`.

    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel for ``fit``.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    flatten_transform : bool, optional (default=None)
        Affects shape of transform output only when voting='soft'
        If voting='soft' and flatten_transform=True, transform method returns
        matrix with shape (n_samples, n_classifiers * n_classes). If
        flatten_transform=False, it returns
        (n_classifiers, n_samples, n_classes).

    Attributes
    ----------
    estimators_ : list of classifiers
        The collection of fitted sub-estimators as defined in ``estimators``
        that are not `None`.

    named_estimators_ : Bunch object, a dictionary with attribute access
        Attribute to access any fitted sub-estimators by name.

        .. versionadded:: 0.20

    classes_ : array-like, shape = [n_predictions]
        The classes labels.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier
    >>> clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial',
    ...                           random_state=1)
    >>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
    >>> clf3 = GaussianNB()
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> y = np.array([1, 1, 1, 2, 2, 2])
    >>> eclf1 = VotingClassifier(estimators=[
    ...         ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')
    >>> eclf1 = eclf1.fit(X, y)
    >>> print(eclf1.predict(X))
    [1 1 1 2 2 2]
    >>> np.array_equal(eclf1.named_estimators_.lr.predict(X),
    ...                eclf1.named_estimators_['lr'].predict(X))
    True
    >>> eclf2 = VotingClassifier(estimators=[
    ...         ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    ...         voting='soft')
    >>> eclf2 = eclf2.fit(X, y)
    >>> print(eclf2.predict(X))
    [1 1 1 2 2 2]
    >>> eclf3 = VotingClassifier(estimators=[
    ...        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    ...        voting='soft', weights=[2,1,1],
    ...        flatten_transform=True)
    >>> eclf3 = eclf3.fit(X, y)
    >>> print(eclf3.predict(X))
    [1 1 1 2 2 2]
    >>> print(eclf3.transform(X).shape)
    (6, 6)
    t   hardc         C   s1   | |  _  | |  _ | |  _ | |  _ | |  _ d  S(   N(   t
   estimatorst   votingt   weightst   n_jobst   flatten_transform(   t   selfR   R   R   R   R   (    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyt   __init__~   s
    				c         C   s   t  t |  j ƒ   S(   N(   R	   t   dictR   (   R   (    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyt   named_estimators†   s    c            sx  t  | t j ƒ rI t | j ƒ d k rI | j d d k rI t d ƒ ‚ n  |  j d k rn t d |  j ƒ ‚ n  |  j d k s’ t |  j ƒ d k r¡ t
 d ƒ ‚ n  |  j d k	 rù t |  j ƒ t |  j ƒ k rù t d t |  j ƒ t |  j ƒ f ƒ ‚ n  ˆ d k	 rDx< |  j D]. \ } } t | d	 ƒ st d
 | ƒ ‚ qqWn  t |  j Œ  \ } } |  j | ƒ t j g  |  j D] \ } }	 |	 d k ^ qvƒ }
 |
 t |  j ƒ k r»t d ƒ ‚ n  t ƒ  j | ƒ |  _ |  j j |  _ g  |  _ |  j j | ƒ ‰ t d |  j ƒ ‡  ‡ ‡ f d †  | Dƒ ƒ |  _ t t ƒ    |  _ x4 t |  j |  j ƒ D] \ } } | |  j | d <qSW|  S(   s€   Fit the estimators.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if all underlying estimators
            support sample weights.

        Returns
        -------
        self : object
        i   s<   Multilabel and multi-output classification is not supported.t   softR   s0   Voting must be 'soft' or 'hard'; got (voting=%r)i    s[   Invalid `estimators` attribute, `estimators` should be a list of (string, estimator) tuplessN   Number of classifiers and weights must be equal; got %d weights, %d estimatorsR
   s:   Underlying estimator '%s' does not support sample weights.sE   All estimators are None. At least one is required to be a classifier!R   c         3   s?   |  ]5 } | d k	 r t t ƒ t | ƒ ˆ  ˆ d  ˆ ƒVq d S(   R
   N(   R   R   R   R   (   t   .0t   clf(   R   R
   t   transformed_y(    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pys	   <genexpr>Æ   s   (   R   R   N(   t
   isinstancet   npt   ndarrayt   lent   shapet   NotImplementedErrorR   t
   ValueErrorR   R   t   AttributeErrorR   R   t   zipt   _validate_namest   sumR   R   t   le_t   classes_t   estimators_t	   transformR   R   R	   R   t   named_estimators_(   R   R   R   R
   t   namet   stept   namest   clfst   _R   t   n_isnonet   kt   e(    (   R   R
   R   sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyR   Š   s@    :$%1	"c         C   sO   |  j  d k r d Sg  t |  j |  j  ƒ D]" \ } } | d d k	 r) | ^ q) S(   s(   Get the weights of not `None` estimatorsi   N(   R   R   R(   R   (   R   t   estt   w(    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyt   _weights_not_noneÏ   s    c            s†   t  ˆ  d ƒ ˆ  j d k r= t j ˆ  j | ƒ d d ƒ} n3 ˆ  j | ƒ } t j ‡  f d †  d d d | ƒ} ˆ  j j | ƒ } | S(   s*   Predict class labels for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            The input samples.

        Returns
        ----------
        maj : array-like, shape = [n_samples]
            Predicted class labels.
        R-   R   t   axisi   c            s   t  j t  j |  d ˆ  j ƒƒ S(   NR   (   R!   t   argmaxt   bincountR:   (   t   x(   R   (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyt   <lambda>ì   s   t   arr(	   R   R   R!   R<   t   predict_probat   _predictt   apply_along_axisR+   t   inverse_transform(   R   R   t   majt   predictions(    (   R   sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyt   predict×   s    !c         C   s,   t  j g  |  j D] } | j | ƒ ^ q ƒ S(   s(   Collect results from clf.predict calls. (   R!   t   asarrayR-   RA   (   R   R   R   (    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyt   _collect_probasô   s    c         C   s]   |  j  d k r% t d |  j  ƒ ‚ n  t |  d ƒ t j |  j | ƒ d d d |  j ƒ} | S(   s3   Predict class probabilities for X in 'soft' voting R   s-   predict_proba is not available when voting=%rR-   R;   i    R   (   R   R'   R   R!   t   averageRI   R:   (   R   R   t   avg(    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyt   _predict_probaø   s    c         C   s   |  j  S(   su  Compute probabilities of possible outcomes for samples in X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            The input samples.

        Returns
        ----------
        avg : array-like, shape = [n_samples, n_classes]
            Weighted average probability for each class per sample.
        (   RL   (   R   (    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyRA     s    c         C   s|   t  |  d ƒ |  j d k rk |  j | ƒ } |  j d k rN t j d t ƒ | S|  j s[ | St j	 | ƒ Sn |  j
 | ƒ Sd S(   s‡  Return class labels or probabilities for X for each estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        probabilities_or_labels
            If `voting='soft'` and `flatten_transform=True`:
                returns array-like of shape (n_classifiers, n_samples *
                n_classes), being class probabilities calculated by each
                classifier.
            If `voting='soft' and `flatten_transform=False`:
                array-like of shape (n_classifiers, n_samples, n_classes)
            If `voting='hard'`:
                array-like of shape (n_samples, n_classifiers), being
                class labels predicted by each classifier.
        R-   R   sŠ   'flatten_transform' default value will be changed to True in 0.21. To silence this warning you may explicitly set flatten_transform=False.N(   R   R   RI   R   R   t   warningst   warnt   DeprecationWarningR!   t   hstackRB   (   R   R   t   probas(    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyR.     s    		c         K   s   t  t |  ƒ j d |  |  S(   s   Setting the parameters for the voting classifier

        Valid parameter keys can be listed with get_params().

        Parameters
        ----------
        **params : keyword arguments
            Specific parameters using e.g. set_params(parameter_name=new_value)
            In addition, to setting the parameters of the ``VotingClassifier``,
            the individual classifiers of the ``VotingClassifier`` can also be
            set or replaced by setting them to None.

        Examples
        --------
        # In this example, the RandomForestClassifier is removed
        clf1 = LogisticRegression()
        clf2 = RandomForestClassifier()
        eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2)]
        eclf.set_params(rf=None)

        R   (   t   superR   t   _set_params(   R   t   params(    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyt
   set_params;  s    c         C   s   t  t |  ƒ j d d | ƒS(   så    Get the parameters of the VotingClassifier

        Parameters
        ----------
        deep : bool
            Setting it to True gets the various classifiers and the parameters
            of the classifiers as well
        R   t   deep(   RR   R   t   _get_params(   R   RV   (    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyt
   get_paramsT  s    	c         C   s/   t  j g  |  j D] } | j | ƒ ^ q ƒ j S(   s(   Collect results from clf.predict calls. (   R!   RH   R-   RG   t   T(   R   R   R   (    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyRB   `  s    N(   t   __name__t
   __module__t   __doc__R   R   t   propertyR   R   R:   RG   RI   RL   RA   R.   RU   t   TrueRX   RB   (    (    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyR   $   s   X	E			
	)	(   R\   t   numpyR!   RM   t   baseR    R   R   t   preprocessingR   t   utils._joblibR   R   t   utils.validationR   R   t   utils.metaestimatorsR   t   utilsR	   R   R   R   (    (    (    sA   lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyt   <module>   s   	