ó
‡ˆ\c           @   s!  d  Z  d d l m Z d d l m Z d d l m Z d d l m Z d d l m	 Z	 d d l m
 Z
 d d l Z d	 d
 l m Z d	 d l m Z d d l m Z d „  Z d „  Z i e d 6e d 6e d 6Z d „  Z d „  Z d „  Z d „  Z d „  Z d d „ Z d d „ Z d „  Z d S(   sX   
Multi-class / multi-label utility function
==========================================

iÿÿÿÿ(   t   division(   t   chain(   t   issparse(   t   spmatrix(   t
   dok_matrix(   t
   lil_matrixNi   (   t   string_types(   t	   _Sequencei   (   t   check_arrayc         C   s3   t  |  d ƒ r% t j t j |  ƒ ƒ St |  ƒ Sd  S(   Nt	   __array__(   t   hasattrt   npt   uniquet   asarrayt   set(   t   y(    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pyt   _unique_multiclass   s    c         C   s&   t  j t |  d d d g ƒ j d ƒ S(   Nt   csrt   csct   cooi   (   R   t   arangeR   t   shape(   R   (    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pyt   _unique_indicator   s    t   binaryt
   multiclasss   multilabel-indicatorc             s]  |  s t  d ƒ ‚ n  t d „  |  Dƒ ƒ } | t d d g ƒ k rU t d g ƒ } n  t | ƒ d k rz t  d | ƒ ‚ n  | j ƒ  } | d k rÃ t t d „  |  Dƒ ƒ ƒ d k rÃ t  d	 ƒ ‚ n  t j | d ƒ ‰  ˆ  sô t  d
 t |  ƒ ƒ ‚ n  t t j	 ‡  f d †  |  Dƒ ƒ ƒ } t t d „  | Dƒ ƒ ƒ d k rJt  d ƒ ‚ n  t
 j t | ƒ ƒ S(   sy  Extract an ordered array of unique labels

    We don't allow:
        - mix of multilabel and multiclass (single label) targets
        - mix of label indicator matrix and anything else,
          because there are no explicit labels)
        - mix of label indicator matrices of different sizes
        - mix of string and integer labels

    At the moment, we also don't allow "multiclass-multioutput" input type.

    Parameters
    ----------
    *ys : array-likes

    Returns
    -------
    out : numpy array of shape [n_unique_labels]
        An ordered array of unique labels.

    Examples
    --------
    >>> from sklearn.utils.multiclass import unique_labels
    >>> unique_labels([3, 5, 5, 5, 7, 7])
    array([3, 5, 7])
    >>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])
    array([1, 2, 3, 4])
    >>> unique_labels([1, 2, 10], [5, 11])
    array([ 1,  2,  5, 10, 11])
    s   No argument has been passed.c         s   s   |  ] } t  | ƒ Vq d  S(   N(   t   type_of_target(   t   .0t   x(    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pys	   <genexpr>M   s    R   R   i   s'   Mix type of y not allowed, got types %ss   multilabel-indicatorc         s   s.   |  ]$ } t  | d  d d g ƒ j d Vq d S(   R   R   R   i   N(   R   R   (   R   R   (    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pys	   <genexpr>X   s   sC   Multi-label binary indicator input with different numbers of labelss   Unknown label type: %sc         3   s   |  ] } ˆ  | ƒ Vq d  S(   N(    (   R   R   (   t   _unique_labels(    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pys	   <genexpr>b   s    c         s   s   |  ] } t  | t ƒ Vq d  S(   N(   t
   isinstanceR   (   R   t   label(    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pys	   <genexpr>e   s    s,   Mix of label input types (string and number)N(   t
   ValueErrorR   t   lent   popt   _FN_UNIQUE_LABELSt   gett   Nonet   reprR   t   from_iterableR   t   arrayt   sorted(   t   yst   ys_typest
   label_typet	   ys_labels(    (   R   s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pyt   unique_labels*   s&    %"c         C   s.   |  j  j d k o- t j |  j t ƒ |  k ƒ S(   Nt   f(   t   dtypet   kindR   t   allt   astypet   int(   R   (    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pyt   _is_integral_floatk   s    c         C   s  t  |  d ƒ r! t j |  ƒ }  n  t  |  d ƒ oO |  j d k oO |  j d d k sV t St |  ƒ rÞ t |  t t	 f ƒ r† |  j
 ƒ  }  n  t |  j ƒ d k pÝ t j |  j ƒ j d k oÝ |  j j d k pÝ t t j |  j ƒ ƒ St j |  ƒ } t | ƒ d k  o|  j j d k pt | ƒ Sd S(	   sƒ   Check if ``y`` is in a multilabel format.

    Parameters
    ----------
    y : numpy array of shape [n_samples]
        Target values.

    Returns
    -------
    out : bool,
        Return ``True``, if ``y`` is in a multilabel format, else ```False``.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils.multiclass import is_multilabel
    >>> is_multilabel([0, 1, 0, 1])
    False
    >>> is_multilabel([[1], [0, 2], []])
    False
    >>> is_multilabel(np.array([[1, 0], [0, 0]]))
    True
    >>> is_multilabel(np.array([[1], [0], [0]]))
    False
    >>> is_multilabel(np.array([[1, 0, 0]]))
    True
    R	   R   i   i   i    t   biui   N(   R
   R   R   t   ndimR   t   FalseR   R   R   R   t   tocsrR    t   dataR   t   sizeR/   R0   R4   (   R   t   labels(    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pyt   is_multilabelo   s    10$c         C   s/   t  |  ƒ } | d k r+ t d | ƒ ‚ n  d S(	   s*  Ensure that target y is of a non-regression type.

    Only the following target types (as defined in type_of_target) are allowed:
        'binary', 'multiclass', 'multiclass-multioutput',
        'multilabel-indicator', 'multilabel-sequences'

    Parameters
    ----------
    y : array-like
    R   R   s   multiclass-multioutputs   multilabel-indicators   multilabel-sequencess   Unknown label type: %rN(   R   R   s   multiclass-multioutputs   multilabel-indicators   multilabel-sequences(   R   R   (   R   t   y_type(    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pyt   check_classification_targets   s    	c         C   s2  t  |  t t f ƒ s$ t |  d ƒ o1 t  |  t ƒ } | sM t d |  ƒ ‚ n  |  j j d k } | rt t d ƒ ‚ n  t |  ƒ r„ d Sy t	 j
 |  ƒ }  Wn t k
 r« d SXyN t |  d d ƒ rù t  |  d t ƒ rù t  |  d t ƒ rù t d ƒ ‚ n  Wn t k
 rn X|  j d	 k sO|  j t k rSt |  ƒ rSt  |  j d t ƒ rSd S|  j d	 k ry|  j d
 d k ryd S|  j d	 k r¤|  j d
 d
 k r¤d } n d } |  j j d k rât	 j |  |  j t ƒ k ƒ râd | St t	 j |  ƒ ƒ d	 k s"|  j d	 k r*t |  d ƒ d
 k r*d | Sd Sd S(   sì  Determine the type of data indicated by the target.

    Note that this type is the most specific type that can be inferred.
    For example:

        * ``binary`` is more specific but compatible with ``multiclass``.
        * ``multiclass`` of integers is more specific but compatible with
          ``continuous``.
        * ``multilabel-indicator`` is more specific but compatible with
          ``multiclass-multioutput``.

    Parameters
    ----------
    y : array-like

    Returns
    -------
    target_type : string
        One of:

        * 'continuous': `y` is an array-like of floats that are not all
          integers, and is 1d or a column vector.
        * 'continuous-multioutput': `y` is a 2d array of floats that are
          not all integers, and both dimensions are of size > 1.
        * 'binary': `y` contains <= 2 discrete values and is 1d or a column
          vector.
        * 'multiclass': `y` contains more than two discrete values, is not a
          sequence of sequences, and is 1d or a column vector.
        * 'multiclass-multioutput': `y` is a 2d array that contains more
          than two discrete values, is not a sequence of sequences, and both
          dimensions are of size > 1.
        * 'multilabel-indicator': `y` is a label indicator matrix, an array
          of two dimensions with at least two columns, and at most 2 unique
          values.
        * 'unknown': `y` is array-like but none of the above, such as a 3d
          array, sequence of sequences, or an array of non-sequence objects.

    Examples
    --------
    >>> import numpy as np
    >>> type_of_target([0.1, 0.6])
    'continuous'
    >>> type_of_target([1, -1, -1, 1])
    'binary'
    >>> type_of_target(['a', 'b', 'a'])
    'binary'
    >>> type_of_target([1.0, 2.0])
    'binary'
    >>> type_of_target([1, 0, 2])
    'multiclass'
    >>> type_of_target([1.0, 0.0, 3.0])
    'multiclass'
    >>> type_of_target(['a', 'b', 'c'])
    'multiclass'
    >>> type_of_target(np.array([[1, 2], [3, 1]]))
    'multiclass-multioutput'
    >>> type_of_target([[1, 2]])
    'multiclass-multioutput'
    >>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))
    'continuous-multioutput'
    >>> type_of_target(np.array([[0, 1], [1, 1]]))
    'multilabel-indicator'
    R	   s:   Expected array-like (array or non-string sequence), got %rt   SparseSeriess!   y cannot be class 'SparseSeries'.s   multilabel-indicatort   unknowni    sœ   You appear to be using a legacy multi-label data representation. Sequence of sequences are no longer supported; use a binary array or sparse matrix instead.i   i   s   -multioutputt    R.   t
   continuousR   R   N(   R   t   SequenceR   R
   R   R   t	   __class__t   __name__R<   R   R   t
   IndexErrorR6   R/   t   objectR    t   flatR   R0   t   anyR2   R3   R   (   R   t   validt   sparseseriest   suffix(    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pyR   ®   sB    @$'*""	0@c         C   s«   t  |  d d ƒ d k r3 | d k r3 t d ƒ ‚ nt | d k	 r§ t  |  d d ƒ d k	 r‘ t j |  j t | ƒ ƒ s¤ t d | |  j f ƒ ‚ q¤ q§ t | ƒ |  _ t Sn  t S(   s!  Private helper function for factorizing common classes param logic

    Estimators that implement the ``partial_fit`` API need to be provided with
    the list of possible classes at the first call to partial_fit.

    Subsequent calls to partial_fit should check that ``classes`` is still
    consistent with a previous value of ``clf.classes_`` when provided.

    This function returns True if it detects that this was the first call to
    ``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also
    set on ``clf``.

    t   classes_s8   classes must be passed on the first call to partial_fit.sD   `classes=%r` is not the same as on last call to partial_fit, was: %rN(	   t   getattrR$   R   R   t   array_equalRM   R-   t   TrueR7   (   t   clft   classes(    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pyt   _check_partial_fit_first_call%  s    $c         C   su  g  } g  } g  } |  j  \ } } t |  ƒ rÙ|  j ƒ  }  t j |  j ƒ } xt | ƒ D]z} |  j |  j | |  j | d !}	 | d k	 r¾ t j	 | ƒ |	 }
 t j
 | ƒ t j
 |
 ƒ } n d }
 |  j  d | | } t j |  j |  j | |  j | d !d t ƒ\ } } t j | d |
 ƒ} d | k rJ| | d k c | 7<n  d | k rš| | |  j  d k  ršt j | d d ƒ } t j | d | ƒ } n  | j | ƒ | j | j  d ƒ | j | | j
 ƒ  ƒ qX Wn xŒ t | ƒ D]~ } t j |  d d … | f d t ƒ\ } } | j | ƒ | j | j  d ƒ t j | d | ƒ} | j | | j
 ƒ  ƒ qæW| | | f S(   sz  Compute class priors from multioutput-multiclass target data

    Parameters
    ----------
    y : array like or sparse matrix of size (n_samples, n_outputs)
        The labels for each example.

    sample_weight : array-like of shape = (n_samples,), optional
        Sample weights.

    Returns
    -------
    classes : list of size n_outputs of arrays of size (n_classes,)
        List of classes for each column.

    n_classes : list of integers of size n_outputs
        Number of classes in each column

    class_prior : list of size n_outputs of arrays of size (n_classes,)
        Class distribution of each column.

    i   i    t   return_inverset   weightsN(   R   R   t   tocscR   t   difft   indptrt   ranget   indicesR$   R   t   sumR   R9   RP   t   bincountt   insertt   append(   R   t   sample_weightRR   t	   n_classest   class_priort	   n_samplest	   n_outputst   y_nnzt   kt   col_nonzerot   nz_samp_weightt   zeros_samp_weight_sumt	   classes_kt   y_kt   class_prior_k(    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pyt   class_distributionH  sD    "(#+c         C   sž  |  j  d } t j | | f ƒ } t j | | f ƒ } d } xì t | ƒ D]Þ } xÕ t | d | ƒ D]À } | d d … | f c | d d … | f 8<| d d … | f c | d d … | f 7<| |  d d … | f d k | f c d 7<| |  d d … | f d k | f c d 7<| d 7} qd WqJ W| j ƒ  }	 | j ƒ  }
 |	 |
 k rT| St j | j ƒ j } t t	 |	 ƒ t	 |
 ƒ ƒ } d | | } | | | S(   st  Compute a continuous, tie-breaking OvR decision function from OvO.

    It is important to include a continuous value, not only votes,
    to make computing AUC or calibration meaningful.

    Parameters
    ----------
    predictions : array-like, shape (n_samples, n_classifiers)
        Predicted classes for each binary classifier.

    confidences : array-like, shape (n_samples, n_classifiers)
        Decision functions or predicted probabilities for positive class
        for each binary classifier.

    n_classes : int
        Number of classes. n_classifiers must be
        ``n_classes * (n_classes - 1 ) / 2``
    i    i   Ng      à?(
   R   R   t   zerosRY   t   maxt   mint   finfoR/   t   epst   abs(   t   predictionst   confidencesR`   Rb   t   votest   sum_of_confidencesRe   t   it   jt   max_confidencest   min_confidencesRq   t   max_abs_confidencet   scale(    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pyt   _ovr_decision_function’  s&    ,,,,(    t   __doc__t
   __future__R    t	   itertoolsR   t   scipy.sparseR   t   scipy.sparse.baseR   R   R   t   numpyR   t   externals.sixR   t   utils.fixesR   RC   t
   validationR   R   R   R"   R-   R4   R<   R>   R   R$   RS   Rl   R}   (    (    (    s7   lib/python2.7/site-packages/sklearn/utils/multiclass.pyt   <module>   s0   		
	A		.		w#J