B
    	\v;                 @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlZd	d
lmZ d	dlmZ ddlmZ dd Zdd ZeeedZdd Zdd Zdd Zdd Zdd Zd#ddZd$dd Zd!d" ZdS )%zX
Multi-class / multi-label utility function
==========================================

    )division)chain)issparse)spmatrix)
dok_matrix)
lil_matrixN   )string_types)	_Sequence   )check_arrayc             C   s&   t | drtt| S t| S d S )N	__array__)hasattrnpuniqueasarrayset)y r   7lib/python3.7/site-packages/sklearn/utils/multiclass.py_unique_multiclass   s    
r   c             C   s   t t| dddgjd S )Ncsrcsccoor   )r   Zaranger   shape)r   r   r   r   _unique_indicator   s    r   )binary
multiclasszmultilabel-indicatorc                 s   | st dtdd | D }|tddgkr8tdg}t|dkrPt d| | }|dkrttd	d | D dkrt d
t|d  st dt|  tt fdd| D }ttdd |D dkrt dt	
t|S )ay  Extract an ordered array of unique labels

    We don't allow:
        - mix of multilabel and multiclass (single label) targets
        - mix of label indicator matrix and anything else,
          because there are no explicit labels)
        - mix of label indicator matrices of different sizes
        - mix of string and integer labels

    At the moment, we also don't allow "multiclass-multioutput" input type.

    Parameters
    ----------
    *ys : array-likes

    Returns
    -------
    out : numpy array of shape [n_unique_labels]
        An ordered array of unique labels.

    Examples
    --------
    >>> from sklearn.utils.multiclass import unique_labels
    >>> unique_labels([3, 5, 5, 5, 7, 7])
    array([3, 5, 7])
    >>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])
    array([1, 2, 3, 4])
    >>> unique_labels([1, 2, 10], [5, 11])
    array([ 1,  2,  5, 10, 11])
    zNo argument has been passed.c             s   s   | ]}t |V  qd S )N)type_of_target).0xr   r   r   	<genexpr>M   s    z unique_labels.<locals>.<genexpr>r   r   r   z'Mix type of y not allowed, got types %szmultilabel-indicatorc             s   s$   | ]}t |d ddgjd V  qdS )r   r   r   r   N)r   r   )r   r   r   r   r   r!   X   s   zCMulti-label binary indicator input with different numbers of labelsNzUnknown label type: %sc             3   s   | ]} |V  qd S )Nr   )r   r   )_unique_labelsr   r   r!   b   s    c             s   s   | ]}t |tV  qd S )N)
isinstancer	   )r   Zlabelr   r   r   r!   e   s    z,Mix of label input types (string and number))
ValueErrorr   lenpop_FN_UNIQUE_LABELSgetreprr   from_iterabler   Zarraysorted)ZysZys_typesZ
label_typeZ	ys_labelsr   )r"   r   unique_labels*   s&    

r,   c             C   s    | j jdkot| t| kS )Nf)dtypekindr   allastypeint)r   r   r   r   _is_integral_floatk   s    r3   c             C   s   t | drt| } t | dr6| jdkr6| jd dks:dS t| rt| ttfrX| 	 } t
| jdkpt| jjdko| jjdkptt| jS t| }t
|dk o| jjdkpt|S d	S )
a   Check if ``y`` is in a multilabel format.

    Parameters
    ----------
    y : numpy array of shape [n_samples]
        Target values.

    Returns
    -------
    out : bool,
        Return ``True``, if ``y`` is in a multilabel format, else ```False``.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils.multiclass import is_multilabel
    >>> is_multilabel([0, 1, 0, 1])
    False
    >>> is_multilabel([[1], [0, 2], []])
    False
    >>> is_multilabel(np.array([[1, 0], [0, 0]]))
    True
    >>> is_multilabel(np.array([[1], [0], [0]]))
    False
    >>> is_multilabel(np.array([[1, 0, 0]]))
    True
    r   r   r   r   Fr   Zbiu   N)r   r   r   ndimr   r   r#   r   r   Ztocsrr%   datar   sizer.   r/   r3   )r   labelsr   r   r   is_multilabelo   s    

" 
r9   c             C   s    t | }|dkrtd| dS )a*  Ensure that target y is of a non-regression type.

    Only the following target types (as defined in type_of_target) are allowed:
        'binary', 'multiclass', 'multiclass-multioutput',
        'multilabel-indicator', 'multilabel-sequences'

    Parameters
    ----------
    y : array-like
    )r   r   zmulticlass-multioutputzmultilabel-indicatorzmultilabel-sequenceszUnknown label type: %rN)r   r$   )r   Zy_typer   r   r   check_classification_targets   s    r:   c             C   s  t | ttfst| do"t | t }|s4td|  | jjdk}|rLtdt| rXdS yt	
| } W n tk
rz   dS X y6t| d dst | d trt | d tstdW n tk
r   Y nX | jd	ks| jtkrt| rt | jd tsdS | jd	kr| jd
 dkrdS | jd	kr:| jd
 d
kr:d}nd}| jjdkrjt	| | tkrjd| S tt	| d	ks| jd	krt| d d
krd| S dS dS )a  Determine the type of data indicated by the target.

    Note that this type is the most specific type that can be inferred.
    For example:

        * ``binary`` is more specific but compatible with ``multiclass``.
        * ``multiclass`` of integers is more specific but compatible with
          ``continuous``.
        * ``multilabel-indicator`` is more specific but compatible with
          ``multiclass-multioutput``.

    Parameters
    ----------
    y : array-like

    Returns
    -------
    target_type : string
        One of:

        * 'continuous': `y` is an array-like of floats that are not all
          integers, and is 1d or a column vector.
        * 'continuous-multioutput': `y` is a 2d array of floats that are
          not all integers, and both dimensions are of size > 1.
        * 'binary': `y` contains <= 2 discrete values and is 1d or a column
          vector.
        * 'multiclass': `y` contains more than two discrete values, is not a
          sequence of sequences, and is 1d or a column vector.
        * 'multiclass-multioutput': `y` is a 2d array that contains more
          than two discrete values, is not a sequence of sequences, and both
          dimensions are of size > 1.
        * 'multilabel-indicator': `y` is a label indicator matrix, an array
          of two dimensions with at least two columns, and at most 2 unique
          values.
        * 'unknown': `y` is array-like but none of the above, such as a 3d
          array, sequence of sequences, or an array of non-sequence objects.

    Examples
    --------
    >>> import numpy as np
    >>> type_of_target([0.1, 0.6])
    'continuous'
    >>> type_of_target([1, -1, -1, 1])
    'binary'
    >>> type_of_target(['a', 'b', 'a'])
    'binary'
    >>> type_of_target([1.0, 2.0])
    'binary'
    >>> type_of_target([1, 0, 2])
    'multiclass'
    >>> type_of_target([1.0, 0.0, 3.0])
    'multiclass'
    >>> type_of_target(['a', 'b', 'c'])
    'multiclass'
    >>> type_of_target(np.array([[1, 2], [3, 1]]))
    'multiclass-multioutput'
    >>> type_of_target([[1, 2]])
    'multiclass-multioutput'
    >>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))
    'continuous-multioutput'
    >>> type_of_target(np.array([[0, 1], [1, 1]]))
    'multilabel-indicator'
    r   z:Expected array-like (array or non-string sequence), got %rZSparseSeriesz!y cannot be class 'SparseSeries'.zmultilabel-indicatorunknownr   zYou appear to be using a legacy multi-label data representation. Sequence of sequences are no longer supported; use a binary array or sparse matrix instead.r   r   z-multioutput r-   Z
continuousr   r   N)r#   Sequencer   r   r	   r$   	__class____name__r9   r   r   
IndexErrorr5   r.   objectr%   Zflatr   r/   anyr1   r2   r   )r   ZvalidZsparseseriessuffixr   r   r   r      sB    @$2r   c             C   sr   t | dddkr"|dkr"tdnL|dk	rnt | dddk	r`t| jt|sntd|| jf nt|| _dS dS )a!  Private helper function for factorizing common classes param logic

    Estimators that implement the ``partial_fit`` API need to be provided with
    the list of possible classes at the first call to partial_fit.

    Subsequent calls to partial_fit should check that ``classes`` is still
    consistent with a previous value of ``clf.classes_`` when provided.

    This function returns True if it detects that this was the first call to
    ``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also
    set on ``clf``.

    classes_Nz8classes must be passed on the first call to partial_fit.zD`classes=%r` is not the same as on last call to partial_fit, was: %rTF)getattrr$   r   Zarray_equalrD   r,   )Zclfclassesr   r   r   _check_partial_fit_first_call%  s    

rG   c             C   s  g }g }g }| j \}}t| rh|  } t| j}xt|D ] }| j| j| | j|d   }	|dk	rt||	 }
t	|t	|
 }nd}
| j d ||  }tj
| j| j| | j|d   dd\}}tj||
d}d|kr||dk  |7  < d|kr6|| | j d k r6t|dd}t|d|}|| ||j d  |||	   q@W nlxjt|D ]^}tj
| dd|f dd\}}|| ||j d  tj||d}|||	   qrW |||fS )az  Compute class priors from multioutput-multiclass target data

    Parameters
    ----------
    y : array like or sparse matrix of size (n_samples, n_outputs)
        The labels for each example.

    sample_weight : array-like of shape = (n_samples,), optional
        Sample weights.

    Returns
    -------
    classes : list of size n_outputs of arrays of size (n_classes,)
        List of classes for each column.

    n_classes : list of integers of size n_outputs
        Number of classes in each column

    class_prior : list of size n_outputs of arrays of size (n_classes,)
        Class distribution of each column.

    r   Nr   T)Zreturn_inverse)Zweights)r   r   Ztocscr   ZdiffZindptrrangeindicesr   sumr   r6   Zbincountinsertappend)r   Zsample_weightrF   	n_classesZclass_prior	n_samplesZ	n_outputsZy_nnzkZcol_nonzeroZnz_samp_weightZzeros_samp_weight_sumZ	classes_kZy_kZclass_prior_kr   r   r   class_distributionH  sD    

 

rP   c             C   sB  | j d }t||f}t||f}d}xt|D ]}xt|d |D ]}|dd|f  |dd|f 8  < |dd|f  |dd|f 7  < || dd|f dk|f  d7  < || dd|f dk|f  d7  < |d7 }qHW q4W | }	| }
|	|
kr
|S t|jj}tt	|	t	|
}d| | }|||  S )at  Compute a continuous, tie-breaking OvR decision function from OvO.

    It is important to include a continuous value, not only votes,
    to make computing AUC or calibration meaningful.

    Parameters
    ----------
    predictions : array-like, shape (n_samples, n_classifiers)
        Predicted classes for each binary classifier.

    confidences : array-like, shape (n_samples, n_classifiers)
        Decision functions or predicted probabilities for positive class
        for each binary classifier.

    n_classes : int
        Number of classes. n_classifiers must be
        ``n_classes * (n_classes - 1 ) / 2``
    r   r   Ng      ?)
r   r   ZzerosrH   maxminZfinfor.   epsabs)ZpredictionsZconfidencesrM   rN   ZvotesZsum_of_confidencesrO   ijZmax_confidencesZmin_confidencesrS   Zmax_abs_confidenceZscaler   r   r   _ovr_decision_function  s&    
$$$$
rW   )N)N)__doc__Z
__future__r   	itertoolsr   Zscipy.sparser   Zscipy.sparse.baser   r   r   Znumpyr   Zexternals.sixr	   Zutils.fixesr
   r=   Z
validationr   r   r   r'   r,   r3   r9   r:   r   rG   rP   rW   r   r   r   r   <module>   s.   A.w
#
J