B
    	\h=                 @   s   d dl mZ d dlZddlmZmZ	m
Z dd Zdd Zdd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd1d'd(Zd2d)d*Zd+d, Zd-d. Z d/d0 Z!dS )3    N   )csr_mean_variance_axis0csc_mean_variance_axis0incr_mean_variance_axis0c             C   s,   t | r| jnt| }d| }t|dS )z2Raises a TypeError if X is not a CSR or CSC matrixz,Expected a CSR or CSC sparse matrix, got %s.N)spZissparseformattype	TypeError)XZ
input_typeerr r   8lib/python3.7/site-packages/sklearn/utils/sparsefuncs.py_raise_typeerror   s    r   c             C   s   | dkrt d|  d S )N)r   r   z8Unknown axis value: %d. Use 0 for rows, or 1 for columns)
ValueError)axisr   r   r   _raise_error_wrong_axis   s    r   c             C   s6   |j d | j d kst|  j|j| jdd9  _dS )a  Inplace column scaling of a CSR matrix.

    Scale each feature of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : CSR matrix with shape (n_samples, n_features)
        Matrix to normalize using the variance of the features.

    scale : float array with shape (n_features,)
        Array of precomputed feature-wise values to use for scaling.
    r   r   Zclip)modeN)shapeAssertionErrordataZtakeindices)r
   scaler   r   r   inplace_csr_column_scale   s    r   c             C   s:   |j d | j d kst|  jt|t| j9  _dS )a   Inplace row scaling of a CSR matrix.

    Scale each sample of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : CSR sparse matrix, shape (n_samples, n_features)
        Matrix to be scaled.

    scale : float array with shape (n_samples,)
        Array of precomputed sample-wise values to use for scaling.
    r   N)r   r   r   nprepeatdiffindptr)r
   r   r   r   r   inplace_csr_row_scale.   s    r   c             C   sd   t | t| tjr0|dkr$t| S t| jS n0t| tjrX|dkrLt| S t| jS nt|  dS )a  Compute mean and variance along an axix on a CSR or CSC matrix

    Parameters
    ----------
    X : CSR or CSC sparse matrix, shape (n_samples, n_features)
        Input data.

    axis : int (either 0 or 1)
        Axis along which the axis should be computed.

    Returns
    -------

    means : float array with shape (n_features,)
        Feature-wise means

    variances : float array with shape (n_features,)
        Feature-wise variances

    r   N)	r   
isinstancer   
csr_matrix_csr_mean_var_axis0_csc_mean_var_axis0T
csc_matrixr   )r
   r   r   r   r   mean_variance_axis@   s    r$   c             C   s   t | t| tjr@|dkr,t| |||dS t| j|||dS n@t| tjrx|dkrdt| |||dS t| j|||dS nt|  dS )a  Compute incremental mean and variance along an axix on a CSR or
    CSC matrix.

    last_mean, last_var are the statistics computed at the last step by this
    function. Both must be initialized to 0-arrays of the proper size, i.e.
    the number of features in X. last_n is the number of samples encountered
    until now.

    Parameters
    ----------
    X : CSR or CSC sparse matrix, shape (n_samples, n_features)
        Input data.

    axis : int (either 0 or 1)
        Axis along which the axis should be computed.

    last_mean : float array with shape (n_features,)
        Array of feature-wise means to update with the new data X.

    last_var : float array with shape (n_features,)
        Array of feature-wise var to update with the new data X.

    last_n : int with shape (n_features,)
        Number of samples seen so far, excluded X.

    Returns
    -------

    means : float array with shape (n_features,)
        Updated feature-wise means.

    variances : float array with shape (n_features,)
        Updated feature-wise variances.

    n : int with shape (n_features,)
        Updated number of seen samples.

    Notes
    -----
    NaNs are ignored in the algorithm.

    r   )	last_meanlast_varlast_nN)r   r   r   r   _incr_mean_var_axis0r"   r#   r   )r
   r   r%   r&   r'   r   r   r   incr_mean_variance_axise   s    +

r)   c             C   s>   t | tjrt| j| n t | tjr2t| | nt|  dS )a  Inplace column scaling of a CSC/CSR matrix.

    Scale each feature of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : CSC or CSR matrix with shape (n_samples, n_features)
        Matrix to normalize using the variance of the features.

    scale : float array with shape (n_features,)
        Array of precomputed feature-wise values to use for scaling.
    N)r   r   r#   r   r"   r   r   r   )r
   r   r   r   r   inplace_column_scale   s
    r*   c             C   s>   t | tjrt| j| n t | tjr2t| | nt|  dS )a   Inplace row scaling of a CSR or CSC matrix.

    Scale each row of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : CSR or CSC sparse matrix, shape (n_samples, n_features)
        Matrix to be scaled.

    scale : float array with shape (n_features,)
        Array of precomputed sample-wise values to use for scaling.
    N)r   r   r#   r   r"   r   r   r   )r
   r   r   r   r   inplace_row_scale   s
    r+   c             C   sz   x$||gD ]}t |tjr
tdq
W |dk r<|| jd 7 }|dk rR|| jd 7 }| j|k}|| j| j|k< || j|< dS )a5  
    Swaps two rows of a CSC matrix in-place.

    Parameters
    ----------
    X : scipy.sparse.csc_matrix, shape=(n_samples, n_features)
        Matrix whose two rows are to be swapped.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
    z m and n should be valid integersr   N)r   r   ndarrayr	   r   r   )r
   mntZm_maskr   r   r   inplace_swap_row_csc   s    
r0   c          	   C   s|  x$||gD ]}t |tjr
tdq
W |dk r<|| jd 7 }|dk rR|| jd 7 }||krd|| }}| j}|| }||d  }|| }||d  }|| }	|| }
|	|
kr| j|d |  |
|	 7  < ||
 | j|d < ||	 | j|< t| jd| | j|| | j|| | j|| | j|d g| _t| jd| | j|| | j|| | j|| | j|d g| _dS )a5  
    Swaps two rows of a CSR matrix in-place.

    Parameters
    ----------
    X : scipy.sparse.csr_matrix, shape=(n_samples, n_features)
        Matrix whose two rows are to be swapped.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
    z m and n should be valid integersr   r      N)	r   r   r,   r	   r   r   Zconcatenater   r   )r
   r-   r.   r/   r   Zm_startZm_stopZn_startZn_stopZnz_mZnz_nr   r   r   inplace_swap_row_csr   s<    
r2   c             C   s@   t | tjrt| || n"t | tjr4t| || nt|  dS )a:  
    Swaps two rows of a CSC/CSR matrix in-place.

    Parameters
    ----------
    X : CSR or CSC sparse matrix, shape=(n_samples, n_features)
        Matrix whose two rows are to be swapped.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
    N)r   r   r#   r0   r   r2   r   )r
   r-   r.   r   r   r   inplace_swap_row$  s
    r3   c             C   sl   |dk r|| j d 7 }|dk r,|| j d 7 }t| tjrFt| || n"t| tjr`t| || nt|  dS )aF  
    Swaps two columns of a CSC/CSR matrix in-place.

    Parameters
    ----------
    X : CSR or CSC sparse matrix, shape=(n_samples, n_features)
        Matrix whose two columns are to be swapped.

    m : int
        Index of the column of X to be swapped.

    n : int
        Index of the column of X to be swapped.
    r   r   N)r   r   r   r#   r2   r   r0   r   )r
   r-   r.   r   r   r   inplace_swap_column;  s    r4   c             C   s.   t t | j}|| j| j| }||fS )N)r   Zflatnonzeror   r   Zreduceatr   )r
   Zufuncmajor_indexvaluer   r   r   _minor_reduceV  s    r7   c             C   s   | j | }|dkrtd| j d|  }|dkr8|  n|  }|  t||\}}t|j| |k }||| d||< |dk}	t	|	|}t	|	|}|dkrt
j|tt||ff| jd|fd}
n(t
j||tt|ff| j|dfd}
|
j S )Nr   z&zero-size array to reduction operationr   )dtyper   )r   r   ZtocscZtocsrZsum_duplicatesr7   r   r   r   compressr   Z
coo_matrixzeroslenr8   Aravel)r
   r   
min_or_maxNMZmatr5   r6   Znot_fullmaskZresr   r   r   _min_or_max_axis\  s$    
rB   c             C   s   |d krdd| j krtd| jd}| jdkr4|S || j }| jt	| j kr`|||}|S |dk rt|d7 }|dks|dkrt
| ||S tdd S )Nr   z&zero-size array to reduction operationr1   r   z.invalid axis, use 0 for rows, or 1 for columns)r   r   r8   r   nnzreducer   r=   r   productrB   )r
   r   r>   Zzeror-   r   r   r   _sparse_min_or_maxs  s    


rF   c             C   s   t | |tjt | |tjfS )N)rF   r   ZminimumZmaximum)r
   r   r   r   r   _sparse_min_max  s    rG   c             C   s   t | |tjt | |tjfS )N)rF   r   ZfminZfmax)r
   r   r   r   r   _sparse_nan_min_max  s    rH   Fc             C   sB   t | tjst | tjr6|r(t| |dS t| |dS nt|  dS )a^  Compute minimum and maximum along an axis on a CSR or CSC matrix and
    optionally ignore NaN values.

    Parameters
    ----------
    X : CSR or CSC sparse matrix, shape (n_samples, n_features)
        Input data.

    axis : int (either 0 or 1)
        Axis along which the axis should be computed.

    ignore_nan : bool, default is False
        Ignore or passing through NaN values.

        .. versionadded:: 0.20

    Returns
    -------

    mins : float array with shape (n_features,)
        Feature-wise minima

    maxs : float array with shape (n_features,)
        Feature-wise maxima
    )r   N)r   r   r   r#   rH   rG   r   )r
   r   Z
ignore_nanr   r   r   min_max_axis  s
    rI   c             C   s   |dkrd}n(|dkrd}n| j dkr6td | j |dkrb|dkrL| jS tt| j|S n|dkrt| j}|dkr|S || S |dkr|dkrtj| j| j	d dS t
|t| j}tj| j| j	d |d	S ntd
 |dS )a  A variant of X.getnnz() with extension to weighting on axis 0

    Useful in efficiently calculating multilabel metrics.

    Parameters
    ----------
    X : CSR sparse matrix, shape = (n_samples, n_labels)
        Input data.

    axis : None, 0 or 1
        The axis on which the data is aggregated.

    sample_weight : array, shape = (n_samples,), optional
        Weight for each row of X.
    r   r   Zcsrz#Expected CSR sparse format, got {0}N)	minlength)rL   weightszUnsupported axis: {0})r   r	   rC   r   dotr   r   Zbincountr   r   r   r   )r
   r   Zsample_weightoutrM   r   r   r   count_nonzero  s,    

rP   c             C   sp   t | | }|stjS t| dk }t|d\}}|   |rLt|| ||S t|d | ||t|| || d S )zCompute the median of data with n_zeros additional zeros.

    This function is used to support sparse matrices; it modifies data in-place
    r   r1   r   g       @)r;   r   nanrP   divmodsort_get_elem_at_rank)r   n_zerosZn_elems
n_negativeZmiddleZis_oddr   r   r   _get_median  s    rW   c             C   s,   | |k r||  S | | |k r dS || |  S )z@Find the value in data augmented with n_zeros for the given rankr   r   )Zrankr   rV   rU   r   r   r   rT     s
    rT   c       
      C   s   t | tjstd| j | j}| j\}}t|}xZt	t
|dd |dd D ]8\}\}}t| j|| }||j }	t||	||< qTW |S )a'  Find the median across axis 0 of a CSC matrix.
    It is equivalent to doing np.median(X, axis=0).

    Parameters
    ----------
    X : CSC sparse matrix, shape (n_samples, n_features)
        Input data.

    Returns
    -------
    median : ndarray, shape (n_features,)
        Median.

    z%Expected matrix of CSC format, got %sNrJ   r   )r   r   r#   r	   r   r   r   r   r:   	enumeratezipcopyr   sizerW   )
r
   r   Z	n_samplesZ
n_featuresZmedianZf_indstartendr   Znzr   r   r   csc_median_axis_0  s    

,
r^   )F)NN)"Zscipy.sparseZsparser   Znumpyr   Zsparsefuncs_fastr   r    r   r!   r   r(   r   r   r   r   r$   r)   r*   r+   r0   r2   r3   r4   r7   rB   rF   rG   rH   rI   rP   rW   rT   r^   r   r   r   r   <module>   s0   %?7
#
0	