B
    	\Q                 @   sh  d Z ddlmZmZ ddlmZ ddlZddlZddlZddl	Z
ddlmZ ddlmZmZ ddlmZmZ ddlmZmZmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ e
 e
j!j"Z#ej$e
j%fZ&dd Z'dd Z(dd Z)d7ddZ*dd Z+dd Z,dd Z-dd Z.d8d d!Z/d"d# Z0d9d'd(Z1d:d)d*Z2d+d, Z3d;d.d/Z4d<d3d4Z5G d5d6 d6eeZ6dS )=z# Non-negative matrix factorization
    )divisionprint_function)sqrtN   )BaseEstimatorTransformerMixin)check_random_statecheck_array)randomized_svdsafe_sparse_dotsquared_norm)safe_min)check_is_fittedcheck_non_negative)ConvergenceWarning   )_update_cdnmf_fastc             C   s   t t| S )zDot product-based Euclidean norm implementation

    See: http://fseoane.net/blog/2011/computing-the-vector-norm/

    Parameters
    ----------
    x : array-like
        Vector for which to compute the norm
    )r   r   )x r   8lib/python3.7/site-packages/sklearn/decomposition/nmf.pynorm!   s    
r   c             C   s   t |  | S )zTrace of np.dot(X, Y.T).

    Parameters
    ----------
    X : array-like
        First matrix
    Y : array-like
        Second matrix
    )npdotravel)XYr   r   r   	trace_dot.   s    
r   c             C   sV   t | } t| |kr.td||t| f t| | t| dkrRtd| d S )Nz=Array with wrong shape passed to %s. Expected %s, but got %s r   z$Array passed to %s is full of zeros.)r	   r   shape
ValueErrorr   max)Ar   Zwhomr   r   r   _check_init;   s    
r!   Fc          
   C   sv  t |}t| st| } t|}t|}|dkrt| rt| j| j}ttt|j|||}t| |j |}|| d|  d }nt	| t|| d }|rt
|d S |S t| rt||| j}	| j}
nt||}| }	|  }
|
tk}|	| }	|
| }
t|	|	dk< |dkrxttj|ddtj|dd}|
|	 }t|
t|}|||
  7 }n|dkr|
|	 }t|t| j tt| }nt| rd}xNt| jd D ],}|tt||dd|f | 7 }qW nt|| }t|
|	|d  }|
|  ||  }|||d  7 }|||d   }|rnt
d| S |S dS )a{  Compute the beta-divergence of X and dot(W, H).

    Parameters
    ----------
    X : float or array-like, shape (n_samples, n_features)

    W : float or dense array-like, shape (n_samples, n_components)

    H : float or dense array-like, shape (n_components, n_features)

    beta : float, string in {'frobenius', 'kullback-leibler', 'itakura-saito'}
        Parameter of the beta-divergence.
        If beta == 2, this is half the Frobenius *squared* norm.
        If beta == 1, this is the generalized Kullback-Leibler divergence.
        If beta == 0, this is the Itakura-Saito divergence.
        Else, this is the general beta-divergence.

    square_root : boolean, default False
        If True, return np.sqrt(2 * res)
        For beta == 2, it corresponds to the Frobenius norm.

    Returns
    -------
        res : float
            Beta divergence of X and np.dot(X, H)
    r   g       @r   r   )axisN)_beta_loss_to_floatspissparser   Z
atleast_2dr   datar   Tr   r   _special_sparse_dotr   EPSILONsumlogproductr   range)r   WHZbetasquare_rootZnorm_XZnorm_WHZ
cross_prodZresZWH_dataX_dataWHindicesZsum_WHZdivZsum_WH_betaiZsum_X_WHr   r   r   _beta_divergenceE   sZ    






 
(.r5   c             C   st   t |rd| \}}t| |ddf |j|ddf jdd}t j|||ff|jd}|	 S t
| |S dS )z0Computes np.dot(W, H), only where X is non zero.Nr   )r"   )r   )r$   r%   Znonzeror   Zmultiplyr'   r*   Z
coo_matrixr   Ztocsrr   )r.   r/   r   ZiiZjjZdot_valsr2   r   r   r   r(      s    
.r(   c       	      C   s\   d}d}|dkrt | }|dkr(t | }|| }|| }|d|  }|d|  }||||fS )z9Compute L1 and L2 regularization coefficients for W and Hg        )both
components)r6   transformationg      ?)float)	alphal1_ratioregularizationZalpha_HZalpha_Wl1_reg_Wl1_reg_Hl2_reg_Wl2_reg_Hr   r   r   _compute_regularization   s    rA   c             C   s   d}| |krt d| |f d}||kr8t d||f | dkrX|dkrXt d| |f | dkrt|dkrttd	t t|}|S )
N)cdmuz5Invalid solver parameter: got %r instead of one of %r)r6   r7   r8   Nz=Invalid regularization parameter: got %r instead of one of %rrC   )r   	frobeniuszEInvalid beta_loss parameter: solver %r does not handle beta_loss = %rnndsvdzThe multiplicative update ('mu') solver cannot update zeros present in the initialization, and so leads to poorer results when used jointly with init='nndsvd'. You may try init='nndsvda' or init='nndsvdar' instead.)r   warningswarnUserWarningr#   )solverr<   	beta_lossinitZallowed_solverZallowed_regularizationr   r   r   _check_string_param   s&    rL   c             C   sJ   dddd}t | tr&| |kr&||  } t | tjsFtd| | f | S )z!Convert string beta_loss to floatr   r   r   )rD   zkullback-leiblerzitakura-saitozEInvalid beta_loss parameter: got %r instead of one of %r, or a float.)
isinstancestrnumbersNumberr   keys)rJ   Zallowed_beta_lossr   r   r   r#      s    r#   ư>c             C   s  t | d | j\}}|dkr.||k r*d}nd}|dkrt|  | }t|}|||| }	|||| }
t|	|	 t|
|
 |
|	fS t| ||d\}}}t	|jt	|j }
}	t|d t|dddf  |
dddf< t|d t|dddf  |	dddf< xt
d|D ]}|dd|f ||ddf  }}t|dt|d }}tt|dtt|d }}t|t| }}t|t| }}|| ||  }}||kr|| }|| }|}n|| }|| }|}t|| | }|| |
dd|f< || |	|ddf< q,W d|
|
|k < d|	|	|k < |dkrbn|dkr|  }||
|
dk< ||	|	dk< n|d	krt|}|  }t||t|
|
dk  d
 |
|
dk< t||t|	|	dk  d
 |	|	dk< ntd|df |
|	fS )a  Algorithms for NMF initialization.

    Computes an initial guess for the non-negative
    rank k matrix approximation for X: X = WH

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        The data matrix to be decomposed.

    n_components : integer
        The number of components desired in the approximation.

    init :  None | 'random' | 'nndsvd' | 'nndsvda' | 'nndsvdar'
        Method used to initialize the procedure.
        Default: 'nndsvd' if n_components < n_features, otherwise 'random'.
        Valid options:

        - 'random': non-negative random matrices, scaled with:
            sqrt(X.mean() / n_components)

        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)
            initialization (better for sparseness)

        - 'nndsvda': NNDSVD with zeros filled with the average of X
            (better when sparsity is not desired)

        - 'nndsvdar': NNDSVD with zeros filled with small random values
            (generally faster, less accurate alternative to NNDSVDa
            for when sparsity is not desired)

        - 'custom': use custom matrices W and H

    eps : float
        Truncate all values less then this in output to zero.

    random_state : int, RandomState instance or None, optional, default: None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Used when ``random`` == 'nndsvdar' or 'random'.

    Returns
    -------
    W : array-like, shape (n_samples, n_components)
        Initial guesses for solving X ~= WH

    H : array-like, shape (n_components, n_features)
        Initial guesses for solving X ~= WH

    References
    ----------
    C. Boutsidis, E. Gallopoulos: SVD based initialization: A head start for
    nonnegative matrix factorization - Pattern Recognition, 2008
    http://tinyurl.com/nndsvd
    zNMF initializationNrE   random)random_stater   r   nndsvdanndsvdard   z3Invalid init parameter: got %r instead of one of %r)NrS   rE   rU   rV   )r   r   r   r   meanr   Zrandnabsr
   zerosr-   ZmaximumZminimumr   lenr   )r   n_componentsrK   epsrT   	n_samples
n_featuresavgrngr/   r.   USVjr   yZx_pZy_pZx_nZy_nZx_p_nrmZy_p_nrmZx_n_nrmZy_n_nrmZm_pZm_nuvZsigmaZlbdr   r   r   _initialize_nmf   sh    :

00"&



*,ri   c             C   s   |j d }t|j|}t| |}	|dkrF|jdd|d   |7  < |dkrV|	|8 }	|rf||}
n
t|}
tj|
tj	d}
t
|||	|
S )zHelper function for _fit_coordinate_descent

    Update W to minimize the objective function, iterating once over all
    coordinates. By symmetry, to update H, one can call
    _update_coordinate_descent(X.T, Ht, W, ...)

    r   g        N)dtype)r   r   r   r'   r   ZflatpermutationZarangeZasarrayZintpr   )r   r.   HtZl1_regZl2_regshufflerT   r\   HHtXHtrk   r   r   r   _update_coordinate_descent  s    	


rp   -C6?   Tc          
   C   s   t |jdd}t | dd} t|}xt|D ]}d}|t| ||||||7 }|	rj|t| j||||||7 }|dkrv|}|dkrP |
rtd||  || |kr,|
rtd|d	  P q,W ||j|fS )
a	  Compute Non-negative Matrix Factorization (NMF) with Coordinate Descent

    The objective function is minimized with an alternating minimization of W
    and H. Each minimization is done with a cyclic (up to a permutation of the
    features) Coordinate Descent.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Constant matrix.

    W : array-like, shape (n_samples, n_components)
        Initial guess for the solution.

    H : array-like, shape (n_components, n_features)
        Initial guess for the solution.

    tol : float, default: 1e-4
        Tolerance of the stopping condition.

    max_iter : integer, default: 200
        Maximum number of iterations before timing out.

    l1_reg_W : double, default: 0.
        L1 regularization parameter for W.

    l1_reg_H : double, default: 0.
        L1 regularization parameter for H.

    l2_reg_W : double, default: 0.
        L2 regularization parameter for W.

    l2_reg_H : double, default: 0.
        L2 regularization parameter for H.

    update_H : boolean, default: True
        Set to True, both W and H will be estimated from initial guesses.
        Set to False, only W will be estimated.

    verbose : integer, default: 0
        The verbosity level.

    shuffle : boolean, default: False
        If true, randomize the order of coordinates in the CD solver.

    random_state : int, RandomState instance or None, optional, default: None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Returns
    -------
    W : array-like, shape (n_samples, n_components)
        Solution to the non-negative least squares problem.

    H : array-like, shape (n_components, n_features)
        Solution to the non-negative least squares problem.

    n_iter : int
        The number of iterations done by the algorithm.

    References
    ----------
    Cichocki, Andrzej, and Phan, Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.
    C)ordercsr)accept_sparseg        r   z
violation:zConverged at iterationr   )r	   r'   r   r-   rp   print)r   r.   r/   tolmax_iterr=   r>   r?   r@   update_Hverboserm   rT   rl   ra   n_iterZ	violationZviolation_initr   r   r   _fit_coordinate_descent  s*    Ir}   c             C   sR  |dkrT|	dkrt | |j}	|
r&|	}n|	 }|dkrDt||j}t||}nt||| }t| rx|j}| j}n(|}| }| }|d dk rt	||dk< |d dk rt	||dk< |dkrtj
|||d n6|dkr|dC }|dC }||9 }n||d C }||9 }t ||j}|dkrJ|dkr6tj|dd	}|tjddf }nt| rt|j}xt| jd D ]^}t||ddf |}|d dk rt	||dk< ||d C }t||j||ddf< qrW n||d C }t||j}|}|dkr||7 }|dkr|||  }t	||dk< || }|}|dkrF||C }||||	fS )
z%update W in Multiplicative Update NMFr   Ng      ?r   g       @r   )out)r"   )r   r'   copyr   r   r(   r$   r%   r&   r)   divider*   newaxisemptyr   r-   )r   r.   r/   rJ   r=   r?   gammaH_sumrn   ro   rz   	numeratordenominator	WH_safe_XWH_safe_X_datar1   r2   ZWHHtr4   WHidelta_Wr   r   r   _multiplicative_update_w
  sl    



"


r   c             C   s&  |dkr.t |j| }tt|j||}nt||| }	t| rR|	j}
| j}n(|	}
| }|	 }|d dk rzt	||dk< |d dk rt	|
|
dk< |dkrtj
||
|
d n6|dkr|
dC }
|
dC }
|
|9 }
n|
|d C }
|
|9 }
t |j|	}|dkr&tj|dd}d||dk< |d	d	tjf }nt| rt|j}xt| jd D ]^}t||d	d	|f }|d dk rt	||dk< ||d C }t|j||d	d	|f< qNW n||d C }t|j|}|}|dkr||7 }|dkr|||  }t	||dk< || }|}|dkr"||C }|S )
z%update H in Multiplicative Update NMFr   g      ?r   g       @r   )r~   r   )r"   N)r   r'   r   r   r(   r$   r%   r&   r   r)   r   r*   r   r   r   r-   )r   r.   r/   rJ   r>   r@   r   r   r   r   r   r1   r2   ZW_sumZWtWHr4   r   delta_Hr   r   r   _multiplicative_update_hi  s`    


"


r   rD   c             C   s  t   }t|}|dk r&dd|  }n|dkr<d|d  }nd}t| |||dd}|}d\}}}xtd|d D  ]}t| ||||||||||
\}}}}||9 }|dk rd||ttjjk < |
rt	| |||||	|}||9 }d\}}}|dkrd||ttjjk < |d	krr|d
 d	krrt| |||dd}|rXt   }t
d||| |f  || | |k rlP |}qrW |r|d	ks|d
 d	krt   }t
d||| f  |||fS )a  Compute Non-negative Matrix Factorization with Multiplicative Update

    The objective function is _beta_divergence(X, WH) and is minimized with an
    alternating minimization of W and H. Each minimization is done with a
    Multiplicative Update.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Constant input matrix.

    W : array-like, shape (n_samples, n_components)
        Initial guess for the solution.

    H : array-like, shape (n_components, n_features)
        Initial guess for the solution.

    beta_loss : float or string, default 'frobenius'
        String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}.
        Beta divergence to be minimized, measuring the distance between X
        and the dot product WH. Note that values different from 'frobenius'
        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower
        fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input
        matrix X cannot contain zeros.

    max_iter : integer, default: 200
        Number of iterations.

    tol : float, default: 1e-4
        Tolerance of the stopping condition.

    l1_reg_W : double, default: 0.
        L1 regularization parameter for W.

    l1_reg_H : double, default: 0.
        L1 regularization parameter for H.

    l2_reg_W : double, default: 0.
        L2 regularization parameter for W.

    l2_reg_H : double, default: 0.
        L2 regularization parameter for H.

    update_H : boolean, default: True
        Set to True, both W and H will be estimated from initial guesses.
        Set to False, only W will be estimated.

    verbose : integer, default: 0
        The verbosity level.

    Returns
    -------
    W : array, shape (n_samples, n_components)
        Solution to the non-negative least squares problem.

    H : array, shape (n_components, n_features)
        Solution to the non-negative least squares problem.

    n_iter : int
        The number of iterations done by the algorithm.

    References
    ----------
    Fevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix
    factorization with the beta-divergence. Neural Computation, 23(9).
    r   g      ?g       @r   T)r0   )NNNg        r   
   z0Epoch %02d reached after %.3f seconds, error: %fz&Epoch %02d reached after %.3f seconds.)timer#   r5   r-   r   r   finfoZfloat64r]   r   rw   )r   r.   r/   rJ   ry   rx   r=   r>   r?   r@   rz   r{   Z
start_timer   Zerror_at_initZprevious_errorr   rn   ro   r|   r   r   errorZ	iter_timeZend_timer   r   r   _fit_multiplicative_update  sL    F


r   rS   rB           c             C   s  t | dtd} t| d t||||}t| dkrB|dkrBtd| j\}}|dkrX|}t|trj|dkrvtd| t|	tr|	dk rtd|	 t|t	j
r|dk rtd	| |d
kr|rt|||fd t|||fd nh|s6t|||fd |dkr&t|  | }t||f|}nt||f}nt| |||d\}}t|
||\}}}}|dkrt| ||||	||||||||d\}}}n<|dkrt| ||||	|||||||\}}}ntd| ||	kr|dkrtd|	 t |||fS )aO  Compute Non-negative Matrix Factorization (NMF)

    Find two non-negative matrices (W, H) whose product approximates the non-
    negative matrix X. This factorization can be used for example for
    dimensionality reduction, source separation or topic extraction.

    The objective function is::

        0.5 * ||X - WH||_Fro^2
        + alpha * l1_ratio * ||vec(W)||_1
        + alpha * l1_ratio * ||vec(H)||_1
        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2
        + 0.5 * alpha * (1 - l1_ratio) * ||H||_Fro^2

    Where::

        ||A||_Fro^2 = \sum_{i,j} A_{ij}^2 (Frobenius norm)
        ||vec(A)||_1 = \sum_{i,j} abs(A_{ij}) (Elementwise L1 norm)

    For multiplicative-update ('mu') solver, the Frobenius norm
    (0.5 * ||X - WH||_Fro^2) can be changed into another beta-divergence loss,
    by changing the beta_loss parameter.

    The objective function is minimized with an alternating minimization of W
    and H. If H is given and update_H=False, it solves for W only.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Constant matrix.

    W : array-like, shape (n_samples, n_components)
        If init='custom', it is used as initial guess for the solution.

    H : array-like, shape (n_components, n_features)
        If init='custom', it is used as initial guess for the solution.
        If update_H=False, it is used as a constant, to solve for W only.

    n_components : integer
        Number of components, if n_components is not set all features
        are kept.

    init :  None | 'random' | 'nndsvd' | 'nndsvda' | 'nndsvdar' | 'custom'
        Method used to initialize the procedure.
        Default: 'random'.
        Valid options:

        - 'random': non-negative random matrices, scaled with:
            sqrt(X.mean() / n_components)

        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)
            initialization (better for sparseness)

        - 'nndsvda': NNDSVD with zeros filled with the average of X
            (better when sparsity is not desired)

        - 'nndsvdar': NNDSVD with zeros filled with small random values
            (generally faster, less accurate alternative to NNDSVDa
            for when sparsity is not desired)

        - 'custom': use custom matrices W and H

    update_H : boolean, default: True
        Set to True, both W and H will be estimated from initial guesses.
        Set to False, only W will be estimated.

    solver : 'cd' | 'mu'
        Numerical solver to use:
        'cd' is a Coordinate Descent solver that uses Fast Hierarchical
            Alternating Least Squares (Fast HALS).
        'mu' is a Multiplicative Update solver.

        .. versionadded:: 0.17
           Coordinate Descent solver.

        .. versionadded:: 0.19
           Multiplicative Update solver.

    beta_loss : float or string, default 'frobenius'
        String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}.
        Beta divergence to be minimized, measuring the distance between X
        and the dot product WH. Note that values different from 'frobenius'
        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower
        fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input
        matrix X cannot contain zeros. Used only in 'mu' solver.

        .. versionadded:: 0.19

    tol : float, default: 1e-4
        Tolerance of the stopping condition.

    max_iter : integer, default: 200
        Maximum number of iterations before timing out.

    alpha : double, default: 0.
        Constant that multiplies the regularization terms.

    l1_ratio : double, default: 0.
        The regularization mixing parameter, with 0 <= l1_ratio <= 1.
        For l1_ratio = 0 the penalty is an elementwise L2 penalty
        (aka Frobenius Norm).
        For l1_ratio = 1 it is an elementwise L1 penalty.
        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.

    regularization : 'both' | 'components' | 'transformation' | None
        Select whether the regularization affects the components (H), the
        transformation (W), both or none of them.

    random_state : int, RandomState instance or None, optional, default: None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    verbose : integer, default: 0
        The verbosity level.

    shuffle : boolean, default: False
        If true, randomize the order of coordinates in the CD solver.

    Returns
    -------
    W : array-like, shape (n_samples, n_components)
        Solution to the non-negative least squares problem.

    H : array-like, shape (n_components, n_features)
        Solution to the non-negative least squares problem.

    n_iter : int
        Actual number of iterations.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
    >>> from sklearn.decomposition import non_negative_factorization
    >>> W, H, n_iter = non_negative_factorization(X, n_components=2,
    ... init='random', random_state=0)

    References
    ----------
    Cichocki, Andrzej, and P. H. A. N. Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.

    Fevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix
    factorization with the beta-divergence. Neural Computation, 23(9).
    )ru   csc)rv   rj   zNMF (input X)r   z|When beta_loss <= 0 and X contains zeros, the solver may diverge. Please add small values to X, or use a positive beta_loss.NzFNumber of components must be a positive integer; got (n_components=%r)zJMaximum number of iterations must be a positive integer; got (max_iter=%r)z>Tolerance for stopping criteria must be positive; got (tol=%r)ZcustomzNMF (input H)zNMF (input W)rC   )rK   rT   rB   )rz   r{   rm   rT   zInvalid solver parameter '%s'.zKMaximum number of iteration %d reached. Increase it to improve convergence.)r	   r9   r   rL   r   r   r   rM   INTEGER_TYPESrO   rP   r!   r   r   rX   ZfullrZ   ri   rA   r}   r   rF   rG   r   )r   r.   r/   r\   rK   rz   rI   rJ   rx   ry   r:   r;   r<   rT   r{   rm   r^   r_   r`   r=   r>   r?   r@   r|   r   r   r   non_negative_factorizationA  sb     





r   c               @   s>   e Zd ZdZdd
dZdddZdddZdd Zdd ZdS )NMFa  Non-Negative Matrix Factorization (NMF)

    Find two non-negative matrices (W, H) whose product approximates the non-
    negative matrix X. This factorization can be used for example for
    dimensionality reduction, source separation or topic extraction.

    The objective function is::

        0.5 * ||X - WH||_Fro^2
        + alpha * l1_ratio * ||vec(W)||_1
        + alpha * l1_ratio * ||vec(H)||_1
        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2
        + 0.5 * alpha * (1 - l1_ratio) * ||H||_Fro^2

    Where::

        ||A||_Fro^2 = \sum_{i,j} A_{ij}^2 (Frobenius norm)
        ||vec(A)||_1 = \sum_{i,j} abs(A_{ij}) (Elementwise L1 norm)

    For multiplicative-update ('mu') solver, the Frobenius norm
    (0.5 * ||X - WH||_Fro^2) can be changed into another beta-divergence loss,
    by changing the beta_loss parameter.

    The objective function is minimized with an alternating minimization of W
    and H.

    Read more in the :ref:`User Guide <NMF>`.

    Parameters
    ----------
    n_components : int or None
        Number of components, if n_components is not set all features
        are kept.

    init :  'random' | 'nndsvd' |  'nndsvda' | 'nndsvdar' | 'custom'
        Method used to initialize the procedure.
        Default: 'nndsvd' if n_components < n_features, otherwise random.
        Valid options:

        - 'random': non-negative random matrices, scaled with:
            sqrt(X.mean() / n_components)

        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)
            initialization (better for sparseness)

        - 'nndsvda': NNDSVD with zeros filled with the average of X
            (better when sparsity is not desired)

        - 'nndsvdar': NNDSVD with zeros filled with small random values
            (generally faster, less accurate alternative to NNDSVDa
            for when sparsity is not desired)

        - 'custom': use custom matrices W and H

    solver : 'cd' | 'mu'
        Numerical solver to use:
        'cd' is a Coordinate Descent solver.
        'mu' is a Multiplicative Update solver.

        .. versionadded:: 0.17
           Coordinate Descent solver.

        .. versionadded:: 0.19
           Multiplicative Update solver.

    beta_loss : float or string, default 'frobenius'
        String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}.
        Beta divergence to be minimized, measuring the distance between X
        and the dot product WH. Note that values different from 'frobenius'
        (or 2) and 'kullback-leibler' (or 1) lead to significantly slower
        fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input
        matrix X cannot contain zeros. Used only in 'mu' solver.

        .. versionadded:: 0.19

    tol : float, default: 1e-4
        Tolerance of the stopping condition.

    max_iter : integer, default: 200
        Maximum number of iterations before timing out.

    random_state : int, RandomState instance or None, optional, default: None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    alpha : double, default: 0.
        Constant that multiplies the regularization terms. Set it to zero to
        have no regularization.

        .. versionadded:: 0.17
           *alpha* used in the Coordinate Descent solver.

    l1_ratio : double, default: 0.
        The regularization mixing parameter, with 0 <= l1_ratio <= 1.
        For l1_ratio = 0 the penalty is an elementwise L2 penalty
        (aka Frobenius Norm).
        For l1_ratio = 1 it is an elementwise L1 penalty.
        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.

        .. versionadded:: 0.17
           Regularization parameter *l1_ratio* used in the Coordinate Descent
           solver.

    verbose : bool, default=False
        Whether to be verbose.

    shuffle : boolean, default: False
        If true, randomize the order of coordinates in the CD solver.

        .. versionadded:: 0.17
           *shuffle* parameter used in the Coordinate Descent solver.

    Attributes
    ----------
    components_ : array, [n_components, n_features]
        Factorization matrix, sometimes called 'dictionary'.

    reconstruction_err_ : number
        Frobenius norm of the matrix difference, or beta-divergence, between
        the training data ``X`` and the reconstructed data ``WH`` from
        the fitted model.

    n_iter_ : int
        Actual number of iterations.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
    >>> from sklearn.decomposition import NMF
    >>> model = NMF(n_components=2, init='random', random_state=0)
    >>> W = model.fit_transform(X)
    >>> H = model.components_

    References
    ----------
    Cichocki, Andrzej, and P. H. A. N. Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.

    Fevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix
    factorization with the beta-divergence. Neural Computation, 23(9).
    NrB   rD   -C6?rr           r   Fc             C   sF   || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	|| _
d S )N)r\   rK   rI   rJ   rx   ry   rT   r:   r;   r{   rm   )selfr\   rK   rI   rJ   rx   ry   rT   r:   r;   r{   rm   r   r   r   __init__  s    zNMF.__init__c             C   s   t |dtd}t|||| j| jd| j| j| j| j| j	| j
d| j| j| jd\}}}t|||| jdd| _|jd | _|| _|| _|S )a  Learn a NMF model for the data X and returns the transformed data.

        This is more efficient than calling fit followed by transform.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Data matrix to be decomposed

        y : Ignored

        W : array-like, shape (n_samples, n_components)
            If init='custom', it is used as initial guess for the solution.

        H : array-like, shape (n_components, n_features)
            If init='custom', it is used as initial guess for the solution.

        Returns
        -------
        W : array, shape (n_samples, n_components)
            Transformed data.
        )ru   r   )rv   rj   Tr6   )r   r.   r/   r\   rK   rz   rI   rJ   rx   ry   r:   r;   r<   rT   r{   rm   )r0   r   )r	   r9   r   r\   rK   rI   rJ   rx   ry   r:   r;   rT   r{   rm   r5   Zreconstruction_err_r   n_components_components_n_iter_)r   r   rf   r.   r/   r   r   r   r   fit_transform  s    

zNMF.fit_transformc             K   s   | j |f| | S )a  Learn a NMF model for the data X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Data matrix to be decomposed

        y : Ignored

        Returns
        -------
        self
        )r   )r   r   rf   Zparamsr   r   r   fit  s    zNMF.fitc             C   sT   t | d t|d| j| j| jd| j| j| j| j| j	| j
d| j| j| jd\}}}|S )aU  Transform the data X according to the fitted NMF model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Data matrix to be transformed by the model

        Returns
        -------
        W : array, shape (n_samples, n_components)
            Transformed data
        r   NFr6   )r   r.   r/   r\   rK   rz   rI   rJ   rx   ry   r:   r;   r<   rT   r{   rm   )r   r   r   r   rK   rI   rJ   rx   ry   r:   r;   rT   r{   rm   )r   r   r.   _r   r   r   r   	transform  s    


zNMF.transformc             C   s   t | d t|| jS )ay  Transform data back to its original space.

        Parameters
        ----------
        W : {array-like, sparse matrix}, shape (n_samples, n_components)
            Transformed data matrix

        Returns
        -------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Data matrix of original shape

        .. versionadded:: 0.18
        r   )r   r   r   r   )r   r.   r   r   r   inverse_transform  s    
zNMF.inverse_transform)NNrB   rD   r   rr   Nr   r   r   F)NNN)N)	__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r     s       

*
r   )F)NrR   N)
rq   rr   r   r   r   r   Tr   FN)NNNT)	rD   rr   rq   r   r   r   r   Tr   )NNNrS   TrB   rD   rq   rr   r   r   NNr   F)7r   Z
__future__r   r   Zmathr   rF   rO   r   Znumpyr   Zscipy.sparseZsparser$   baser   r   Zutilsr   r	   Zutils.extmathr
   r   r   r   Zutils.validationr   r   
exceptionsr   Z
cdnmf_fastr   r   Zfloat32r]   r)   ZIntegralZintegerr   r   r   r!   r5   r(   rA   rL   r#   ri   rp   r}   r   r   r   r   r   r   r   r   r   <module>   s\   

j 
 
  
i
^R   
      
 Z