B
    	\uj              	   @   s   d Z ddlZddlmZmZmZmZ ddlm	Z	m
Z
 ddlmZ ddlmZmZmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ d ddZd!ddZd"ddZd#ddZG dd deeeZdS )$zLocally Linear Embedding    N)eighsvdqrsolve)eye
csr_matrix)eigsh   )BaseEstimatorTransformerMixin_UnstableArchMixin)check_random_statecheck_array)stable_cumsum)check_is_fitted)FLOAT_DTYPES)NearestNeighborsMbP?c             C   s   t | td} t |tdd}| jd |jd  }}tj||f| jd}tj|| jd}xt|dddD ]\}}|j	| |  }	t
|	|	j	}
t|
}|dkr|| }n|}|
jdd|jd d   |7  < t|
|dd}|t| ||ddf< qhW |S )	ab  Compute barycenter weights of X from Y along the first axis

    We estimate the weights to assign to each point in Y[i] to recover
    the point X[i]. The barycenter weights sum to 1.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_dim)

    Z : array-like, shape (n_samples, n_neighbors, n_dim)

    reg : float, optional
        amount of regularization to add for the problem to be
        well-posed in the case of n_neighbors > n_dim

    Returns
    -------
    B : array-like, shape (n_samples, n_neighbors)

    Notes
    -----
    See developers note for more information.
    )dtypeT)r   Zallow_ndr      r	   N)Zsym_pos)r   r   shapenpemptyr   ones	enumerate	transposeTdottraceflatr   sum)XZreg	n_samplesn_neighborsBviACGr   Rw r.   >lib/python3.7/site-packages/sklearn/manifold/locally_linear.pybarycenter_weights   s     

"r0   c       	      C   s   t |d |d| }|j} | jd }|j| ddddddf }t| | | |d}td|| d |}t|	 |	 |f||fdS )	a4  Computes the barycenter weighted graph of k-Neighbors for points in X

    Parameters
    ----------
    X : {array-like, NearestNeighbors}
        Sample data, shape = (n_samples, n_features), in the form of a
        numpy array or a NearestNeighbors object.

    n_neighbors : int
        Number of neighbors for each sample.

    reg : float, optional
        Amount of regularization when solving the least-squares
        problem. Only relevant if mode='barycenter'. If None, use the
        default.

    n_jobs : int or None, optional (default=None)
        The number of parallel jobs to run for neighbors search.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    Returns
    -------
    A : sparse matrix in CSR format, shape = [n_samples, n_samples]
        A[i, j] is assigned the weight of edge that connects i to j.

    See also
    --------
    sklearn.neighbors.kneighbors_graph
    sklearn.neighbors.radius_neighbors_graph
    r   )n_jobsr   F)return_distanceN)r#   )r   )
r   fit_fit_Xr   
kneighborsr0   r   Zaranger   Zravel)	r!   r%   r#   r1   Zknnr$   inddataZindptrr.   r.   r/   barycenter_kneighbors_graphC   s    !
r8   r   arpackư>d   c          
   C   s:  |dkr,| j d dkr(|| dk r(d}nd}|dkrt|}|dd| j d }y t| || d	|||d
\}}	W n. tk
r }
 ztd|
 W dd}
~
X Y nX |	dd|df t||d fS |dkr*t| dr| 	 } t
| ||| d fdd\}}	tt|}|	dd|f t|fS td| dS )a  
    Find the null space of a matrix M.

    Parameters
    ----------
    M : {array, matrix, sparse matrix, LinearOperator}
        Input covariance matrix: should be symmetric positive semi-definite

    k : integer
        Number of eigenvalues/vectors to return

    k_skip : integer, optional
        Number of low eigenvalues to skip.

    eigen_solver : string, {'auto', 'arpack', 'dense'}
        auto : algorithm will attempt to choose the best method for input data
        arpack : use arnoldi iteration in shift-invert mode.
                    For this method, M may be a dense matrix, sparse matrix,
                    or general linear operator.
                    Warning: ARPACK can be unstable for some problems.  It is
                    best to try several random seeds in order to check results.
        dense  : use standard dense matrix operations for the eigenvalue
                    decomposition.  For this method, M must be an array
                    or matrix type.  This method should be avoided for
                    large problems.

    tol : float, optional
        Tolerance for 'arpack' method.
        Not used if eigen_solver=='dense'.

    max_iter : int
        Maximum number of iterations for 'arpack' method.
        Not used if eigen_solver=='dense'

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Used when ``solver`` == 'arpack'.

    autor      
   r9   denser   g        )Zsigmatolmaxiterv0zError in determining null-space with ARPACK. Error message: '%s'. Note that method='arpack' can fail when the weight matrix is singular or otherwise ill-behaved.  method='dense' is recommended. See online documentation for more information.NtoarrayT)ZeigvalsZoverwrite_azUnrecognized eigen_solver '%s')r   r   Zuniformr   RuntimeError
ValueErrorr   r    hasattrrD   r   Zargsortabs)Mkk_skipeigen_solverrA   max_iterrandom_staterC   Zeigen_valuesZeigen_vectorsmsgindexr.   r.   r/   
null_spacen   s.    +&

rQ   r<   standard-C6?-q=c       ;   	   C   s	  |dkrt d| |dkr(t d| t|d |d}||  |j} | j\}}||krbt d||krzt d||f |d	krt d
|dk}|dkrt||||d}|rt|jd|ji| }|j| 	 }n:|j| |j | 
 }|jdd|jd	 d   d7  < n|dkrR||d  d }||| krDt d|j| |d dd}|ddddf }tj|d| | ftjd}d|ddd	f< tj||ftjd}||k}xt|D ]z}| ||  }||d	8 }|rt|d	dd	 }n,t||j}t|d dddddf }|ddd|f |dddd| f< d| }xbt|D ]V}|dd||d f |dd||f  |dd||| | f< ||| 7 }q^W t|\}}|dd|d df }|d	}d|tt||k < || }t|| || \} }!|| |!f  t||j7  < qW |rt|}n|dkr||k rnt d|j| |d dd}|ddddf }t|||f}"t||}#t||#g}$||k}|rx@t|D ]4}| ||  | |  }%t|%dd\|"|< |$|< }&qW |$dC }$nnxlt|D ]`}| ||  | |  }%t|%|%j}'t|'\}(})|(ddd |$|< |)dddddf |"|< q"W d|$d }t|"d	ddt|}*|*ddd|#f  |$|dddf    < |*dd|#df  |dddf   < t||f}+x*t|D ]}t|"| |*| |+|< qW |+|+ddddf  }+|$dd|df d|$ddd|f d },t|,}-tj|t d}.t!|$d}/|/ddddf |/ddddf  d }0x0t|D ]$}t"|0|dddf |-|.|< qW |.||# 7 }.tj||ftjd}xTt|D ]F}|.| }1|"|dd||1 df }2tj#$|2d	t%|1 }3t&|1|3t|2jt| }4tj#$|4}5|5|	k r|4d	9 }4n|4|5 }4|2dt't|2|4|4  d|3 |+|dddf   }6t|| || \} }!|| |!f  t|6|6j7  < |6d}7|||| f  |78  < ||| |f  |78  < |||f  |17  < q8W |rt|}nd|dkr|j| |d dd}|ddddf }t||f}||k}xt|D ]
}| ||  }8|8|8d	8 }8|r"t|8ddd	 }9n,t|8|8j}t|d dddddf }9t||d f}|9ddd|f |ddddf< dt%| |ddd	f< t||j}:t|| || \} }!|| |!f  |:8  < ||| || f  d7  < qW t(||d||||
dS )a  Perform a Locally Linear Embedding analysis on the data.

    Read more in the :ref:`User Guide <locally_linear_embedding>`.

    Parameters
    ----------
    X : {array-like, NearestNeighbors}
        Sample data, shape = (n_samples, n_features), in the form of a
        numpy array or a NearestNeighbors object.

    n_neighbors : integer
        number of neighbors to consider for each point.

    n_components : integer
        number of coordinates for the manifold.

    reg : float
        regularization constant, multiplies the trace of the local covariance
        matrix of the distances.

    eigen_solver : string, {'auto', 'arpack', 'dense'}
        auto : algorithm will attempt to choose the best method for input data

        arpack : use arnoldi iteration in shift-invert mode.
                    For this method, M may be a dense matrix, sparse matrix,
                    or general linear operator.
                    Warning: ARPACK can be unstable for some problems.  It is
                    best to try several random seeds in order to check results.

        dense  : use standard dense matrix operations for the eigenvalue
                    decomposition.  For this method, M must be an array
                    or matrix type.  This method should be avoided for
                    large problems.

    tol : float, optional
        Tolerance for 'arpack' method
        Not used if eigen_solver=='dense'.

    max_iter : integer
        maximum number of iterations for the arpack solver.

    method : {'standard', 'hessian', 'modified', 'ltsa'}
        standard : use the standard locally linear embedding algorithm.
                   see reference [1]_
        hessian  : use the Hessian eigenmap method.  This method requires
                   n_neighbors > n_components * (1 + (n_components + 1) / 2.
                   see reference [2]_
        modified : use the modified locally linear embedding algorithm.
                   see reference [3]_
        ltsa     : use local tangent space alignment algorithm
                   see reference [4]_

    hessian_tol : float, optional
        Tolerance for Hessian eigenmapping method.
        Only used if method == 'hessian'

    modified_tol : float, optional
        Tolerance for modified LLE method.
        Only used if method == 'modified'

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Used when ``solver`` == 'arpack'.

    n_jobs : int or None, optional (default=None)
        The number of parallel jobs to run for neighbors search.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    Returns
    -------
    Y : array-like, shape [n_samples, n_components]
        Embedding vectors.

    squared_error : float
        Reconstruction error for the embedding vectors. Equivalent to
        ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.

    References
    ----------

    .. [1] `Roweis, S. & Saul, L. Nonlinear dimensionality reduction
        by locally linear embedding.  Science 290:2323 (2000).`
    .. [2] `Donoho, D. & Grimes, C. Hessian eigenmaps: Locally
        linear embedding techniques for high-dimensional data.
        Proc Natl Acad Sci U S A.  100:5591 (2003).`
    .. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear
        Embedding Using Multiple Weights.`
        http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382
    .. [4] `Zhang, Z. & Zha, H. Principal manifolds and nonlinear
        dimensionality reduction via tangent space alignment.
        Journal of Shanghai Univ.  8:406 (2004)`
    )r<   r9   r?   zunrecognized eigen_solver '%s')rR   hessianmodifiedltsazunrecognized method '%s'r   )r%   r1   z>output dimension must be less than or equal to input dimensionzHExpected n_neighbors <= n_samples,  but n_samples = %d, n_neighbors = %dr   zn_neighbors must be positiver?   rR   )r%   r#   r1   formatNrU   r	   z^for method='hessian', n_neighbors must be greater than [n_components * (n_components + 3) / 2]F)r%   r2   )r   )Zfull_matricesr@   rV   z1modified LLE requires n_neighbors >= n_componentsTgMbP?rW   g      ?)rK   rL   rA   rM   rN   ))rF   r   r3   r4   r   r8   r   rX   r   ZtocsrrD   r   r5   r   r   Zfloat64ZzerosrangeZmeanr   r   r   r   r    whererH   Zmeshgridr   minr   r   Zmedianintr   ZsearchsortedZlinalgZnormZsqrtZfullZouterrQ   );r!   r%   n_componentsr#   rL   rA   rM   methodhessian_tolmodified_tolrN   r1   ZnbrsNZd_inZM_sparseWrI   Zdp	neighborsZYiZuse_svdr(   ZGiUZCijrJ   Qr,   r-   SZnbrs_xZnbrs_yVZnevZevalsZX_nbrs_ZC_nbrsZeviZviZtmpZw_regZrhoZetaZs_rangeZevals_cumsumZ	eta_rangeZs_iZViZalpha_ihZnorm_hZWiZWi_sum1ZXir'   ZGiGiTr.   r.   r/   locally_linear_embedding   s   d


&
(.
$



 ,(4

,$ 



$"
rk   c               @   s>   e Zd ZdZdddZdd ZdddZdddZdd ZdS )LocallyLinearEmbeddingao  Locally Linear Embedding

    Read more in the :ref:`User Guide <locally_linear_embedding>`.

    Parameters
    ----------
    n_neighbors : integer
        number of neighbors to consider for each point.

    n_components : integer
        number of coordinates for the manifold

    reg : float
        regularization constant, multiplies the trace of the local covariance
        matrix of the distances.

    eigen_solver : string, {'auto', 'arpack', 'dense'}
        auto : algorithm will attempt to choose the best method for input data

        arpack : use arnoldi iteration in shift-invert mode.
                    For this method, M may be a dense matrix, sparse matrix,
                    or general linear operator.
                    Warning: ARPACK can be unstable for some problems.  It is
                    best to try several random seeds in order to check results.

        dense  : use standard dense matrix operations for the eigenvalue
                    decomposition.  For this method, M must be an array
                    or matrix type.  This method should be avoided for
                    large problems.

    tol : float, optional
        Tolerance for 'arpack' method
        Not used if eigen_solver=='dense'.

    max_iter : integer
        maximum number of iterations for the arpack solver.
        Not used if eigen_solver=='dense'.

    method : string ('standard', 'hessian', 'modified' or 'ltsa')
        standard : use the standard locally linear embedding algorithm.  see
                   reference [1]
        hessian  : use the Hessian eigenmap method. This method requires
                   ``n_neighbors > n_components * (1 + (n_components + 1) / 2``
                   see reference [2]
        modified : use the modified locally linear embedding algorithm.
                   see reference [3]
        ltsa     : use local tangent space alignment algorithm
                   see reference [4]

    hessian_tol : float, optional
        Tolerance for Hessian eigenmapping method.
        Only used if ``method == 'hessian'``

    modified_tol : float, optional
        Tolerance for modified LLE method.
        Only used if ``method == 'modified'``

    neighbors_algorithm : string ['auto'|'brute'|'kd_tree'|'ball_tree']
        algorithm to use for nearest neighbors search,
        passed to neighbors.NearestNeighbors instance

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Used when ``eigen_solver`` == 'arpack'.

    n_jobs : int or None, optional (default=None)
        The number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    Attributes
    ----------
    embedding_ : array-like, shape [n_samples, n_components]
        Stores the embedding vectors

    reconstruction_error_ : float
        Reconstruction error associated with `embedding_`

    nbrs_ : NearestNeighbors object
        Stores nearest neighbors instance, including BallTree or KDtree
        if applicable.

    Examples
    --------
    >>> from sklearn.datasets import load_digits
    >>> from sklearn.manifold import LocallyLinearEmbedding
    >>> X, _ = load_digits(return_X_y=True)
    >>> X.shape
    (1797, 64)
    >>> embedding = LocallyLinearEmbedding(n_components=2)
    >>> X_transformed = embedding.fit_transform(X[:100])
    >>> X_transformed.shape
    (100, 2)

    References
    ----------

    .. [1] `Roweis, S. & Saul, L. Nonlinear dimensionality reduction
        by locally linear embedding.  Science 290:2323 (2000).`
    .. [2] `Donoho, D. & Grimes, C. Hessian eigenmaps: Locally
        linear embedding techniques for high-dimensional data.
        Proc Natl Acad Sci U S A.  100:5591 (2003).`
    .. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear
        Embedding Using Multiple Weights.`
        http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382
    .. [4] `Zhang, Z. & Zha, H. Principal manifolds and nonlinear
        dimensionality reduction via tangent space alignment.
        Journal of Shanghai Univ.  8:406 (2004)`
       r	   MbP?r<   ư>r;   rR   -C6?-q=Nc             C   sL   || _ || _|| _|| _|| _|| _|| _|| _|	| _|| _	|
| _
|| _d S )N)r%   r]   r#   rL   rA   rM   r^   r_   r`   rN   neighbors_algorithmr1   )selfr%   r]   r#   rL   rA   rM   r^   r_   r`   rr   rN   r1   r.   r.   r/   __init__|  s    zLocallyLinearEmbedding.__init__c             C   sz   t | j| j| jd| _t| j}t|td}| j	| t
| j| j| j| j| j| j| j| j| j|| j| jd\| _| _d S )N)	algorithmr1   )r   )	rL   rA   rM   r^   r_   r`   rN   r#   r1   )r   r%   rr   r1   nbrs_r   rN   r   floatr3   rk   r]   rL   rA   rM   r^   r_   r`   r#   
embedding_Zreconstruction_error_)rs   r!   rN   r.   r.   r/   _fit_transform  s    
z%LocallyLinearEmbedding._fit_transformc             C   s   |  | | S )a  Compute the embedding vectors for data X

        Parameters
        ----------
        X : array-like of shape [n_samples, n_features]
            training set.

        y : Ignored

        Returns
        -------
        self : returns an instance of self.
        )ry   )rs   r!   yr.   r.   r/   r3     s    
zLocallyLinearEmbedding.fitc             C   s   |  | | jS )a-  Compute the embedding vectors for data X and transform X.

        Parameters
        ----------
        X : array-like of shape [n_samples, n_features]
            training set.

        y : Ignored

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        )ry   rx   )rs   r!   rz   r.   r.   r/   fit_transform  s    
z$LocallyLinearEmbedding.fit_transformc             C   s   t | d t|}| jj|| jdd}t|| jj| | jd}t	|j
d | jf}x6t|j
d D ]$}t| j||  j|| ||< qdW |S )a  
        Transform new points into embedding space.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components]

        Notes
        -----
        Because of scaling performed by this method, it is discouraged to use
        it together with methods that are not scale-invariant (like SVMs)
        rv   F)r%   r2   )r#   r   )r   r   rv   r5   r%   r0   r4   r#   r   r   r   r]   rY   r   rx   r   )rs   r!   r6   ZweightsZX_newr(   r.   r.   r/   	transform  s    

$z LocallyLinearEmbedding.transform)rm   r	   rn   r<   ro   r;   rR   rp   rq   r<   NN)N)N)	__name__
__module____qualname____doc__rt   ry   r3   r{   r|   r.   r.   r.   r/   rl   	  s   q   


rl   )r   )r   N)r   r9   r:   r;   N)	r   r<   r:   r;   rR   rS   rT   NN) r   Znumpyr   Zscipy.linalgr   r   r   r   Zscipy.sparser   r   Zscipy.sparse.linalgr   baser
   r   r   Zutilsr   r   Zutils.extmathr   Zutils.validationr   r   rc   r   r0   r8   rQ   rk   rl   r.   r.   r.   r/   <module>   s*   
/
+ 
N  
  L