B
    	\/                 @   s  d Z ddlmZ ddlmZ ddlZddlZddlmZm	Z	 ddl
ZddlmZmZ ddlmZ dd	lmZ d
dlmZ d
dlmZ d
dlmZ d
dlmZmZmZ d
dlmZ d
dlm Z  d
dl!m"Z" d
dl#m$Z$ d
dl%m&Z&m'Z'm(Z( d
dl%m)Z* e+ej,ej,e-e. dddddddddddd d!d"d#d$d%d&d'd(g d)Z/e+g g e. d)Z0d*d+ Z1d,d- Z2G d.d/ d/e"3eeZ4d0d1 Z5G d2d3 d3e6Z7d4d5 Z8G d6d7 d7e6Z9G d8d9 d9e6Z:G d:d; d;e6Z;G d<d= d=e6Z<dS )>z,Base and mixin classes for nearest neighbors    )partial)LooseVersionN)ABCMetaabstractmethod)
csr_matrixissparse   )BallTree)KDTree   )BaseEstimator)pairwise_distances_chunked)PAIRWISE_DISTANCE_FUNCTIONS)	check_X_ycheck_arraygen_even_slices)check_classification_targets)check_is_fitted)six)DataConversionWarning)Paralleldelayedeffective_n_jobs)__version__Z
braycurtisZcanberra	chebyshevZcorrelationZcosineZdiceZhammingZjaccardZ	kulsinskiZmahalanobisZmatching	minkowskiZrogerstanimotoZ
russellraoZ
seuclideanZsokalmichenerZsokalsneathZsqeuclideanZyule
wminkowski)	ball_treekd_treebrutec             C   s$   | dkr| S t | r| S tddS )z$Check to make sure weights are valid)NuniformdistancezOweights not recognized: should be 'uniform', 'distance', or a callable functionN)callable
ValueError)weights r%   5lib/python3.7/site-packages/sklearn/neighbors/base.py_check_weights3   s
    r'   c          	   C   s   |dkrdS |dkr| j t tkrhxt| D ]4\}}t|drVd|krV|dk| |< q.d| | |< q.W nDtjdd d|  } W dQ R X t| }tj|d	d
}|| | |< | S t|r|| S t	ddS )ax  Get the weights from an array of distances and a parameter ``weights``

    Parameters
    ===========
    dist : ndarray
        The input distances
    weights : {'uniform', 'distance' or a callable}
        The kind of weighting used

    Returns
    ========
    weights_arr : array of the same shape as ``dist``
        if ``weights == 'uniform'``, then returns None
    )Nr    Nr!   __contains__g        g      ?ignore)Zdivider   )axiszOweights not recognized: should be 'uniform', 'distance', or a callable function)
dtypenpobject	enumeratehasattrZerrstateZisinfanyr"   r#   )distr$   Zpoint_dist_iZ
point_distZinf_maskZinf_rowr%   r%   r&   _get_weights>   s"    
r2   c            	   @   s:   e Zd ZdZedddZd	d
 Zdd Zedd Z	dS )NeighborsBasez,Base class for nearest neighbors estimators.Nauto   r   r   c	       	      C   s<   || _ || _|| _|| _|| _|| _|| _|| _|   d S )N)	n_neighborsradius	algorithm	leaf_sizemetricmetric_paramspn_jobs_check_algorithm_metric)	selfr6   r7   r8   r9   r:   r<   r;   r=   r%   r%   r&   __init__m   s    zNeighborsBase.__init__c             C   s   | j dkrtd| j  | j dkrV| jdkr2d}q\t| jsJ| jtd krPd}q\d}n| j }t| jr| j dkrtd| j n | jt| krtd	| j|f | jd k	rd
| jkrtjdtdd | jd
 }n| j	}| jdkr|dk rtdd S )N)r4   r   r   r   zunrecognized algorithm: '%s'r4   precomputedr   r   r   z7kd_tree algorithm does not support callable metric '%s'zMetric '%s' not valid. Use sorted(sklearn.neighbors.VALID_METRICS['%s']) to get valid options. Metric can also be a callable function.r<   z\Parameter p is found in metric_params. The corresponding parameter from __init__ is ignored.   )
stacklevel)r   r   r   z/p must be greater than one for minkowski metric)
r8   r#   r:   r"   VALID_METRICSr;   warningswarnSyntaxWarningr<   )r?   Z	alg_checkeffective_pr%   r%   r&   r>   |   s2    






z%NeighborsBase._check_algorithm_metricc             C   s  |    | jd kri | _n| j | _| jd| j}| jdkrJ|| jd< | j| _| jdkr| jdd}|dk r|t	dn<|dkrd| _n,|dkrd| _n|t
jkrd	| _n
|| jd< t|tr|j| _|j| _|j| _| S t|tr|j| _|| _d
| _| S t|tr&|j| _|| _d| _| S t|dd}|jd }|dkrNt	dt|r| jdkrntd | jtd krt| jst	d| j | | _d | _d| _| S | j| _|| _| jdkrF| jd ks| j| jjd d k r@| jdkr@| jtd krd| _n*t| js0| jtd
 kr8d
| _nd| _nd| _| jd
krrt|| jfd| ji| j| _nN| jdkrt|| jfd| ji| j| _n"| jdkrd | _nt	d| j | jd k	r| jdkrt	d| j n(t
t | jt
j!st"dt | j | S )Nr<   )r   r   r   r   r   z/p must be greater than one for minkowski metricZ	manhattan	euclideanr   r   r   csr)accept_sparser   z n_samples must be greater than 0)r4   r   z4cannot use tree with sparse input: using brute forcer   zMetric '%s' not valid for sparse input. Use sorted(sklearn.neighbors.VALID_METRICS_SPARSE['brute']) to get valid options. Metric can also be a callable function.r4   rA   r:   zalgorithm = '%s' not recognizedz Expected n_neighbors > 0. Got %dz7n_neighbors does not take %s value, enter integer value)#r>   r;   effective_metric_params_copygetr<   r:   effective_metric_popr#   r,   inf
isinstancer3   _fit_X_tree_fit_methodr	   datar
   r   shaper   r8   rE   rF   VALID_METRICS_SPARSEr"   r6   rD   r9   
issubdtypetypeinteger	TypeError)r?   XrH   r<   	n_samplesr%   r%   r&   _fit   s    














zNeighborsBase._fitc             C   s
   | j dkS )NrA   )r:   )r?   r%   r%   r&   	_pairwise  s    zNeighborsBase._pairwise)NNr4   r5   r   r   NN)
__name__
__module____qualname____doc__r   r@   r>   r_   propertyr`   r%   r%   r%   r&   r3   j   s     (tr3   c             C   s   |  |||S )zHelper for the Parallel calls in KNeighborsMixin.kneighbors

    The Cython method tree.query is not directly picklable by cloudpickle
    under PyPy.
    )Zquery)treerV   r6   return_distancer%   r%   r&   _tree_query_parallel_helper  s    rh   c               @   s,   e Zd ZdZdd ZdddZdd	d
ZdS )KNeighborsMixinzMixin for k-neighbors searchesc             C   s   t |jd dddf }t j||d dd}|ddd|f }||t |||f f }|r| jdkrt |||f |f}q|||f |f}n|}|S )aW  Reduce a chunk of distances to the nearest neighbors

        Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`

        Parameters
        ----------
        dist : array of shape (n_samples_chunk, n_samples)
        start : int
            The index in X which the first row of dist corresponds to.
        n_neighbors : int
        return_distance : bool

        Returns
        -------
        dist : array of shape (n_samples_chunk, n_neighbors), optional
            Returned only if return_distance
        neigh : array of shape (n_samples_chunk, n_neighbors)
        r   Nr   )r*   rI   )r,   arangerW   ZargpartitionZargsortrO   sqrt)r?   r1   startr6   rg   sample_range	neigh_indresultr%   r%   r&   _kneighbors_reduce_func*  s    
z'KNeighborsMixin._kneighbors_reduce_funcNTc                s  t d dkrjn8dkr0td n"tttjsRtdt  dk	rld}t dd nd	}j	 d
7 j	j
d }|krtd|f  j
\}}t|dddf }tj}	jdkr$tjd}
jdkrdd	inj}tt j	f|
j|	d|}njdkrt rHtdj tttdk }tjdk sj|r|rtdnd}tt|dddi}nttddi}t|	f| fddt j
d |	D }ntdrt| \}}t|t|f}n
t|}|s|S r&|\}}n|}||k}tj |d
d}d|dddf |< t!|| |d
 f}rt!|| |d
 f}||fS |S dS )a  Finds the K-neighbors of a point.
        Returns indices of and distances to the neighbors of each point.

        Parameters
        ----------
        X : array-like, shape (n_query, n_features),                 or (n_query, n_indexed) if metric == 'precomputed'
            The query point or points.
            If not provided, neighbors of each indexed point are returned.
            In this case, the query point is not considered its own neighbor.

        n_neighbors : int
            Number of neighbors to get (default is the value
            passed to the constructor).

        return_distance : boolean, optional. Defaults to True.
            If False, distances will not be returned

        Returns
        -------
        dist : array
            Array representing the lengths to points, only present if
            return_distance=True

        ind : array
            Indices of the nearest points in the population matrix.

        Examples
        --------
        In the following example, we construct a NeighborsClassifier
        class from an array representing our data set and ask who's
        the closest point to [1,1,1]

        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
        >>> from sklearn.neighbors import NearestNeighbors
        >>> neigh = NearestNeighbors(n_neighbors=1)
        >>> neigh.fit(samples) # doctest: +ELLIPSIS
        NearestNeighbors(algorithm='auto', leaf_size=30, ...)
        >>> print(neigh.kneighbors([[1., 1., 1.]])) # doctest: +ELLIPSIS
        (array([[0.5]]), array([[2]]))

        As you can see, it returns [[0.5]], and [[2]], which means that the
        element is at distance 0.5 and is the third element of samples
        (indexes start at 0). You can also query for multiple points:

        >>> X = [[0., 1., 0.], [1., 0., 1.]]
        >>> neigh.kneighbors(X, return_distance=False) # doctest: +ELLIPSIS
        array([[1],
               [2]]...)

        rU   Nr   z Expected n_neighbors > 0. Got %dz7n_neighbors does not take %s value, enter integer valueFrJ   )rK   Tr   zHExpected n_neighbors <= n_samples,  but n_samples = %d, n_neighbors = %dr   )r6   rg   rI   squared)reduce_funcr:   r=   )r   r   zQ%s does not work with sparse matrices. Densify the data, or set algorithm='brute'z0.12)rB   )check_picklebackend	threadingpreferthreadsc             3   s"   | ]}j  | V  qd S )N)rT   ).0s)r]   delayed_queryr6   rg   r?   r%   r&   	<genexpr>  s   z-KNeighborsMixin.kneighbors.<locals>.<genexpr>z$internal: _fit_method not recognized)r*   )"r   r6   r#   r,   rY   rZ   r[   r\   r   rS   rW   rj   r   r=   rU   r   rp   rO   rL   listr   r   r   joblib_versionsysversion_infor   rh   r   r   zipZvstackallreshape)r?   r]   r6   rg   query_is_trainZ
train_sizer^   _rm   r=   rr   kwdsro   Z
old_joblibrs   parallel_kwargsr1   rn   Zsample_maskZdup_gr_nbrsr%   )r]   rz   r6   rg   r?   r&   
kneighborsM  s    4











zKNeighborsMixin.kneighborsconnectivityc             C   s   |dkr| j }|dk	r.t|dd}|jd }n| jjd }| jjd }|| }td|d |}|dkrt|| }| j||dd}	n4|d	kr| j||d
d\}}	t|}nt	d| t
||	 |f||fd}
|
S )a,  Computes the (weighted) graph of k-Neighbors for points in X

        Parameters
        ----------
        X : array-like, shape (n_query, n_features),                 or (n_query, n_indexed) if metric == 'precomputed'
            The query point or points.
            If not provided, neighbors of each indexed point are returned.
            In this case, the query point is not considered its own neighbor.

        n_neighbors : int
            Number of neighbors for each sample.
            (default is value passed to the constructor).

        mode : {'connectivity', 'distance'}, optional
            Type of returned matrix: 'connectivity' will return the
            connectivity matrix with ones and zeros, in 'distance' the
            edges are Euclidean distance between points.

        Returns
        -------
        A : sparse matrix in CSR format, shape = [n_samples, n_samples_fit]
            n_samples_fit is the number of samples in the fitted data
            A[i, j] is assigned the weight of edge that connects i to j.

        Examples
        --------
        >>> X = [[0], [3], [1]]
        >>> from sklearn.neighbors import NearestNeighbors
        >>> neigh = NearestNeighbors(n_neighbors=2)
        >>> neigh.fit(X) # doctest: +ELLIPSIS
        NearestNeighbors(algorithm='auto', leaf_size=30, ...)
        >>> A = neigh.kneighbors_graph(X)
        >>> A.toarray()
        array([[1., 0., 1.],
               [0., 1., 1.],
               [1., 0., 1.]])

        See also
        --------
        NearestNeighbors.radius_neighbors_graph
        NrJ   )rK   r   r   r   F)rg   r!   TzRUnsupported mode, must be one of "connectivity" or "distance" but got "%s" instead)rW   )r6   r   rW   rS   r,   rj   onesr   ravelr#   r   )r?   r]   r6   mode
n_samples1
n_samples2Z	n_nonzeroA_indptrA_dataA_indkneighbors_graphr%   r%   r&   r     s,    ,z KNeighborsMixin.kneighbors_graph)NNT)NNr   )ra   rb   rc   rd   rp   r   r   r%   r%   r%   r&   ri   '  s   #
 # ri   c             C   s   |  |||S )zHelper for the Parallel calls in RadiusNeighborsMixin.radius_neighbors

    The Cython method tree.query_radius is not directly picklable by
    cloudpickle under PyPy.
    )Zquery_radius)rf   rV   r7   rg   r%   r%   r&   "_tree_query_radius_parallel_helper>  s    r   c               @   s,   e Zd ZdZdd ZdddZdd	d
ZdS )RadiusNeighborsMixinz)Mixin for radius-based neighbors searchesc                s`   fdd|D  |rX| j dkr8 fddt|D }n fddt|D }| f}n }|S )a<  Reduce a chunk of distances to the nearest neighbors

        Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`

        Parameters
        ----------
        dist : array of shape (n_samples_chunk, n_samples)
        start : int
            The index in X which the first row of dist corresponds to.
        radius : float
        return_distance : bool

        Returns
        -------
        dist : list of n_samples_chunk 1d arrays, optional
            Returned only if return_distance
        neigh : list of n_samples_chunk 1d arrays
        c                s   g | ]}t | kd  qS )r   )r,   where)rx   d)r7   r%   r&   
<listcomp>^  s    zFRadiusNeighborsMixin._radius_neighbors_reduce_func.<locals>.<listcomp>rI   c                s"   g | ]\}}t | |  qS r%   )r,   rk   )rx   ir   )rn   r%   r&   r   b  s   c                s   g | ]\}}| |  qS r%   r%   )rx   r   r   )rn   r%   r&   r   e  s   )rO   r.   )r?   r1   rl   r7   rg   resultsr%   )rn   r7   r&   _radius_neighbors_reduce_funcJ  s    



z2RadiusNeighborsMixin._radius_neighbors_reduce_funcNTc                s~  t d  dk	r$d}t dd n
d}j dkr<jjdkr0jdkrd9 d	di}nj}tjd
}t	 jf|jj
d|}rt| \}}	t|g }
t|	g }tjt|
dd}|
|dd< tjt|dd}||dd< ||f}n(t|g }tjt|dd}||dd< n؈jdkr t rTtdj tj
}tttdk rttddddi}nttddi}t|f| fddt jd |D }rtt| \}}t|t|f}n
t|}ntd|s|S r"|\}}n|}x@t|D ]4\}}||k}|| ||< r0|| | ||< q0W rv||fS |S dS )a
  Finds the neighbors within a given radius of a point or points.

        Return the indices and distances of each point from the dataset
        lying in a ball with size ``radius`` around the points of the query
        array. Points lying on the boundary are included in the results.

        The result points are *not* necessarily sorted by distance to their
        query point.

        Parameters
        ----------
        X : array-like, (n_samples, n_features), optional
            The query point or points.
            If not provided, neighbors of each indexed point are returned.
            In this case, the query point is not considered its own neighbor.

        radius : float
            Limiting distance of neighbors to return.
            (default is the value passed to the constructor).

        return_distance : boolean, optional. Defaults to True.
            If False, distances will not be returned

        Returns
        -------
        dist : array, shape (n_samples,) of arrays
            Array representing the distances to each point, only present if
            return_distance=True. The distance values are computed according
            to the ``metric`` constructor parameter.

        ind : array, shape (n_samples,) of arrays
            An array of arrays of indices of the approximate nearest points
            from the population matrix that lie within a ball of size
            ``radius`` around the query points.

        Examples
        --------
        In the following example, we construct a NeighborsClassifier
        class from an array representing our data set and ask who's
        the closest point to [1, 1, 1]:

        >>> import numpy as np
        >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
        >>> from sklearn.neighbors import NearestNeighbors
        >>> neigh = NearestNeighbors(radius=1.6)
        >>> neigh.fit(samples) # doctest: +ELLIPSIS
        NearestNeighbors(algorithm='auto', leaf_size=30, ...)
        >>> rng = neigh.radius_neighbors([[1., 1., 1.]])
        >>> print(np.asarray(rng[0][0])) # doctest: +ELLIPSIS
        [1.5 0.5]
        >>> print(np.asarray(rng[1][0])) # doctest: +ELLIPSIS
        [1 2]

        The first array returned contains the distances to all points which
        are closer than 1.6, while the second array returned contains their
        indices.  In general, multiple points can be queried at the same time.

        Notes
        -----
        Because the number of neighbors of each point is not necessarily
        equal, the results for multiple query points cannot be fit in a
        standard data array.
        For efficiency, `radius_neighbors` returns arrays of objects, where
        each object is a 1D array of indices or distances.
        rU   NFrJ   )rK   Tr   rI   rq   )r7   rg   )rr   r:   r=   r-   )r+   )r   r   zQ%s does not work with sparse matrices. Densify the data, or set algorithm='brute'z0.12)rs   rt   ru   rv   rw   c             3   s"   | ]}j  | V  qd S )N)rT   )rx   ry   )r]   rz   r7   rg   r?   r%   r&   r{     s   z8RadiusNeighborsMixin.radius_neighbors.<locals>.<genexpr>r   z$internal: _fit_method not recognized)r   r   rS   r7   rU   rO   rL   r   r   r   r=   r   sumr,   emptylenr   r#   r   r   r}   r   r   r   r   rW   tupleZhstackr.   )r?   r]   r7   rg   r   r   rr   r   Zdist_chunksZneigh_ind_chunksZ	dist_listZneigh_ind_listr1   rn   r=   r   ZindZind_neighbormaskr%   )r]   rz   r7   rg   r?   r&   radius_neighborsl  s    B













z%RadiusNeighborsMixin.radius_neighborsr   c             C   s   |dk	rt |dddgd}| jjd }|dkr4| j}|dkrR| j||dd	}d}n8|d
kr~| j||dd	\}}tt|}ntd| |jd }t	dd |D }	tt|}|dkrt
t|}ttjdtdt|	f}
t|||
f||fdS )a  Computes the (weighted) graph of Neighbors for points in X

        Neighborhoods are restricted the points at a distance lower than
        radius.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features], optional
            The query point or points.
            If not provided, neighbors of each indexed point are returned.
            In this case, the query point is not considered its own neighbor.

        radius : float
            Radius of neighborhoods.
            (default is the value passed to the constructor).

        mode : {'connectivity', 'distance'}, optional
            Type of returned matrix: 'connectivity' will return the
            connectivity matrix with ones and zeros, in 'distance' the
            edges are Euclidean distance between points.

        Returns
        -------
        A : sparse matrix in CSR format, shape = [n_samples, n_samples]
            A[i, j] is assigned the weight of edge that connects i to j.

        Examples
        --------
        >>> X = [[0], [3], [1]]
        >>> from sklearn.neighbors import NearestNeighbors
        >>> neigh = NearestNeighbors(radius=1.5)
        >>> neigh.fit(X) # doctest: +ELLIPSIS
        NearestNeighbors(algorithm='auto', leaf_size=30, ...)
        >>> A = neigh.radius_neighbors_graph(X)
        >>> A.toarray()
        array([[1., 0., 1.],
               [0., 1., 0.],
               [1., 0., 1.]])

        See also
        --------
        kneighbors_graph
        NrJ   ZcscZcoo)rK   r   r   F)rg   r!   TzQUnsupported mode, must be one of "connectivity", or "distance" but got %s insteadc             S   s   g | ]}t |qS r%   )r   )rx   ar%   r%   r&   r   M  s    z?RadiusNeighborsMixin.radius_neighbors_graph.<locals>.<listcomp>r   )r+   )rW   )r   rS   rW   r7   r   r,   Zconcatenater|   r#   Zarrayr   r   ZzerosintZcumsumr   )r?   r]   r7   r   r   r   r   r1   r   r6   r   r%   r%   r&   radius_neighbors_graph  s2    ,

z+RadiusNeighborsMixin.radius_neighbors_graph)NNT)NNr   )ra   rb   rc   rd   r   r   r   r%   r%   r%   r&   r   G  s
   "
  r   c               @   s   e Zd Zdd ZdS )SupervisedFloatMixinc             C   s2   t |ttfs"t||ddd\}}|| _| |S )a  Fit the model using X as training data and y as target values

        Parameters
        ----------
        X : {array-like, sparse matrix, BallTree, KDTree}
            Training data. If array or matrix, shape [n_samples, n_features],
            or [n_samples, n_samples] if metric='precomputed'.

        y : {array-like, sparse matrix}
            Target values, array of float values, shape = [n_samples]
             or [n_samples, n_outputs]
        rJ   T)multi_output)rR   r
   r	   r   _yr_   )r?   r]   yr%   r%   r&   fitY  s    zSupervisedFloatMixin.fitN)ra   rb   rc   r   r%   r%   r%   r&   r   X  s   r   c               @   s   e Zd Zdd ZdS )SupervisedIntegerMixinc             C   s  t |ttfs"t||ddd\}}|jdksD|jdkrp|jd dkrp|jdkr^tjdtdd d| _	|
d	}nd| _	t| g | _tj|jtjd
| _xPt| jjd D ]<}tj|dd|f dd\}| jdd|f< | j| qW | j	s
| jd | _| j | _| |S )a  Fit the model using X as training data and y as target values

        Parameters
        ----------
        X : {array-like, sparse matrix, BallTree, KDTree}
            Training data. If array or matrix, shape [n_samples, n_features],
            or [n_samples, n_samples] if metric='precomputed'.

        y : {array-like, sparse matrix}
            Target values of shape = [n_samples] or [n_samples, n_outputs]

        rJ   T)r   r   r   zA column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().)rC   F)r   )r+   N)Zreturn_inverser   )rR   r
   r	   r   ndimrW   rE   rF   r   Zoutputs_2d_r   r   Zclasses_r,   r   r   r   rangeuniqueappendr   r_   )r?   r]   r   kclassesr%   r%   r&   r   m  s&    "

,zSupervisedIntegerMixin.fitN)ra   rb   rc   r   r%   r%   r%   r&   r   l  s   r   c               @   s   e Zd ZdddZdS )UnsupervisedMixinNc             C   s
   |  |S )a  Fit the model using X as training data

        Parameters
        ----------
        X : {array-like, sparse matrix, BallTree, KDTree}
            Training data. If array or matrix, shape [n_samples, n_features],
            or [n_samples, n_samples] if metric='precomputed'.
        )r_   )r?   r]   r   r%   r%   r&   r     s    	zUnsupervisedMixin.fit)N)ra   rb   rc   r   r%   r%   r%   r&   r     s   r   )=rd   	functoolsr   Zdistutils.versionr   r~   rE   abcr   r   Znumpyr,   Zscipy.sparser   r   r   r	   r   r
   baser   Zmetricsr   Zmetrics.pairwiser   Zutilsr   r   r   Zutils.multiclassr   Zutils.validationr   Z	externalsr   
exceptionsr   Zutils._joblibr   r   r   r   r}   dictZvalid_metricsr|   keysrD   rX   r'   r2   Zwith_metaclassr3   rh   r-   ri   r   r   r   r   r   r%   r%   r%   r&   <module>   s^   
, 5	  	  +