B
    	\	J                 @   s   d dl Zd dlZddlmZmZmZmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ edZdd ZG dd deeeZdS )    N   )BaseEstimatorMetaEstimatorMixinRegressorMixinclone)check_random_statecheck_arraycheck_consistent_length)sample_without_replacement)check_is_fitted   )LinearRegression)has_fit_parameter)ConvergenceWarningc             C   sj   | t | }ttd| }ttd||  }|dkr8dS |dkrHt dS tt tt|t| S )a  Determine number trials such that at least one outlier-free subset is
    sampled for the given inlier/outlier ratio.

    Parameters
    ----------
    n_inliers : int
        Number of inliers in the data.

    n_samples : int
        Total number of samples in the data.

    min_samples : int
        Minimum number of samples chosen randomly from original data.

    probability : float
        Probability (confidence) that one outlier-free sample is generated.

    Returns
    -------
    trials : int
        Number of trials.

    r   r   inf)floatmax_EPSILONabsnpceillog)Z	n_inliers	n_samplesmin_samplesZprobabilityZinlier_ratioZnomZdenom r   :lib/python3.7/site-packages/sklearn/linear_model/ransac.py_dynamic_max_trials   s    r   c               @   sR   e Zd ZdZddddddejejejdddfddZddd	Zd
d Zdd Z	dS )RANSACRegressora  RANSAC (RANdom SAmple Consensus) algorithm.

    RANSAC is an iterative algorithm for the robust estimation of parameters
    from a subset of inliers from the complete data set. More information can
    be found in the general documentation of linear models.

    A detailed description of the algorithm can be found in the documentation
    of the ``linear_model`` sub-package.

    Read more in the :ref:`User Guide <ransac_regression>`.

    Parameters
    ----------
    base_estimator : object, optional
        Base estimator object which implements the following methods:

         * `fit(X, y)`: Fit model to given training data and target values.
         * `score(X, y)`: Returns the mean accuracy on the given test data,
           which is used for the stop criterion defined by `stop_score`.
           Additionally, the score is used to decide which of two equally
           large consensus sets is chosen as the better one.
         * `predict(X)`: Returns predicted values using the linear model,
           which is used to compute residual error using loss function.

        If `base_estimator` is None, then
        ``base_estimator=sklearn.linear_model.LinearRegression()`` is used for
        target values of dtype float.

        Note that the current implementation only supports regression
        estimators.

    min_samples : int (>= 1) or float ([0, 1]), optional
        Minimum number of samples chosen randomly from original data. Treated
        as an absolute number of samples for `min_samples >= 1`, treated as a
        relative number `ceil(min_samples * X.shape[0]`) for
        `min_samples < 1`. This is typically chosen as the minimal number of
        samples necessary to estimate the given `base_estimator`. By default a
        ``sklearn.linear_model.LinearRegression()`` estimator is assumed and
        `min_samples` is chosen as ``X.shape[1] + 1``.

    residual_threshold : float, optional
        Maximum residual for a data sample to be classified as an inlier.
        By default the threshold is chosen as the MAD (median absolute
        deviation) of the target values `y`.

    is_data_valid : callable, optional
        This function is called with the randomly selected data before the
        model is fitted to it: `is_data_valid(X, y)`. If its return value is
        False the current randomly chosen sub-sample is skipped.

    is_model_valid : callable, optional
        This function is called with the estimated model and the randomly
        selected data: `is_model_valid(model, X, y)`. If its return value is
        False the current randomly chosen sub-sample is skipped.
        Rejecting samples with this function is computationally costlier than
        with `is_data_valid`. `is_model_valid` should therefore only be used if
        the estimated model is needed for making the rejection decision.

    max_trials : int, optional
        Maximum number of iterations for random sample selection.

    max_skips : int, optional
        Maximum number of iterations that can be skipped due to finding zero
        inliers or invalid data defined by ``is_data_valid`` or invalid models
        defined by ``is_model_valid``.

        .. versionadded:: 0.19

    stop_n_inliers : int, optional
        Stop iteration if at least this number of inliers are found.

    stop_score : float, optional
        Stop iteration if score is greater equal than this threshold.

    stop_probability : float in range [0, 1], optional
        RANSAC iteration stops if at least one outlier-free set of the training
        data is sampled in RANSAC. This requires to generate at least N
        samples (iterations)::

            N >= log(1 - probability) / log(1 - e**m)

        where the probability (confidence) is typically set to high value such
        as 0.99 (the default) and e is the current fraction of inliers w.r.t.
        the total number of samples.

    loss : string, callable, optional, default "absolute_loss"
        String inputs, "absolute_loss" and "squared_loss" are supported which
        find the absolute loss and squared loss per sample
        respectively.

        If ``loss`` is a callable, then it should be a function that takes
        two arrays as inputs, the true and predicted value and returns a 1-D
        array with the i-th value of the array corresponding to the loss
        on ``X[i]``.

        If the loss on a sample is greater than the ``residual_threshold``,
        then this sample is classified as an outlier.

    random_state : int, RandomState instance or None, optional, default None
        The generator used to initialize the centers.  If int, random_state is
        the seed used by the random number generator; If RandomState instance,
        random_state is the random number generator; If None, the random number
        generator is the RandomState instance used by `np.random`.

    Attributes
    ----------
    estimator_ : object
        Best fitted model (copy of the `base_estimator` object).

    n_trials_ : int
        Number of random selection trials until one of the stop criteria is
        met. It is always ``<= max_trials``.

    inlier_mask_ : bool array of shape [n_samples]
        Boolean mask of inliers classified as ``True``.

    n_skips_no_inliers_ : int
        Number of iterations skipped due to finding zero inliers.

        .. versionadded:: 0.19

    n_skips_invalid_data_ : int
        Number of iterations skipped due to invalid data defined by
        ``is_data_valid``.

        .. versionadded:: 0.19

    n_skips_invalid_model_ : int
        Number of iterations skipped due to an invalid model defined by
        ``is_model_valid``.

        .. versionadded:: 0.19

    Examples
    --------
    >>> from sklearn.linear_model import RANSACRegressor
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(
    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)
    >>> reg = RANSACRegressor(random_state=0).fit(X, y)
    >>> reg.score(X, y) # doctest: +ELLIPSIS
    0.9885...
    >>> reg.predict(X[:1,])
    array([-31.9417...])

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/RANSAC
    .. [2] https://www.sri.com/sites/default/files/publications/ransac-publication.pdf
    .. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf
    Nd   gGz?absolute_lossc             C   sL   || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	|| _
|| _d S )N)base_estimatorr   residual_thresholdis_data_validis_model_valid
max_trials	max_skipsstop_n_inliers
stop_scorestop_probabilityrandom_stateloss)selfr    r   r!   r"   r#   r$   r%   r&   r'   r(   r*   r)   r   r   r   __init__   s    zRANSACRegressor.__init__c             C   s  t |dd}t |dd}t|| | jdk	r8t| j}nt }| jdkrX|jd d }nbd| j  k rndk rn nt| j|jd  }n0| jdkr| jd dkrt	d| j}nt	d	||jd krt	d
|jd  | j
dk s| j
dkrt	d| jdkrtt|t| }n| j}| jdkrP|jdkrFdd }ndd }nN| jdkr||jdkrrdd }ndd }n"t| jr| j}nt	d| j t| j}y|j|d W n t	k
r   Y nX t|d}	t|j}
|dk	r |	s t	d|
 |dk	rt|}d}tj }d}d}d}d| _d| _d| _|jd }t|}|j\}}d| _| j}x| j|k r|  jd7  _| j| j | j | jkrP t |||d}|| }|| }| j!dk	r| !||s|  jd7  _ql|dkr|"|| n|j"|||| d | j#dk	rF| #|||sF|  jd7  _ql|$|}|||}||k }t%|}||k r|  jd7  _ql|| }|| }|| }|&||}||kr||k rĐql|}|}|}|}|}t'|t(|||| j
}|| j)ks|| j*krlP qlW |dkrF| j| j | j | jkr<t	dnt	dn&| j| j | j | jkrlt+,dt- |"|| || _.|| _/| S )a  Fit estimator using RANSAC algorithm.

        Parameters
        ----------
        X : array-like or sparse matrix, shape [n_samples, n_features]
            Training data.

        y : array-like, shape = [n_samples] or [n_samples, n_targets]
            Target values.

        sample_weight : array-like, shape = [n_samples]
            Individual weights for each sample
            raises error if sample_weight is passed and base_estimator
            fit method does not support it.

        Raises
        ------
        ValueError
            If no valid consensus set could be found. This occurs if
            `is_data_valid` and `is_model_valid` return False for all
            `max_trials` randomly chosen sub-samples.

        Zcsr)Zaccept_sparseF)Z	ensure_2dNr   r   z4Absolute number of samples must be an integer value.z4Value for `min_samples` must be scalar and positive.zG`min_samples` may not be larger than number of samples: n_samples = %d.z+`stop_probability` must be in range [0, 1].r   c             S   s   t | | S )N)r   r   )y_truey_predr   r   r   <lambda>!  s    z%RANSACRegressor.fit.<locals>.<lambda>c             S   s   t jt | | ddS )Nr   )axis)r   sumr   )r-   r.   r   r   r   r/   #  s    Zsquared_lossc             S   s   | | d S )Nr   r   )r-   r.   r   r   r   r/   (  s    c             S   s   t j| | d ddS )Nr   r   )r0   )r   r1   )r-   r.   r   r   r   r/   *  s    zEloss should be 'absolute_loss', 'squared_loss' or a callable.Got %s. )r)   sample_weightz\%s does not support sample_weight. Samples weights are only used for the calibration itself.)r2   zRANSAC skipped more iterations than `max_skips` without finding a valid consensus set. Iterations were skipped because each randomly chosen sub-sample failed the passing criteria. See estimator attributes for diagnostics (n_skips*).zRANSAC could not find a valid consensus set. All `max_trials` iterations were skipped because each randomly chosen sub-sample failed the passing criteria. See estimator attributes for diagnostics (n_skips*).zRANSAC found a valid consensus set but exited early due to skipping more iterations than `max_skips`. See estimator attributes for diagnostics (n_skips*).)0r   r	   r    r   r   r   shaper   r   
ValueErrorr(   r!   Zmedianr   r*   ndimcallabler   r)   Z
set_paramsr   type__name__Zasarrayr   Zn_skips_no_inliers_Zn_skips_invalid_data_Zn_skips_invalid_model_ZarangeZ	n_trials_r$   r%   r
   r"   fitr#   predictr1   scoreminr   r&   r'   warningswarnr   
estimator_Zinlier_mask_)r+   Xyr2   r    r   r!   Zloss_functionr)   Zestimator_fit_has_sample_weightZestimator_nameZn_inliers_bestZ
score_bestZinlier_mask_bestZX_inlier_bestZy_inlier_bestr   Zsample_idxs_r$   Zsubset_idxsZX_subsetZy_subsetr.   Zresiduals_subsetZinlier_mask_subsetZn_inliers_subsetZinlier_idxs_subsetZX_inlier_subsetZy_inlier_subsetZscore_subsetr   r   r   r9      s    



























zRANSACRegressor.fitc             C   s   t | d | j|S )aK  Predict using the estimated model.

        This is a wrapper for `estimator_.predict(X)`.

        Parameters
        ----------
        X : numpy array of shape [n_samples, n_features]

        Returns
        -------
        y : array, shape = [n_samples] or [n_samples, n_targets]
            Returns predicted values.
        r?   )r   r?   r:   )r+   r@   r   r   r   r:     s    
zRANSACRegressor.predictc             C   s   t | d | j||S )a  Returns the score of the prediction.

        This is a wrapper for `estimator_.score(X, y)`.

        Parameters
        ----------
        X : numpy array or sparse matrix of shape [n_samples, n_features]
            Training data.

        y : array, shape = [n_samples] or [n_samples, n_targets]
            Target values.

        Returns
        -------
        z : float
            Score of the prediction.
        r?   )r   r?   r;   )r+   r@   rA   r   r   r   r;     s    
zRANSACRegressor.score)N)
r8   
__module____qualname____doc__r   r   r,   r9   r:   r;   r   r   r   r   r   7   s    
 cr   )Znumpyr   r=   baser   r   r   r   Zutilsr   r   r	   Zutils.randomr
   Zutils.validationr   r   r   
exceptionsr   Zspacingr   r   r   r   r   r   r   <module>   s   
"