ó
‡ˆ\c           @   sÞ   d  d l  Z d  d l Z d d l m Z m Z m Z m Z d d l m	 Z	 m
 Z
 m Z d d l m Z d d l m Z d d l m Z d d	 l m Z d d
 l m Z e j d ƒ Z d „  Z d e e e f d „  ƒ  YZ d S(   iÿÿÿÿNi   (   t   BaseEstimatort   MetaEstimatorMixint   RegressorMixint   clone(   t   check_random_statet   check_arrayt   check_consistent_length(   t   sample_without_replacement(   t   check_is_fittedi   (   t   LinearRegression(   t   has_fit_parameter(   t   ConvergenceWarningc         C   s   |  t  | ƒ } t t d | ƒ } t t d | | ƒ } | d k rJ d S| d k r` t  d ƒ St t  t j t j | ƒ t j | ƒ ƒ ƒ ƒ S(   s  Determine number trials such that at least one outlier-free subset is
    sampled for the given inlier/outlier ratio.

    Parameters
    ----------
    n_inliers : int
        Number of inliers in the data.

    n_samples : int
        Total number of samples in the data.

    min_samples : int
        Minimum number of samples chosen randomly from original data.

    probability : float
        Probability (confidence) that one outlier-free sample is generated.

    Returns
    -------
    trials : int
        Number of trials.

    i   i    t   inf(   t   floatt   maxt   _EPSILONt   abst   npt   ceilt   log(   t	   n_inlierst	   n_samplest   min_samplest   probabilityt   inlier_ratiot   nomt   denom(    (    s:   lib/python2.7/site-packages/sklearn/linear_model/ransac.pyt   _dynamic_max_trials   s    
t   RANSACRegressorc           B   sb   e  Z d  Z d d d d d d e j e j e j d d d d „ Z d d „ Z d „  Z d „  Z	 RS(	   sò  RANSAC (RANdom SAmple Consensus) algorithm.

    RANSAC is an iterative algorithm for the robust estimation of parameters
    from a subset of inliers from the complete data set. More information can
    be found in the general documentation of linear models.

    A detailed description of the algorithm can be found in the documentation
    of the ``linear_model`` sub-package.

    Read more in the :ref:`User Guide <ransac_regression>`.

    Parameters
    ----------
    base_estimator : object, optional
        Base estimator object which implements the following methods:

         * `fit(X, y)`: Fit model to given training data and target values.
         * `score(X, y)`: Returns the mean accuracy on the given test data,
           which is used for the stop criterion defined by `stop_score`.
           Additionally, the score is used to decide which of two equally
           large consensus sets is chosen as the better one.
         * `predict(X)`: Returns predicted values using the linear model,
           which is used to compute residual error using loss function.

        If `base_estimator` is None, then
        ``base_estimator=sklearn.linear_model.LinearRegression()`` is used for
        target values of dtype float.

        Note that the current implementation only supports regression
        estimators.

    min_samples : int (>= 1) or float ([0, 1]), optional
        Minimum number of samples chosen randomly from original data. Treated
        as an absolute number of samples for `min_samples >= 1`, treated as a
        relative number `ceil(min_samples * X.shape[0]`) for
        `min_samples < 1`. This is typically chosen as the minimal number of
        samples necessary to estimate the given `base_estimator`. By default a
        ``sklearn.linear_model.LinearRegression()`` estimator is assumed and
        `min_samples` is chosen as ``X.shape[1] + 1``.

    residual_threshold : float, optional
        Maximum residual for a data sample to be classified as an inlier.
        By default the threshold is chosen as the MAD (median absolute
        deviation) of the target values `y`.

    is_data_valid : callable, optional
        This function is called with the randomly selected data before the
        model is fitted to it: `is_data_valid(X, y)`. If its return value is
        False the current randomly chosen sub-sample is skipped.

    is_model_valid : callable, optional
        This function is called with the estimated model and the randomly
        selected data: `is_model_valid(model, X, y)`. If its return value is
        False the current randomly chosen sub-sample is skipped.
        Rejecting samples with this function is computationally costlier than
        with `is_data_valid`. `is_model_valid` should therefore only be used if
        the estimated model is needed for making the rejection decision.

    max_trials : int, optional
        Maximum number of iterations for random sample selection.

    max_skips : int, optional
        Maximum number of iterations that can be skipped due to finding zero
        inliers or invalid data defined by ``is_data_valid`` or invalid models
        defined by ``is_model_valid``.

        .. versionadded:: 0.19

    stop_n_inliers : int, optional
        Stop iteration if at least this number of inliers are found.

    stop_score : float, optional
        Stop iteration if score is greater equal than this threshold.

    stop_probability : float in range [0, 1], optional
        RANSAC iteration stops if at least one outlier-free set of the training
        data is sampled in RANSAC. This requires to generate at least N
        samples (iterations)::

            N >= log(1 - probability) / log(1 - e**m)

        where the probability (confidence) is typically set to high value such
        as 0.99 (the default) and e is the current fraction of inliers w.r.t.
        the total number of samples.

    loss : string, callable, optional, default "absolute_loss"
        String inputs, "absolute_loss" and "squared_loss" are supported which
        find the absolute loss and squared loss per sample
        respectively.

        If ``loss`` is a callable, then it should be a function that takes
        two arrays as inputs, the true and predicted value and returns a 1-D
        array with the i-th value of the array corresponding to the loss
        on ``X[i]``.

        If the loss on a sample is greater than the ``residual_threshold``,
        then this sample is classified as an outlier.

    random_state : int, RandomState instance or None, optional, default None
        The generator used to initialize the centers.  If int, random_state is
        the seed used by the random number generator; If RandomState instance,
        random_state is the random number generator; If None, the random number
        generator is the RandomState instance used by `np.random`.

    Attributes
    ----------
    estimator_ : object
        Best fitted model (copy of the `base_estimator` object).

    n_trials_ : int
        Number of random selection trials until one of the stop criteria is
        met. It is always ``<= max_trials``.

    inlier_mask_ : bool array of shape [n_samples]
        Boolean mask of inliers classified as ``True``.

    n_skips_no_inliers_ : int
        Number of iterations skipped due to finding zero inliers.

        .. versionadded:: 0.19

    n_skips_invalid_data_ : int
        Number of iterations skipped due to invalid data defined by
        ``is_data_valid``.

        .. versionadded:: 0.19

    n_skips_invalid_model_ : int
        Number of iterations skipped due to an invalid model defined by
        ``is_model_valid``.

        .. versionadded:: 0.19

    Examples
    --------
    >>> from sklearn.linear_model import RANSACRegressor
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(
    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)
    >>> reg = RANSACRegressor(random_state=0).fit(X, y)
    >>> reg.score(X, y) # doctest: +ELLIPSIS
    0.9885...
    >>> reg.predict(X[:1,])
    array([-31.9417...])

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/RANSAC
    .. [2] https://www.sri.com/sites/default/files/publications/ransac-publication.pdf
    .. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf
    id   g®Gáz®ï?t   absolute_lossc         C   sp   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ |	 |  _ |
 |  _	 | |  _
 | |  _ d  S(   N(   t   base_estimatorR   t   residual_thresholdt   is_data_validt   is_model_validt
   max_trialst	   max_skipst   stop_n_inlierst
   stop_scoret   stop_probabilityt   random_statet   loss(   t   selfR   R   R   R    R!   R"   R#   R$   R%   R&   R(   R'   (    (    s:   lib/python2.7/site-packages/sklearn/linear_model/ransac.pyt   __init__Ð   s    											c         C   s  t  | d d ƒ} t  | d t ƒ} t | | ƒ |  j d k	 rR t |  j ƒ } n	 t ƒ  } |  j d k r~ | j d d } nˆ d |  j k  o˜ d k  n r½ t	 j
 |  j | j d ƒ } nI |  j d k rú |  j d d k rî t d ƒ ‚ n  |  j } n t d ƒ ‚ | | j d k r3t d | j d ƒ ‚ n  |  j d k  sQ|  j d k r`t d	 ƒ ‚ n  |  j d k r—t	 j t	 j | t	 j | ƒ ƒ ƒ } n	 |  j } |  j d
 k rÖ| j d k rÊd „  } q:d „  } nd |  j d k r| j d k r d „  } q:d „  } n. t |  j ƒ r'|  j } n t d |  j ƒ ‚ t |  j ƒ } y | j d | ƒ Wn t k
 rpn Xt | d ƒ }	 t | ƒ j }
 | d k	 rµ|	 rµt d |
 ƒ ‚ n  | d k	 rÓt	 j | ƒ } n  d } t	 j } d } d } d } d |  _ d |  _ d |  _ | j d } t	 j | ƒ } | j \ } } d |  _ |  j  } x"|  j | k  rq|  j d 7_ |  j |  j |  j |  j! k r’Pn  t" | | d | ƒ} | | } | | } |  j# d k	 rò|  j# | | ƒ rò|  j d 7_ qPn  | d k r| j$ | | ƒ n | j$ | | d | | ƒ|  j% d k	 re|  j% | | | ƒ re|  j d 7_ qPn  | j& | ƒ } | | | ƒ } | | k  } t	 j' | ƒ } | | k  r¿|  j d 7_ qPn  | | } | | } | | } | j( | | ƒ } | | k r| | k  rqPn  | } | } | } | } | } t) | t* | | | |  j ƒ ƒ } | |  j+ k sj| |  j, k rPPqPqPW| d k r¼|  j |  j |  j |  j! k r­t d ƒ ‚ qït d ƒ ‚ n3 |  j |  j |  j |  j! k rït- j. d t/ ƒ n  | j$ | | ƒ | |  _0 | |  _1 |  S(   sè  Fit estimator using RANSAC algorithm.

        Parameters
        ----------
        X : array-like or sparse matrix, shape [n_samples, n_features]
            Training data.

        y : array-like, shape = [n_samples] or [n_samples, n_targets]
            Target values.

        sample_weight : array-like, shape = [n_samples]
            Individual weights for each sample
            raises error if sample_weight is passed and base_estimator
            fit method does not support it.

        Raises
        ------
        ValueError
            If no valid consensus set could be found. This occurs if
            `is_data_valid` and `is_model_valid` return False for all
            `max_trials` randomly chosen sub-samples.

        t   accept_sparset   csrt	   ensure_2di   i    s4   Absolute number of samples must be an integer value.s4   Value for `min_samples` must be scalar and positive.sG   `min_samples` may not be larger than number of samples: n_samples = %d.s+   `stop_probability` must be in range [0, 1].R   c         S   s   t  j |  | ƒ S(   N(   R   R   (   t   y_truet   y_pred(    (    s:   lib/python2.7/site-packages/sklearn/linear_model/ransac.pyt   <lambda>!  s    c         S   s    t  j t  j |  | ƒ d d ƒS(   Nt   axisi   (   R   t   sumR   (   R.   R/   (    (    s:   lib/python2.7/site-packages/sklearn/linear_model/ransac.pyR0   #  s    t   squared_lossc         S   s   |  | d S(   Ni   (    (   R.   R/   (    (    s:   lib/python2.7/site-packages/sklearn/linear_model/ransac.pyR0   (  s    c         S   s   t  j |  | d d d ƒS(   Ni   R1   i   (   R   R2   (   R.   R/   (    (    s:   lib/python2.7/site-packages/sklearn/linear_model/ransac.pyR0   *  s    sE   loss should be 'absolute_loss', 'squared_loss' or a callable.Got %s. R'   t   sample_weights\   %s does not support sample_weight. Samples weights are only used for the calibration itself.sè   RANSAC skipped more iterations than `max_skips` without finding a valid consensus set. Iterations were skipped because each randomly chosen sub-sample failed the passing criteria. See estimator attributes for diagnostics (n_skips*).sÏ   RANSAC could not find a valid consensus set. All `max_trials` iterations were skipped because each randomly chosen sub-sample failed the passing criteria. See estimator attributes for diagnostics (n_skips*).sš   RANSAC found a valid consensus set but exited early due to skipping more iterations than `max_skips`. See estimator attributes for diagnostics (n_skips*).N(2   R   t   FalseR   R   t   NoneR   R	   R   t   shapeR   R   t
   ValueErrorR&   R   t   medianR   R(   t   ndimt   callableR   R'   t
   set_paramsR
   t   typet   __name__t   asarrayR   t   n_skips_no_inliers_t   n_skips_invalid_data_t   n_skips_invalid_model_t   aranget	   n_trials_R"   R#   R   R    t   fitR!   t   predictR2   t   scoret   minR   R$   R%   t   warningst   warnR   t
   estimator_t   inlier_mask_(   R)   t   Xt   yR4   R   R   R   t   loss_functionR'   t   estimator_fit_has_sample_weightt   estimator_namet   n_inliers_bestt
   score_bestt   inlier_mask_bestt   X_inlier_bestt   y_inlier_bestR   t   sample_idxst   _R"   t   subset_idxst   X_subsett   y_subsetR/   t   residuals_subsett   inlier_mask_subsett   n_inliers_subsett   inlier_idxs_subsett   X_inlier_subsett   y_inlier_subsett   score_subset(    (    s:   lib/python2.7/site-packages/sklearn/linear_model/ransac.pyRE   ä   sü    	 (		
						




				
		c         C   s   t  |  d ƒ |  j j | ƒ S(   sK  Predict using the estimated model.

        This is a wrapper for `estimator_.predict(X)`.

        Parameters
        ----------
        X : numpy array of shape [n_samples, n_features]

        Returns
        -------
        y : array, shape = [n_samples] or [n_samples, n_targets]
            Returns predicted values.
        RK   (   R   RK   RF   (   R)   RM   (    (    s:   lib/python2.7/site-packages/sklearn/linear_model/ransac.pyRF   Æ  s    c         C   s    t  |  d ƒ |  j j | | ƒ S(   s§  Returns the score of the prediction.

        This is a wrapper for `estimator_.score(X, y)`.

        Parameters
        ----------
        X : numpy array or sparse matrix of shape [n_samples, n_features]
            Training data.

        y : array, shape = [n_samples] or [n_samples, n_targets]
            Target values.

        Returns
        -------
        z : float
            Score of the prediction.
        RK   (   R   RK   RG   (   R)   RM   RN   (    (    s:   lib/python2.7/site-packages/sklearn/linear_model/ransac.pyRG   Ø  s    N(
   R>   t
   __module__t   __doc__R6   R   R   R*   RE   RF   RG   (    (    (    s:   lib/python2.7/site-packages/sklearn/linear_model/ransac.pyR   7   s   —â	(   t   numpyR   RI   t   baseR    R   R   R   t   utilsR   R   R   t   utils.randomR   t   utils.validationR   R	   R
   t
   exceptionsR   t   spacingR   R   R   (    (    (    s:   lib/python2.7/site-packages/sklearn/linear_model/ransac.pyt   <module>   s   "	"