"""
Ridge regression
"""

from abc import ABCMeta, abstractmethod
import warnings

import numpy as np
from scipy import linalg
from scipy import sparse
from scipy.sparse import linalg as sp_linalg

from .base import LinearClassifierMixin, LinearModel, _rescale_data
from .sag import sag_solver
from ..base import RegressorMixin
from ..utils.extmath import safe_sparse_dot
from ..utils.extmath import row_norms
from ..utils import check_X_y
from ..utils import check_array
from ..utils import check_consistent_length
from ..utils import compute_sample_weight
from ..utils import column_or_1d
from ..preprocessing import LabelBinarizer
from ..model_selection import GridSearchCV
from ..externals import six
from ..metrics.scorer import check_scoring
from ..exceptions import ConvergenceWarning


def _solve_sparse_cg(X, y, alpha, max_iter=None, tol=1e-3, verbose=0):
    n_samples, n_features = X.shape
    X1 = sp_linalg.aslinearoperator(X)
    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)

    if n_features > n_samples:
        def create_mv(curr_alpha):
            def _mv(x):
                return X1.matvec(X1.rmatvec(x)) + curr_alpha * x
            return _mv
    else:
        def create_mv(curr_alpha):
            def _mv(x):
                return X1.rmatvec(X1.matvec(x)) + curr_alpha * x
            return _mv

    for i in range(y.shape[1]):
        y_column = y[:, i]

        mv = create_mv(alpha[i])
        if n_features > n_samples:
            # kernel ridge
            # w = X.T * inv(X X^t + alpha*Id) y
            C = sp_linalg.LinearOperator(
                (n_samples, n_samples), matvec=mv, dtype=X.dtype)
            # old scipy does not accept the atol keyword
            try:
                coef, info = sp_linalg.cg(C, y_column, tol=tol,
                                          atol='legacy')
            except TypeError:
                coef, info = sp_linalg.cg(C, y_column, tol=tol)
            coefs[i] = X1.rmatvec(coef)
        else:
            # linear ridge
            # w = inv(X^t X + alpha*Id) * X.T y
            y_column = X1.rmatvec(y_column)
            C = sp_linalg.LinearOperator(
                (n_features, n_features), matvec=mv, dtype=X.dtype)
            try:
                coefs[i], info = sp_linalg.cg(C, y_column, maxiter=max_iter,
                                              tol=tol, atol='legacy')
            except TypeError:
                coefs[i], info = sp_linalg.cg(C, y_column, maxiter=max_iter,
                                              tol=tol)

        if info < 0:
            raise ValueError("Failed with error code %d" % info)

        if max_iter is None and info > 0 and verbose:
            warnings.warn("sparse_cg did not converge after %d iterations." %
                          info, ConvergenceWarning)

    return coefs


def _solve_lsqr(X, y, alpha, max_iter=None, tol=1e-3):
    n_samples, n_features = X.shape
    coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)
    n_iter = np.empty(y.shape[1], dtype=np.int32)

    # According to the lsqr documentation, alpha = damp^2.
    sqrt_alpha = np.sqrt(alpha)

    for i in range(y.shape[1]):
        y_column = y[:, i]
        info = sp_linalg.lsqr(X, y_column, damp=sqrt_alpha[i],
                              atol=tol, btol=tol, iter_lim=max_iter)
        coefs[i] = info[0]
        n_iter[i] = info[2]

    return coefs, n_iter


def _solve_cholesky(X, y, alpha):
    # w = inv(X^t X + alpha*Id) * X.T y
    n_samples, n_features = X.shape
    n_targets = y.shape[1]

    A = safe_sparse_dot(X.T, X, dense_output=True)
    Xy = safe_sparse_dot(X.T, y, dense_output=True)

    one_alpha = np.array_equal(alpha, len(alpha) * [alpha[0]])

    if one_alpha:
        A.flat[::n_features + 1] += alpha[0]
        return linalg.solve(A, Xy, sym_pos=True, overwrite_a=True).T
    else:
        coefs = np.empty([n_targets, n_features], dtype=X.dtype)
        for coef, target, current_alpha in zip(coefs, Xy.T, alpha):
            A.flat[::n_features + 1] += current_alpha
            coef[:] = linalg.solve(A, target, sym_pos=True,
                                   overwrite_a=False).ravel()
            A.flat[::n_features + 1] -= current_alpha
        return coefs


def _solve_cholesky_kernel(K, y, alpha, sample_weight=None, copy=False):
    # dual_coef = inv(X X^t + alpha*Id) y
    n_samples = K.shape[0]
    n_targets = y.shape[1]

    if copy:
        K = K.copy()

    alpha = np.atleast_1d(alpha)
    one_alpha = (alpha == alpha[0]).all()
    has_sw = isinstance(sample_weight, np.ndarray) \
        or sample_weight not in [1.0, None]

    if has_sw:
        # Unlike other solvers, we need to support sample_weight directly
        # because K might be a pre-computed kernel.
        sw = np.sqrt(np.atleast_1d(sample_weight))
        y = y * sw[:, np.newaxis]
        K *= np.outer(sw, sw)

    if one_alpha:
        # Only one penalty, we can solve multi-target problems in one time.
        K.flat[::n_samples + 1] += alpha[0]

        try:
            # Note: we must use overwrite_a=False in order to be able to
            #       use the fall-back solution below in case a LinAlgError
            #       is raised.
            dual_coef = linalg.solve(K, y, sym_pos=True,
                                     overwrite_a=False)
        except np.linalg.LinAlgError:
            warnings.warn("Singular matrix in solving dual problem. Using "
                          "least-squares solution instead.")
            dual_coef = linalg.lstsq(K, y)[0]

        # K is expensive to compute and store in memory so change it back in
        # case it was user-given.
        K.flat[::n_samples + 1] -= alpha[0]

        if has_sw:
            dual_coef *= sw[:, np.newaxis]

        return dual_coef
    else:
        # One penalty per target. We need to solve each target separately.
        dual_coefs = np.empty([n_targets, n_samples], K.dtype)

        for dual_coef, target, current_alpha in zip(dual_coefs, y.T, alpha):
            K.flat[::n_samples + 1] += current_alpha

            dual_coef[:] = linalg.solve(K, target, sym_pos=True,
                                        overwrite_a=False).ravel()

            K.flat[::n_samples + 1] -= current_alpha

        if has_sw:
            dual_coefs *= sw[np.newaxis, :]

        return dual_coefs.T


def _solve_svd(X, y, alpha):
    U, s, Vt = linalg.svd(X, full_matrices=False)
    idx = s > 1e-15  # same default value as scipy.linalg.pinv
    s_nnz = s[idx][:, np.newaxis]
    UTy = np.dot(U.T, y)
    d = np.zeros((s.size, alpha.size), dtype=X.dtype)
    d[idx] = s_nnz / (s_nnz ** 2 + alpha)
    d_UT_y = d * UTy
    return np.dot(Vt.T, d_UT_y).T


def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                     max_iter=None, tol=1e-3, verbose=0, random_state=None,
                     return_n_iter=False, return_intercept=False):
    """Solve the ridge equation by the method of normal equations.

    Read more in the :ref:`User Guide <ridge_regression>`.

    Parameters
    ----------
    X : {array-like, sparse matrix, LinearOperator},
        shape = [n_samples, n_features]
        Training data

    y : array-like, shape = [n_samples] or [n_samples, n_targets]
        Target values

    alpha : {float, array-like},
        shape = [n_targets] if array-like
        Regularization strength; must be a positive float. Regularization
        improves the conditioning of the problem and reduces the variance of
        the estimates. Larger values specify stronger regularization.
        Alpha corresponds to ``C^-1`` in other linear models such as
        LogisticRegression or LinearSVC. If an array is passed, penalties are
        assumed to be specific to the targets. Hence they must correspond in
        number.

    sample_weight : float or numpy array of shape [n_samples]
        Individual weights for each sample. If sample_weight is not None and
        solver='auto', the solver will be set to 'cholesky'.

        .. versionadded:: 0.17

    solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}
        Solver to use in the computational routines:

        - 'auto' chooses the solver automatically based on the type of data.

        - 'svd' uses a Singular Value Decomposition of X to compute the Ridge
          coefficients. More stable for singular matrices than
          'cholesky'.

        - 'cholesky' uses the standard scipy.linalg.solve function to
          obtain a closed-form solution via a Cholesky decomposition of
          dot(X.T, X)

        - 'sparse_cg' uses the conjugate gradient solver as found in
          scipy.sparse.linalg.cg. As an iterative algorithm, this solver is
          more appropriate than 'cholesky' for large-scale data
          (possibility to set `tol` and `max_iter`).

        - 'lsqr' uses the dedicated regularized least-squares routine
          scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative
          procedure.

        - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses
          its improved, unbiased version named SAGA. Both methods also use an
          iterative procedure, and are often faster than other solvers when
          both n_samples and n_features are large. Note that 'sag' and
          'saga' fast convergence is only guaranteed on features with
          approximately the same scale. You can preprocess the data with a
          scaler from sklearn.preprocessing.


        The last five solvers support both dense and sparse data. However,
        only 'sag' and 'saga' support sparse input when ``fit_intercept`` is
        True.

        .. versionadded:: 0.17
           Stochastic Average Gradient descent solver.
        .. versionadded:: 0.19
           SAGA solver.

    max_iter : int, optional
        Maximum number of iterations for conjugate gradient solver.
        For the 'sparse_cg' and 'lsqr' solvers, the default value is determined
        by scipy.sparse.linalg. For the 'sag' and 'saga' solvers, the default
        value is 1000.
        1000.

    tol : float
        Precision of the solution.

    verbose : int
        Verbosity level. Setting verbose > 0 will display additional
        information depending on the solver used.

    random_state : int, RandomState instance or None, optional, default None
        The seed of the pseudo random number generator to use when shuffling
        the data.  If int, random_state is the seed used by the random number
        generator; If RandomState instance, random_state is the random number
        generator; If None, the random number generator is the RandomState
        instance used by `np.random`. Used when ``solver`` == 'sag'.

    return_n_iter : boolean, default False
        If True, the method also returns `n_iter`, the actual number of
        iteration performed by the solver.

        .. versionadded:: 0.17

    return_intercept : boolean, default False
        If True and if X is sparse, the method also returns the intercept,
        and the solver is automatically changed to 'sag'. This is only a
        temporary fix for fitting the intercept with sparse data. For dense
        data, use sklearn.linear_model._preprocess_data before your regression.

        .. versionadded:: 0.17

    Returns
    -------
    coef : array, shape = [n_features] or [n_targets, n_features]
        Weight vector(s).

    n_iter : int, optional
        The actual number of iteration performed by the solver.
        Only returned if `return_n_iter` is True.

    intercept : float or array, shape = [n_targets]
        The intercept of the model. Only returned if `return_intercept`
        is True and if X is a scipy sparse array.

    Notes
    -----
    This function won't compute the intercept.
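
    Examples
    --------
    A minimal usage sketch (the data below is illustrative and not part of
    the original docstring):

    >>> import numpy as np
    >>> from sklearn.linear_model import ridge_regression
    >>> X = np.array([[0., 0.], [1., 1.], [2., 2.]])
    >>> y = np.array([0., 1., 2.])
    >>> coef = ridge_regression(X, y, alpha=1.0)  # coef is ~[0.4545, 0.4545]
    >>> coef.shape
    (2,)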
    """
    if return_intercept and sparse.issparse(X) and solver != 'sag':
        if solver != 'auto':
            warnings.warn("In Ridge, only 'sag' solver can currently fit the "
                          "intercept when X is sparse. Solver has been "
                          "automatically changed into 'sag'.")
        solver = 'sag'

    _dtype = [np.float64, np.float32]

    # SAG needs X and y columns to be C-contiguous and np.float64
    if solver in ['sag', 'saga']:
        X = check_array(X, accept_sparse=['csr'],
                        dtype=np.float64, order='C')
        y = check_array(y, dtype=np.float64, ensure_2d=False, order='F')
    else:
        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
                        dtype=_dtype)
        y = check_array(y, dtype=X.dtype, ensure_2d=False)
    check_consistent_length(X, y)

    n_samples, n_features = X.shape

    if y.ndim > 2:
        raise ValueError("Target y has the wrong shape %s" % str(y.shape))

    ravel = False
    if y.ndim == 1:
        y = y.reshape(-1, 1)
        ravel = True

    n_samples_, n_targets = y.shape

    if n_samples != n_samples_:
        raise ValueError("Number of samples in X and y does not correspond:"
                         " %d != %d" % (n_samples, n_samples_))

    has_sw = sample_weight is not None

    if solver == 'auto':
        # cholesky if it is a dense array, otherwise sparse_cg
        if not sparse.issparse(X) or has_sw:
            solver = 'cholesky'
        else:
            solver = 'sparse_cg'

    if has_sw:
        if np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

        if solver != 'sag':
            # SAG supports sample_weight directly. For other solvers,
            # we implement sample_weight via a simple rescaling.
            X, y = _rescale_data(X, y, sample_weight)

    # There should be either 1 or n_targets penalties
    alpha = np.asarray(alpha, dtype=X.dtype).ravel()
    if alpha.size not in [1, n_targets]:
        raise ValueError("Number of targets and number of penalties "
                         "do not correspond: %d != %d"
                         % (alpha.size, n_targets))

    if alpha.size == 1 and n_targets > 1:
        alpha = np.repeat(alpha, n_targets)

    if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga'):
        raise ValueError('Solver %s not understood' % solver)

    n_iter = None
    if solver == 'sparse_cg':
        coef = _solve_sparse_cg(X, y, alpha, max_iter, tol, verbose)

    elif solver == 'lsqr':
        coef, n_iter = _solve_lsqr(X, y, alpha, max_iter, tol)

    elif solver == 'cholesky':
        if n_features > n_samples:
            K = safe_sparse_dot(X, X.T, dense_output=True)
            try:
                dual_coef = _solve_cholesky_kernel(K, y, alpha)

                coef = safe_sparse_dot(X.T, dual_coef, dense_output=True).T
            except linalg.LinAlgError:
                # use SVD solver if matrix is singular
                solver = 'svd'
        else:
            try:
                coef = _solve_cholesky(X, y, alpha)
            except linalg.LinAlgError:
                # use SVD solver if matrix is singular
                solver = 'svd'

    elif solver in ['sag', 'saga']:
        # precompute max(X_j^2 * sum w) for all features
        max_squared_sum = row_norms(X, squared=True).max()

        coef = np.empty((y.shape[1], n_features))
        n_iter = np.empty(y.shape[1], dtype=np.int32)
        intercept = np.zeros((y.shape[1], ))
        for i, (alpha_i, target) in enumerate(zip(alpha, y.T)):
            init = {'coef': np.zeros((n_features + int(return_intercept), 1))}
            coef_, n_iter_, _ = sag_solver(
                X, target.ravel(), sample_weight, 'squared', alpha_i, 0,
                max_iter, tol, verbose, random_state, False, max_squared_sum,
                init, is_saga=solver == 'saga')
            if return_intercept:
                coef[i] = coef_[:-1]
                intercept[i] = coef_[-1]
            else:
                coef[i] = coef_
            n_iter[i] = n_iter_

        if intercept.shape[0] == 1:
            intercept = intercept[0]
        coef = np.asarray(coef)

    if solver == 'svd':
        if sparse.issparse(X):
            raise TypeError('SVD solver does not support sparse'
                            ' inputs currently')
        coef = _solve_svd(X, y, alpha)

    if ravel:
        # When y was passed as a 1d-array, we flatten the coefficients.
        coef = coef.ravel()

    if return_n_iter and return_intercept:
        return coef, n_iter, intercept
    elif return_intercept:
        return coef, intercept
    elif return_n_iter:
        return coef, n_iter
    else:
        return coef


class _BaseRidge(six.with_metaclass(ABCMeta, LinearModel)):

    @abstractmethod
    def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
                 copy_X=True, max_iter=None, tol=1e-3, solver="auto",
                 random_state=None):
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.normalize = normalize
        self.copy_X = copy_X
        self.max_iter = max_iter
        self.tol = tol
        self.solver = solver
        self.random_state = random_state

    def fit(self, X, y, sample_weight=None):

        if self.solver in ('sag', 'saga'):
            _dtype = np.float64
        else:
            # all other solvers work at both float precision levels
            _dtype = [np.float64, np.float32]

        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=_dtype,
                         multi_output=True, y_numeric=True)

        if ((sample_weight is not None) and
                np.atleast_1d(sample_weight).ndim > 1):
            raise ValueError("Sample weights must be 1D array or scalar")

        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
            X, y, self.fit_intercept, self.normalize, self.copy_X,
            sample_weight=sample_weight)

        # temporary fix for fitting the intercept with sparse data using 'sag'
        if sparse.issparse(X) and self.fit_intercept:
            self.coef_, self.n_iter_, self.intercept_ = ridge_regression(
                X, y, alpha=self.alpha, sample_weight=sample_weight,
                max_iter=self.max_iter, tol=self.tol, solver=self.solver,
                random_state=self.random_state, return_n_iter=True,
                return_intercept=True)
            self.intercept_ += y_offset
        else:
            self.coef_, self.n_iter_ = ridge_regression(
                X, y, alpha=self.alpha, sample_weight=sample_weight,
                max_iter=self.max_iter, tol=self.tol, solver=self.solver,
                random_state=self.random_state, return_n_iter=True,
                return_intercept=False)
            self._set_intercept(X_offset, y_offset, X_scale)

        return self


class Ridge(_BaseRidge, RegressorMixin):
    """Linear least squares with l2 regularization.

    Minimizes the objective function::

    ||y - Xw||^2_2 + alpha * ||w||^2_2

    This model solves a regression model where the loss function is
    the linear least squares function and regularization is given by
    the l2-norm. Also known as Ridge Regression or Tikhonov regularization.
    This estimator has built-in support for multi-variate regression
    (i.e., when y is a 2d-array of shape [n_samples, n_targets]).

    Read more in the :ref:`User Guide <ridge_regression>`.

    Parameters
    ----------
    alpha : {float, array-like}, shape (n_targets)
        Regularization strength; must be a positive float. Regularization
        improves the conditioning of the problem and reduces the variance of
        the estimates. Larger values specify stronger regularization.
        Alpha corresponds to ``C^-1`` in other linear models such as
        LogisticRegression or LinearSVC. If an array is passed, penalties are
        assumed to be specific to the targets. Hence they must correspond in
        number.

    fit_intercept : boolean
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (e.g. data is expected to be already centered).

    normalize : boolean, optional, default False
        This parameter is ignored when ``fit_intercept`` is set to False.
        If True, the regressors X will be normalized before regression by
        subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use
        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
        on an estimator with ``normalize=False``.

    copy_X : boolean, optional, default True
        If True, X will be copied; else, it may be overwritten.

    max_iter : int, optional
        Maximum number of iterations for conjugate gradient solver.
        For 'sparse_cg' and 'lsqr' solvers, the default value is determined
        by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.

    tol : float
        Precision of the solution.

    solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}
        Solver to use in the computational routines:

        - 'auto' chooses the solver automatically based on the type of data.

        - 'svd' uses a Singular Value Decomposition of X to compute the Ridge
          coefficients. More stable for singular matrices than
          'cholesky'.

        - 'cholesky' uses the standard scipy.linalg.solve function to
          obtain a closed-form solution.

        - 'sparse_cg' uses the conjugate gradient solver as found in
          scipy.sparse.linalg.cg. As an iterative algorithm, this solver is
          more appropriate than 'cholesky' for large-scale data
          (possibility to set `tol` and `max_iter`).

        - 'lsqr' uses the dedicated regularized least-squares routine
          scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative
          procedure.

        - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses
          its improved, unbiased version named SAGA. Both methods also use an
          iterative procedure, and are often faster than other solvers when
          both n_samples and n_features are large. Note that 'sag' and
          'saga' fast convergence is only guaranteed on features with
          approximately the same scale. You can preprocess the data with a
          scaler from sklearn.preprocessing.

        The last five solvers support both dense and sparse data. However,
        only 'sag' and 'saga' support sparse input when ``fit_intercept`` is
        True.

        .. versionadded:: 0.17
           Stochastic Average Gradient descent solver.
        .. versionadded:: 0.19
           SAGA solver.

    random_state : int, RandomState instance or None, optional, default None
        The seed of the pseudo random number generator to use when shuffling
        the data.  If int, random_state is the seed used by the random number
        generator; If RandomState instance, random_state is the random number
        generator; If None, the random number generator is the RandomState
        instance used by `np.random`. Used when ``solver`` == 'sag'.

        .. versionadded:: 0.17
           *random_state* to support Stochastic Average Gradient.

    Attributes
    ----------
    coef_ : array, shape (n_features,) or (n_targets, n_features)
        Weight vector(s).

    intercept_ : float | array, shape = (n_targets,)
        Independent term in decision function. Set to 0.0 if
        ``fit_intercept = False``.

    n_iter_ : array or None, shape (n_targets,)
        Actual number of iterations for each target. Available only for
        sag and lsqr solvers. Other solvers will return None.

        .. versionadded:: 0.17

    See also
    --------
    RidgeClassifier : Ridge classifier
    RidgeCV : Ridge regression with built-in cross validation
    :class:`sklearn.kernel_ridge.KernelRidge` : Kernel ridge regression
        combines ridge regression with the kernel trick

    Examples
    --------
    >>> from sklearn.linear_model import Ridge
    >>> import numpy as np
    >>> n_samples, n_features = 10, 5
    >>> np.random.seed(0)
    >>> y = np.random.randn(n_samples)
    >>> X = np.random.randn(n_samples, n_features)
    >>> clf = Ridge(alpha=1.0)
    >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE
    Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
          normalize=False, random_state=None, solver='auto', tol=0.001)
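
    The 'sag' and 'saga' solvers also accept sparse input even with
    ``fit_intercept=True`` (a usage sketch reusing the data above):

    >>> from scipy import sparse
    >>> clf = Ridge(alpha=1.0, solver='sag', random_state=0)
    >>> clf = clf.fit(sparse.csr_matrix(X), y)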

    """

    def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
                 copy_X=True, max_iter=None, tol=1e-3, solver="auto",
                 random_state=None):
        super(Ridge, self).__init__(
            alpha=alpha, fit_intercept=fit_intercept, normalize=normalize,
            copy_X=copy_X, max_iter=max_iter, tol=tol, solver=solver,
            random_state=random_state)

    def fit(self, X, y, sample_weight=None):
        """Fit Ridge regression model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training data

        y : array-like, shape = [n_samples] or [n_samples, n_targets]
            Target values

        sample_weight : float or numpy array of shape [n_samples]
            Individual weights for each sample

        Returns
        -------
        self : returns an instance of self.
        """
        return super(Ridge, self).fit(X, y, sample_weight=sample_weight)


class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
    """Classifier using Ridge regression.

    Read more in the :ref:`User Guide <ridge_regression>`.

    Parameters
    ----------
    alpha : float
        Regularization strength; must be a positive float. Regularization
        improves the conditioning of the problem and reduces the variance of
        the estimates. Larger values specify stronger regularization.
        Alpha corresponds to ``C^-1`` in other linear models such as
        LogisticRegression or LinearSVC.

    fit_intercept : boolean
        Whether to calculate the intercept for this model. If set to false, no
        intercept will be used in calculations (e.g. data is expected to be
        already centered).

    normalize : boolean, optional, default False
        This parameter is ignored when ``fit_intercept`` is set to False.
        If True, the regressors X will be normalized before regression by
        subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use
        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
        on an estimator with ``normalize=False``.

    copy_X : boolean, optional, default True
        If True, X will be copied; else, it may be overwritten.

    max_iter : int, optional
        Maximum number of iterations for conjugate gradient solver.
        The default value is determined by scipy.sparse.linalg.

    tol : float
        Precision of the solution.

    class_weight : dict or 'balanced', optional
        Weights associated with classes in the form ``{class_label: weight}``.
        If not given, all classes are supposed to have weight one.

        The "balanced" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data
        as ``n_samples / (n_classes * np.bincount(y))``

    solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}
        Solver to use in the computational routines:

        - 'auto' chooses the solver automatically based on the type of data.

        - 'svd' uses a Singular Value Decomposition of X to compute the Ridge
          coefficients. More stable for singular matrices than
          'cholesky'.

        - 'cholesky' uses the standard scipy.linalg.solve function to
          obtain a closed-form solution.

        - 'sparse_cg' uses the conjugate gradient solver as found in
          scipy.sparse.linalg.cg. As an iterative algorithm, this solver is
          more appropriate than 'cholesky' for large-scale data
          (possibility to set `tol` and `max_iter`).

        - 'lsqr' uses the dedicated regularized least-squares routine
          scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative
          procedure.

        - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses
          its unbiased and more flexible version named SAGA. Both methods
          use an iterative procedure, and are often faster than other solvers
          when both n_samples and n_features are large. Note that 'sag' and
          'saga' fast convergence is only guaranteed on features with
          approximately the same scale. You can preprocess the data with a
          scaler from sklearn.preprocessing.

        .. versionadded:: 0.17
           Stochastic Average Gradient descent solver.
        .. versionadded:: 0.19
           SAGA solver.

    random_state : int, RandomState instance or None, optional, default None
        The seed of the pseudo random number generator to use when shuffling
        the data.  If int, random_state is the seed used by the random number
        generator; If RandomState instance, random_state is the random number
        generator; If None, the random number generator is the RandomState
        instance used by `np.random`. Used when ``solver`` == 'sag'.

    Attributes
    ----------
    coef_ : array, shape (n_features,) or (n_classes, n_features)
        Weight vector(s).

    intercept_ : float | array, shape = (n_targets,)
        Independent term in decision function. Set to 0.0 if
        ``fit_intercept = False``.

    n_iter_ : array or None, shape (n_targets,)
        Actual number of iterations for each target. Available only for
        sag and lsqr solvers. Other solvers will return None.

    Examples
    --------
    >>> from sklearn.datasets import load_breast_cancer
    >>> from sklearn.linear_model import RidgeClassifier
    >>> X, y = load_breast_cancer(return_X_y=True)
    >>> clf = RidgeClassifier().fit(X, y)
    >>> clf.score(X, y) # doctest: +ELLIPSIS
    0.9595...

    See also
    --------
    Ridge : Ridge regression
    RidgeClassifierCV : Ridge classifier with built-in cross validation

    Notes
    -----
    For multi-class classification, n_class classifiers are trained in
    a one-versus-all approach. Concretely, this is implemented by taking
    advantage of the multi-variate response support in Ridge.
    """

    def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
                 copy_X=True, max_iter=None, tol=1e-3, class_weight=None,
                 solver="auto", random_state=None):
        super(RidgeClassifier, self).__init__(
            alpha=alpha, fit_intercept=fit_intercept, normalize=normalize,
            copy_X=copy_X, max_iter=max_iter, tol=tol, solver=solver,
            random_state=random_state)
        self.class_weight = class_weight

    def fit(self, X, y, sample_weight=None):
        """Fit Ridge regression model.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples,n_features]
            Training data

        y : array-like, shape = [n_samples]
            Target values

        sample_weight : float or numpy array of shape (n_samples,)
            Sample weight.

            .. versionadded:: 0.17
               *sample_weight* support to Classifier.

        Returns
        -------
        self : returns an instance of self.
        """
        check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                  multi_output=True)

        self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
        Y = self._label_binarizer.fit_transform(y)
        if not self._label_binarizer.y_type_.startswith('multilabel'):
            y = column_or_1d(y, warn=True)
        else:
            # we don't (yet) support multi-label classification in Ridge
            raise ValueError(
                "%s doesn't support multi-label classification" % (
                    self.__class__.__name__))

        if self.class_weight:
            if sample_weight is None:
                sample_weight = 1.
            # modify the sample weights with the corresponding class weight
            sample_weight = (sample_weight *
                             compute_sample_weight(self.class_weight, y))

        super(RidgeClassifier, self).fit(X, Y, sample_weight=sample_weight)
        return self

    @property
    def classes_(self):
        return self._label_binarizer.classes_


class _RidgeGCV(LinearModel):
    """Ridge regression with built-in Generalized Cross-Validation

    It allows efficient Leave-One-Out cross-validation.

    This class is not intended to be used directly. Use RidgeCV instead.

    Notes
    -----

    We want to solve (K + alpha*Id)c = y,
    where K = X X^T is the kernel matrix.

    Let G = (K + alpha*Id)^-1.

    Dual solution: c = Gy
    Primal solution: w = X^T c

    Compute eigendecomposition K = Q V Q^T.
    Then G = Q (V + alpha*Id)^-1 Q^T,
    where (V + alpha*Id) is diagonal.
    It is thus inexpensive to invert for many alphas.

    Let loov be the vector of prediction values for each example
    when the model was fitted with all examples but this example.

    loov = (KGY - diag(KG)Y) / diag(I-KG)

    Let looe be the vector of prediction errors for each example
    when the model was fitted with all examples but this example.

    looe = y - loov = c / diag(G)

    References
    ----------
    http://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf
    https://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf
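
    A tiny numeric illustration of the quantities above (the data is
    hypothetical; this mirrors the eigendecomposition path rather than
    transcribing the implementation):

    >>> import numpy as np
    >>> rng = np.random.RandomState(0)
    >>> X, y, alpha = rng.randn(5, 2), rng.randn(5), 1.0
    >>> G = np.linalg.inv(X.dot(X.T) + alpha * np.eye(5))
    >>> c = G.dot(y)                # dual solution c = Gy
    >>> looe = c / np.diag(G)       # leave-one-out errors, all at once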
    """

    def __init__(self, alphas=(0.1, 1.0, 10.0),
                 fit_intercept=True, normalize=False,
                 scoring=None, copy_X=True,
                 gcv_mode=None, store_cv_values=False):
        self.alphas = np.asarray(alphas)
        self.fit_intercept = fit_intercept
        self.normalize = normalize
        self.scoring = scoring
        self.copy_X = copy_X
        self.gcv_mode = gcv_mode
        self.store_cv_values = store_cv_values

    def _pre_compute(self, X, y, centered_kernel=True):
        # even if X is very sparse, K is usually very dense
        K = safe_sparse_dot(X, X.T, dense_output=True)
        # the following emulates an additional constant regressor
        # corresponding to fit_intercept=True
        if centered_kernel:
            K += np.ones_like(K)
        v, Q = linalg.eigh(K)
        QT_y = np.dot(Q.T, y)
        return v, Q, QT_y

    def _decomp_diag(self, v_prime, Q):
        # compute diagonal of the matrix: dot(Q, dot(diag(v_prime), Q^T))
        return (v_prime * Q ** 2).sum(axis=-1)

    def _diag_dot(self, D, B):
        # compute dot(diag(D), B)
        if len(B.shape) > 1:
            # handle case where B is > 1-d
            D = D[(slice(None), ) + (np.newaxis, ) * (len(B.shape) - 1)]
        return D * B

    def _errors_and_values_helper(self, alpha, y, v, Q, QT_y):
        """Helper function to avoid code duplication between self._errors and
        self._values.

        Notes
        -----
        We don't construct matrix G, instead compute action on y & diagonal.
        """
        w = 1. / (v + alpha)
        constant_column = np.var(Q, 0) < 1.e-12
        # detect constant columns
        w[constant_column] = 0  # cancel the regularization for the intercept

        c = np.dot(Q, self._diag_dot(w, QT_y))
        G_diag = self._decomp_diag(w, Q)
        # handle case where y is 2-d
        if len(y.shape) != 1:
            G_diag = G_diag[:, np.newaxis]
        return G_diag, c

    def _errors(self, alpha, y, v, Q, QT_y):
        G_diag, c = self._errors_and_values_helper(alpha, y, v, Q, QT_y)
        return (c / G_diag) ** 2, c

    def _values(self, alpha, y, v, Q, QT_y):
        G_diag, c = self._errors_and_values_helper(alpha, y, v, Q, QT_y)
        return y - (c / G_diag), c

    def _pre_compute_svd(self, X, y, centered_kernel=True):
        if sparse.issparse(X):
            raise TypeError("SVD not supported for sparse matrices")
        if centered_kernel:
            # to emulate fit_intercept=True situation, add a column of ones
            X = np.hstack((X, np.ones((X.shape[0], 1))))
        U, s, _ = linalg.svd(X, full_matrices=0)
        v = s ** 2
        UT_y = np.dot(U.T, y)
        return v, U, UT_y

    def _errors_and_values_svd_helper(self, alpha, y, v, U, UT_y):
        """Helper function to avoid code duplication between self._errors_svd
        and self._values_svd.
        """
        constant_column = np.var(U, 0) < 1.e-12
        # detect columns colinear to ones
        w = ((v + alpha) ** -1) - (alpha ** -1)
        w[constant_column] = - (alpha ** -1)
        # cancel the regularization for the intercept
        c = np.dot(U, self._diag_dot(w, UT_y)) + (alpha ** -1) * y
        G_diag = self._decomp_diag(w, U) + (alpha ** -1)
        if len(y.shape) != 1:
            # handle case where y is 2-d
            G_diag = G_diag[:, np.newaxis]
        return G_diag, c

    def _errors_svd(self, alpha, y, v, U, UT_y):
        G_diag, c = self._errors_and_values_svd_helper(alpha, y, v, U, UT_y)
        return (c / G_diag) ** 2, c

    def _values_svd(self, alpha, y, v, U, UT_y):
        G_diag, c = self._errors_and_values_svd_helper(alpha, y, v, U, UT_y)
        return y - (c / G_diag), c

    def fit(self, X, y, sample_weight=None):
        """Fit Ridge regression model

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training data

        y : array-like, shape = [n_samples] or [n_samples, n_targets]
            Target values. Will be cast to X's dtype if necessary

        sample_weight : float or array-like of shape [n_samples]
            Sample weight

        Returns
        -------
        self : object
        """
        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=np.float64,
                         multi_output=True, y_numeric=True)
        if sample_weight is not None and not isinstance(sample_weight, float):
            sample_weight = check_array(sample_weight, ensure_2d=False)
        n_samples, n_features = X.shape

        X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data(
            X, y, self.fit_intercept, self.normalize, self.copy_X,
            sample_weight=sample_weight)

        gcv_mode = self.gcv_mode
        with_sw = len(np.shape(sample_weight))

        if gcv_mode is None or gcv_mode == 'auto':
            if sparse.issparse(X) or n_features > n_samples or with_sw:
                gcv_mode = 'eigen'
            else:
                gcv_mode = 'svd'
        elif gcv_mode == "svd" and with_sw:
            # FIXME non-uniform sample weights not yet supported
            warnings.warn("non-uniform sample weights unsupported for svd, "
                          "forcing usage of eigen")
            gcv_mode = 'eigen'

        if gcv_mode == 'eigen':
            _pre_compute = self._pre_compute
            _errors = self._errors
            _values = self._values
        elif gcv_mode == 'svd':
            _pre_compute = self._pre_compute_svd
            _errors = self._errors_svd
            _values = self._values_svd
        else:
            raise ValueError('bad gcv_mode "%s"' % gcv_mode)

        if sample_weight is not None:
            X, y = _rescale_data(X, y, sample_weight)

        centered_kernel = not sparse.issparse(X) and self.fit_intercept

        v, Q, QT_y = _pre_compute(X, y, centered_kernel)
        n_y = 1 if len(y.shape) == 1 else y.shape[1]
        cv_values = np.zeros((n_samples * n_y, len(self.alphas)))
        C = []

        scorer = check_scoring(self, scoring=self.scoring, allow_none=True)
        error = scorer is None

        if np.any(self.alphas < 0):
            raise ValueError("alphas cannot be negative. "
                             "Got {} containing some "
                             "negative value instead.".format(self.alphas))

        for i, alpha in enumerate(self.alphas):
            if error:
                out, c = _errors(float(alpha), y, v, Q, QT_y)
            else:
                out, c = _values(float(alpha), y, v, Q, QT_y)
            cv_values[:, i] = out.ravel()
            C.append(c)

        if error:
            best = cv_values.mean(axis=0).argmin()
        else:
            # The scorer wants an object that will make the predictions, but
            # they are already computed efficiently by _RidgeGCV. This
            # identity_estimator will just return them.
            def identity_estimator():
                pass
            identity_estimator.decision_function = lambda y_predict: y_predict
            identity_estimator.predict = lambda y_predict: y_predict

            out = [scorer(identity_estimator, y.ravel(), cv_values[:, i])
                   for i in range(len(self.alphas))]
            best = np.argmax(out)

        self.alpha_ = self.alphas[best]
        self.dual_coef_ = C[best]
        self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)

        self._set_intercept(X_offset, y_offset, X_scale)

        if self.store_cv_values:
            if len(y.shape) == 1:
                cv_values_shape = n_samples, len(self.alphas)
            else:
                cv_values_shape = n_samples, n_y, len(self.alphas)
            self.cv_values_ = cv_values.reshape(cv_values_shape)

        return self


class _BaseRidgeCV(LinearModel):
    def __init__(self, alphas=(0.1, 1.0, 10.0),
                 fit_intercept=True, normalize=False, scoring=None,
                 cv=None, gcv_mode=None, store_cv_values=False):
        self.alphas = np.asarray(alphas)
        self.fit_intercept = fit_intercept
        self.normalize = normalize
        self.scoring = scoring
        self.cv = cv
        self.gcv_mode = gcv_mode
        self.store_cv_values = store_cv_values

    def fit(self, X, y, sample_weight=None):
        """Fit Ridge regression model

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training data

        y : array-like, shape = [n_samples] or [n_samples, n_targets]
            Target values. Will be cast to X's dtype if necessary

        sample_weight : float or array-like of shape [n_samples]
            Sample weight

        Returns
        -------
        self : object
        """
        if self.cv is None:
            estimator = _RidgeGCV(self.alphas,
                                  fit_intercept=self.fit_intercept,
                                  normalize=self.normalize,
                                  scoring=self.scoring,
                                  gcv_mode=self.gcv_mode,
                                  store_cv_values=self.store_cv_values)
            estimator.fit(X, y, sample_weight=sample_weight)
            self.alpha_ = estimator.alpha_
            if self.store_cv_values:
                self.cv_values_ = estimator.cv_values_
        else:
            if self.store_cv_values:
                raise ValueError("cv!=None and store_cv_values=True "
                                 " are incompatible")
            parameters = {'alpha': self.alphas}
            gs = GridSearchCV(Ridge(fit_intercept=self.fit_intercept,
                                    normalize=self.normalize),
                              parameters, cv=self.cv, scoring=self.scoring)
            gs.fit(X, y, sample_weight=sample_weight)
            estimator = gs.best_estimator_
            self.alpha_ = gs.best_estimator_.alpha

        self.coef_ = estimator.coef_
        self.intercept_ = estimator.intercept_

        return self


class RidgeCV(_BaseRidgeCV, RegressorMixin):
    """Ridge regression with built-in cross-validation.

    See glossary entry for :term:`cross-validation estimator`.

    By default, it performs Generalized Cross-Validation, which is a form of
    efficient Leave-One-Out cross-validation.

    Read more in the :ref:`User Guide <ridge_regression>`.

    Parameters
    ----------
    alphas : numpy array of shape [n_alphas]
        Array of alpha values to try.
        Regularization strength; must be a positive float. Regularization
        improves the conditioning of the problem and reduces the variance of
        the estimates. Larger values specify stronger regularization.
        Alpha corresponds to ``C^-1`` in other linear models such as
        LogisticRegression or LinearSVC.

    fit_intercept : boolean
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (e.g. data is expected to be already centered).

    normalize : boolean, optional, default False
        This parameter is ignored when ``fit_intercept`` is set to False.
        If True, the regressors X will be normalized before regression by
        subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use
        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
        on an estimator with ``normalize=False``.

    scoring : string, callable or None, optional, default: None
        A string (see model evaluation documentation) or
        a scorer callable object / function with signature
        ``scorer(estimator, X, y)``.

    cv : int, cross-validation generator or an iterable, optional
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the efficient Leave-One-Out cross-validation
        - integer, to specify the number of folds.
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, if ``y`` is binary or multiclass,
        :class:`sklearn.model_selection.StratifiedKFold` is used, else,
        :class:`sklearn.model_selection.KFold` is used.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

    gcv_mode : {None, 'auto', 'svd', 'eigen'}, optional
        Flag indicating which strategy to use when performing
        Generalized Cross-Validation. Options are::

            'auto' : use svd if n_samples > n_features or when X is a sparse
                     matrix, otherwise use eigen
            'svd' : force computation via singular value decomposition of X
                    (does not work for sparse matrices)
            'eigen' : force computation via eigendecomposition of X^T X

        The 'auto' mode is the default and is intended to pick the cheaper
        option of the two depending upon the shape and format of the training
        data.

    store_cv_values : boolean, default=False
        Flag indicating if the cross-validation values corresponding to
        each alpha should be stored in the ``cv_values_`` attribute (see
        below). This flag is only compatible with ``cv=None`` (i.e. using
        Generalized Cross-Validation).

    Attributes
    ----------
    cv_values_ : array, shape = [n_samples, n_alphas] or \
        shape = [n_samples, n_targets, n_alphas], optional
        Cross-validation values for each alpha (if ``store_cv_values=True``\
        and ``cv=None``). After ``fit()`` has been called, this attribute \
        will contain the mean squared errors (by default) or the values \
        of the ``{loss,score}_func`` function (if provided in the constructor).

    coef_ : array, shape = [n_features] or [n_targets, n_features]
        Weight vector(s).

    intercept_ : float | array, shape = (n_targets,)
        Independent term in decision function. Set to 0.0 if
        ``fit_intercept = False``.

    alpha_ : float
        Estimated regularization parameter.

    Examples
    --------
    >>> from sklearn.datasets import load_diabetes
    >>> from sklearn.linear_model import RidgeCV
    >>> X, y = load_diabetes(return_X_y=True)
    >>> clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)
    >>> clf.score(X, y) # doctest: +ELLIPSIS
    0.5166...
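
    With ``store_cv_values=True`` (and the default ``cv=None``), the
    per-sample leave-one-out errors are retained; the shape below follows
    the ``cv_values_`` description above:

    >>> clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1],
    ...               store_cv_values=True).fit(X, y)
    >>> clf.cv_values_.shape
    (442, 4)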

    See also
    --------
    Ridge : Ridge regression
    RidgeClassifier : Ridge classifier
    RidgeClassifierCV : Ridge classifier with built-in cross validation
    """


class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
    """Ridge classifier with built-in cross-validation.

    See glossary entry for :term:`cross-validation estimator`.

    By default, it performs Generalized Cross-Validation, which is a form of
    efficient Leave-One-Out cross-validation. Currently, only the n_features >
    n_samples case is handled efficiently.

    Read more in the :ref:`User Guide <ridge_regression>`.

    Parameters
    ----------
    alphas : numpy array of shape [n_alphas]
        Array of alpha values to try.
        Regularization strength; must be a positive float. Regularization
        improves the conditioning of the problem and reduces the variance of
        the estimates. Larger values specify stronger regularization.
        Alpha corresponds to ``C^-1`` in other linear models such as
        LogisticRegression or LinearSVC.

    fit_intercept : boolean
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (e.g. data is expected to be already centered).

    normalize : boolean, optional, default False
        This parameter is ignored when ``fit_intercept`` is set to False.
        If True, the regressors X will be normalized before regression by
        subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use
        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
        on an estimator with ``normalize=False``.

    scoring : string, callable or None, optional, default: None
        A string (see model evaluation documentation) or
        a scorer callable object / function with signature
        ``scorer(estimator, X, y)``.

    cv : int, cross-validation generator or an iterable, optional
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the efficient Leave-One-Out cross-validation
        - integer, to specify the number of folds.
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

    class_weight : dict or 'balanced', optional
        Weights associated with classes in the form ``{class_label: weight}``.
        If not given, all classes are supposed to have weight one.

        The "balanced" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data
        as ``n_samples / (n_classes * np.bincount(y))``

    store_cv_values : boolean, default=False
        Flag indicating if the cross-validation values corresponding to
        each alpha should be stored in the ``cv_values_`` attribute (see
        below). This flag is only compatible with ``cv=None`` (i.e. using
        Generalized Cross-Validation).

    Attributes
    ----------
    cv_values_ : array, shape = [n_samples, n_targets, n_alphas], optional
        Cross-validation values for each alpha (if ``store_cv_values=True`` and
        ``cv=None``). After ``fit()`` has been called, this attribute will
        contain the mean squared errors (by default) or the values of the
        ``{loss,score}_func`` function (if provided in the constructor).

    coef_ : array, shape = [n_features] or [n_targets, n_features]
        Weight vector(s).

    intercept_ : float | array, shape = (n_targets,)
        Independent term in decision function. Set to 0.0 if
        ``fit_intercept = False``.

    alpha_ : float
        Estimated regularization parameter

    Examples
    --------
    >>> from sklearn.datasets import load_breast_cancer
    >>> from sklearn.linear_model import RidgeClassifierCV
    >>> X, y = load_breast_cancer(return_X_y=True)
    >>> clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)
    >>> clf.score(X, y) # doctest: +ELLIPSIS
    0.9630...

    See also
    --------
    Ridge : Ridge regression
    RidgeClassifier : Ridge classifier
    RidgeCV : Ridge regression with built-in cross validation

    Notes
    -----
    For multi-class classification, n_class classifiers are trained in
    a one-versus-all approach. Concretely, this is implemented by taking
    advantage of the multi-variate response support in Ridge.
    """

    def __init__(self, alphas=(0.1, 1.0, 10.0), fit_intercept=True,
                 normalize=False, scoring=None, cv=None, class_weight=None,
                 store_cv_values=False):
        super(RidgeClassifierCV, self).__init__(
            alphas=alphas, fit_intercept=fit_intercept, normalize=normalize,
            scoring=scoring, cv=cv, store_cv_values=store_cv_values)
        self.class_weight = class_weight

    def fit(self, X, y, sample_weight=None):
        """Fit the ridge classifier.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,)
            Target values. Will be cast to X's dtype if necessary

        sample_weight : float or numpy array of shape (n_samples,)
            Sample weight.

        Returns
        -------
        self : object
        """
        check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                  multi_output=True)

        self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
        Y = self._label_binarizer.fit_transform(y)
        if not self._label_binarizer.y_type_.startswith('multilabel'):
            y = column_or_1d(y, warn=True)

        if self.class_weight:
            if sample_weight is None:
                sample_weight = 1.
            # modify the sample weights with the corresponding class weight
            sample_weight = (sample_weight *
                             compute_sample_weight(self.class_weight, y))

        _BaseRidgeCV.fit(self, X, Y, sample_weight=sample_weight)
        return self

    @property
    def classes_(self):
        return self._label_binarizer.classes_