ó
‡ˆ\c           @   s  d  Z  d d l m Z d d l m Z m Z d d l Z d d l Z d d l Z	 d d l
 j Z d d l m Z d d l m Z d d l m Z d d	 l m Z m Z d d
 l m Z m Z m Z d d l m Z m Z d d l m Z d d l m Z d d l m Z d d l  m! Z! m" Z" d d l# m$ Z$ d d l% m& Z& m' Z' d d l m( Z( d d l) m* Z* d d l+ m, Z- d Z. d d „ Z0 e1 e2 d e1 e2 d „ Z3 d „  Z4 d e j5 e e ƒ f d „  ƒ  YZ6 d e f d „  ƒ  YZ7 d e8 f d „  ƒ  YZ9 d e6 e f d  „  ƒ  YZ: e2 d! „ Z; d S("   s   
Generalized Linear models.
iÿÿÿÿ(   t   division(   t   ABCMetat   abstractmethodN(   t   linalg(   t   sparsei   (   t   six(   t   Parallelt   delayed(   t   BaseEstimatort   ClassifierMixint   RegressorMixin(   t   check_arrayt	   check_X_y(   t   FLOAT_DTYPES(   t   check_random_state(   t   safe_sparse_dot(   t   mean_variance_axist   inplace_column_scale(   t   sparse_lsqr(   t   ArrayDatasett
   CSRDataset(   t   check_is_fitted(   t   NotFittedError(   t	   normalizeg{®Gáz„?c         C   s”   t  | ƒ } | j d t j t j ƒ j ƒ } t j |  ƒ rl t |  j	 |  j
 |  j | | d | ƒ} t } n t |  | | d | ƒ} d } | | f S(   s  Create ``Dataset`` abstraction for sparse and dense inputs.

    This also returns the ``intercept_decay`` which is different
    for sparse datasets.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        Training data

    y : array_like, shape (n_samples, )
        Target values.

    sample_weight : numpy array of shape (n_samples,)
        The weight of each sample

    random_state : int, RandomState instance or None (default)
        Determines random number generation for dataset shuffling and noise.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    dataset
        The ``Dataset`` abstraction
    intercept_decay
        The intercept decay
    i   t   seedg      ð?(   R   t   randintt   npt   iinfot   int32t   maxt   spt   issparseR   t   datat   indptrt   indicest   SPARSE_INTERCEPT_DECAYR   (   t   Xt   yt   sample_weightt   random_statet   rngR   t   datasett   intercept_decay(    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   make_dataset0   s    !		c         C   s  t  | t j ƒ r d } n  | rH t |  d | d d d g d t ƒ}  n9 | r t j |  ƒ rl |  j ƒ  }  q |  j d d ƒ }  n  t	 j
 | d |  j ƒ} | rít j |  ƒ rTt |  d d	 ƒ\ } }	 | sâ |  j j d	 ƒ | (n  | r2|	 |  j d	 9}	 t	 j |	 |	 ƒ }
 ~	 d
 |
 |
 d	 k <t |  d |
 ƒ qÅt	 j |  j d
 d |  j ƒ}
 nq t	 j |  d d	 d | ƒ} |  | 8}  | r¦t |  d d	 d t d t ƒ\ }  }
 n t	 j |  j d
 d |  j ƒ}
 t	 j | d d	 d | ƒ} | | } n t	 j |  j d
 d |  j ƒ} t	 j |  j d
 d |  j ƒ}
 | j d
 k rO|  j j d	 ƒ } n t	 j | j d
 d |  j ƒ} |  | | | |
 f S(   sS  
    Centers data to have mean zero along axis 0. If fit_intercept=False or if
    the X is a sparse matrix, no centering is done, but normalization can still
    be applied. The function returns the statistics necessary to reconstruct
    the input data, which are X_offset, y_offset, X_scale, such that the output

        X = (X - X_offset) / X_scale

    X_scale is the L2 norm of X - X_offset. If sample_weight is not None,
    then the weighted mean of X and y is zero, and not the mean itself. If
    return_mean=True, the mean, eventually weighted, is returned, independently
    of whether X was centered (option used for optimization with sparse data in
    coordinate_descend).

    This is here because nearly all linear models will want their data to be
    centered. This function also systematically makes y consistent with X.dtype
    t   copyt   accept_sparset   csrt   csct   dtypet   ordert   Kt   axisi    i   g      ð?t   weightst   return_normN(   t
   isinstancet   numberst   Numbert   NoneR   R   R   R   R,   R   t   asarrayR0   R   t   typet   shapet   sqrtR   t   onest   averaget   f_normalizet   Falset   Truet   zerost   ndim(   R$   R%   t   fit_interceptR   R,   R&   t   return_meant   check_inputt   X_offsett   X_vart   X_scalet   y_offset(    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   _preprocess_data]   sH    	"
c         C   s‰   |  j  d } t j | | d t j | ƒ j ƒ} t j | ƒ } t j | d f d | | f ƒ} t | |  ƒ }  t | | ƒ } |  | f S(   s+   Rescale data so as to support sample_weighti    R0   R<   (	   R<   R   t   fullt   arrayR0   R=   R   t
   dia_matrixR   (   R$   R%   R&   t	   n_samplest	   sw_matrix(    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   _rescale_data­   s    t   LinearModelc           B   sD   e  Z d  Z e d „  ƒ Z d „  Z d „  Z e e ƒ Z d „  Z	 RS(   s   Base class for Linear Modelsc         C   s   d S(   s
   Fit model.N(    (   t   selfR$   R%   (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   fit½   s    c         C   sH   t  |  d ƒ t | d d d d g ƒ} t | |  j j d t ƒ|  j S(   Nt   coef_R-   R.   R/   t   coot   dense_output(   R   R   R   RV   t   TRB   t
   intercept_(   RT   R$   (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   _decision_functionÁ   s    c         C   s   |  j  | ƒ S(   s  Predict using the linear model

        Parameters
        ----------
        X : array_like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        C : array, shape (n_samples,)
            Returns predicted values.
        (   R[   (   RT   R$   (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   predictÈ   s    c         C   sH   |  j  r; |  j | |  _ | t j | |  j j ƒ |  _ n	 d |  _ d S(   s   Set the intercept_
        g        N(   RE   RV   R   t   dotRY   RZ   (   RT   RH   RK   RJ   (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   _set_interceptÙ   s    	"(
   t   __name__t
   __module__t   __doc__R   RU   R[   R\   t   staticmethodRL   R^   (    (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyRS   º   s   		t   LinearClassifierMixinc           B   s)   e  Z d  Z d „  Z d „  Z d „  Z RS(   sR   Mixin for linear classifiers.

    Handles prediction for sparse and dense X.
    c         C   sÚ   t  |  d ƒ s |  j d	 k rB t d i t |  ƒ j d 6ƒ ‚ n  t | d d ƒ} |  j j d } | j d | k r— t d | j d | f ƒ ‚ n  t	 | |  j j
 d t ƒ|  j } | j d d k rÖ | j ƒ  S| S(
   sN  Predict confidence scores for samples.

        The confidence score for a sample is the signed distance of that
        sample to the hyperplane.

        Parameters
        ----------
        X : array_like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)
            Confidence scores per (sample, class) combination. In the binary
            case, confidence score for self.classes_[1] where >0 means this
            class would be predicted.
        RV   s(   This %(name)s instance is not fitted yett   nameR-   R.   i   s*   X has %d features per sample; expecting %dRX   N(   t   hasattrRV   R9   R   R;   R_   R   R<   t
   ValueErrorR   RY   RB   RZ   t   ravel(   RT   R$   t
   n_featurest   scores(    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   decision_functionë   s    c         C   s\   |  j  | ƒ } t | j ƒ d k r? | d k j t j ƒ } n | j d d ƒ } |  j | S(   s&  Predict class labels for samples in X.

        Parameters
        ----------
        X : array_like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        C : array, shape [n_samples]
            Predicted class label per sample.
        i   i    R3   (   Rj   t   lenR<   t   astypeR   t   intt   argmaxt   classes_(   RT   R$   Ri   R"   (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyR\     s
    c         C   s    |  j  | ƒ } | d 9} t j | | ƒ | d 7} t j | | ƒ | j d k rl t j d | | g ƒ j S| | j d d ƒ j | j	 d d f ƒ } | Sd S(   sî   Probability estimation for OvR logistic regression.

        Positive class probabilities are computed as
        1. / (1. + np.exp(-self.decision_function(X)));
        multiclass is handled by normalizing that over all classes.
        iÿÿÿÿi   R3   i    N(
   Rj   R   t   expt
   reciprocalRD   t   vstackRY   t   sumt   reshapeR<   (   RT   R$   t   prob(    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   _predict_proba_lr   s    

,(   R_   R`   Ra   Rj   R\   Rv   (    (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyRc   å   s   	!	t   SparseCoefMixinc           B   s    e  Z d  Z d „  Z d „  Z RS(   sl   Mixin for converting coef_ to and from CSR format.

    L1-regularizing estimators should inherit this.
    c         C   sD   d } t  |  d d | ƒt j |  j ƒ r@ |  j j ƒ  |  _ n  |  S(   s…  Convert coefficient matrix to dense array format.

        Converts the ``coef_`` member (back) to a numpy.ndarray. This is the
        default format of ``coef_`` and is required for fitting, so calling
        this method is only required on models that have previously been
        sparsified; otherwise, it is a no-op.

        Returns
        -------
        self : estimator
        s6   Estimator, %(name)s, must be fitted before densifying.RV   t   msg(   R   R   R   RV   t   toarray(   RT   Rx   (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   densify:  s
    c         C   s2   d } t  |  d d | ƒt j |  j ƒ |  _ |  S(   sv  Convert coefficient matrix to sparse format.

        Converts the ``coef_`` member to a scipy.sparse matrix, which for
        L1-regularized models can be much more memory- and storage-efficient
        than the usual numpy.ndarray representation.

        The ``intercept_`` member is not converted.

        Notes
        -----
        For non-sparse models, i.e. when there are not many zeros in ``coef_``,
        this may actually *increase* memory usage, so use this method with
        care. A rule of thumb is that the number of zero elements, which can
        be computed with ``(coef_ == 0).sum()``, must be more than 50% for this
        to provide significant benefits.

        After calling this method, further fitting with the partial_fit
        method (if any) will not work until you call densify.

        Returns
        -------
        self : estimator
        s7   Estimator, %(name)s, must be fitted before sparsifying.RV   Rx   (   R   R   t
   csr_matrixRV   (   RT   Rx   (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   sparsifyL  s    (   R_   R`   Ra   Rz   R|   (    (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyRw   4  s   	t   LinearRegressionc           B   s/   e  Z d  Z e e e d d „ Z d d „ Z RS(   s	  
    Ordinary least squares Linear Regression.

    Parameters
    ----------
    fit_intercept : boolean, optional, default True
        whether to calculate the intercept for this model. If set
        to False, no intercept will be used in calculations
        (e.g. data is expected to be already centered).

    normalize : boolean, optional, default False
        This parameter is ignored when ``fit_intercept`` is set to False.
        If True, the regressors X will be normalized before regression by
        subtracting the mean and dividing by the l2-norm.
        If you wish to standardize, please use
        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on
        an estimator with ``normalize=False``.

    copy_X : boolean, optional, default True
        If True, X will be copied; else, it may be overwritten.

    n_jobs : int or None, optional (default=None)
        The number of jobs to use for the computation. This will only provide
        speedup for n_targets > 1 and sufficient large problems.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    Attributes
    ----------
    coef_ : array, shape (n_features, ) or (n_targets, n_features)
        Estimated coefficients for the linear regression problem.
        If multiple targets are passed during the fit (y 2D), this
        is a 2D array of shape (n_targets, n_features), while if only
        one target is passed, this is a 1D array of length n_features.

    intercept_ : array
        Independent term in the linear model.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LinearRegression
    >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
    >>> # y = 1 * x_0 + 2 * x_1 + 3
    >>> y = np.dot(X, np.array([1, 2])) + 3
    >>> reg = LinearRegression().fit(X, y)
    >>> reg.score(X, y)
    1.0
    >>> reg.coef_
    array([1., 2.])
    >>> reg.intercept_ # doctest: +ELLIPSIS
    3.0000...
    >>> reg.predict(np.array([[3, 5]]))
    array([16.])

    Notes
    -----
    From the implementation point of view, this is just plain Ordinary
    Least Squares (scipy.linalg.lstsq) wrapped as a predictor object.

    c         C   s(   | |  _  | |  _ | |  _ | |  _ d  S(   N(   RE   R   t   copy_Xt   n_jobs(   RT   RE   R   R~   R   (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   __init__ª  s    			c   
         s   |  j  } t ˆ  ˆ d d d d g d t d t ƒ\ ‰  ‰ | d k	 rl t j | ƒ j d k rl t d ƒ ‚ n  |  j ˆ  ˆ d	 |  j	 d
 |  j
 d |  j d | ƒ\ ‰  ‰ } } } | d k	 rÕ t ˆ  ˆ | ƒ \ ‰  ‰ n  t j ˆ  ƒ r©ˆ j d k  rt ˆ  ˆ ƒ } | d |  _ | d |  _ qât d | ƒ ‡  ‡ f d †  t ˆ j d ƒ Dƒ ƒ }	 t j g  |	 D] } | d ^ qaƒ |  _ t j g  |	 D] } | d ^ qŠƒ |  _ n9 t j ˆ  ˆ ƒ \ |  _ |  _ |  _ |  _ |  j j |  _ ˆ j d k r	t j |  j ƒ |  _ n  |  j | | | ƒ |  S(   sC  
        Fit linear model.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Training data

        y : array_like, shape (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary

        sample_weight : numpy array of shape [n_samples]
            Individual weights for each sample

            .. versionadded:: 0.17
               parameter *sample_weight* support to LinearRegression.

        Returns
        -------
        self : returns an instance of self.
        R-   R.   R/   RW   t	   y_numerict   multi_outputi   s)   Sample weights must be 1D array or scalarRE   R   R,   R&   i   i    i   R   c         3   s:   |  ]0 } t  t ƒ ˆ  ˆ d  d  … | f j ƒ  ƒ Vq d  S(   N(   R   R   Rg   (   t   .0t   j(   R$   R%   (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pys	   <genexpr>ß  s   N(   R   R   RB   R9   R   t
   atleast_1dRD   Rf   RL   RE   R   R~   RR   R   R   R   RV   t	   _residuesR   t   rangeR<   Rr   R   t   lstsqt   rank_t	   singular_RY   Rg   R^   (
   RT   R$   R%   R&   t   n_jobs_RH   RK   RJ   t   outt   outs(    (   R$   R%   s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyRU   ±  s4    	$!),*N(   R_   R`   Ra   RB   RA   R9   R€   RU   (    (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyR}   j  s   >	c         C   sŠ  |  j  \ } }	 t j |  ƒ rc t } t |  | d | d | d t d t d | ƒ\ }  } }
 } } n6 t |  | d | d | d | d | ƒ\ }  } }
 } } t | d ƒ r| rÊ t j |
 t j	 |	 ƒ ƒ sì | rt j | t j
 |	 ƒ ƒ rt j d t ƒ d } d } n  t | t j ƒ r8| d k r8| |	 k } n  | t k r‡t j d	 |	 |	 f d
 |  j d d ƒ } t j |  j |  d | ƒn  t | d ƒ sŸd } n  t | d ƒ rq| d k rqt j |  j | j g g  ƒ } | j d k r!t j d	 |	 d
 | d d ƒ } t j |  j | d | ƒqq| j  d } t j d	 |	 | f d
 | d d ƒ } t j | j |  d | j ƒn  |  | |
 | | | | f S(   s6   Aux function used at beginning of fit in linear modelsRE   R   R,   RF   RG   t	   __array__sl   Gram matrix was provided but X was centered to fit intercept, or X was normalized : recomputing Gram matrix.t   autoR<   R0   R1   t   CRŒ   i   t   FN(   R<   R   t
   isspmatrixRA   RL   RB   Re   R   t   allcloseRC   R>   t   warningst   warnt   UserWarningR9   R6   R   t   string_typest   emptyR0   R]   RY   t   find_common_typeRD   (   R$   R%   t   Xyt
   precomputeR   RE   R,   RG   RP   Rh   RH   RK   RJ   t   common_dtypet	   n_targets(    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   _pre_fitî  sD    '""					(<   Ra   t
   __future__R    t   abcR   R   R7   R”   t   numpyR   t   scipy.sparseR   R   t   scipyR   t	   externalsR   t   utils._joblibR   R   t   baseR   R	   R
   t   utilsR   R   t   utils.validationR   R   t   utils.extmathR   t   utils.sparsefuncsR   R   t   utils.fixesR   t   utils.seq_datasetR   R   R   t
   exceptionsR   t   preprocessing.dataR   R@   R#   R9   R+   RA   RB   RL   RR   t   with_metaclassRS   Rc   t   objectRw   R}   Rž   (    (    (    s8   lib/python2.7/site-packages/sklearn/linear_model/base.pyt   <module>   s>   -O	"+O6…