
p7]c           @  s(  d  Z  d d l m Z d d l m Z m Z m Z d d l Z d d l	 m
 Z
 d d l m Z d d l m Z d d l m Z m Z d d	 l m Z m Z d d l j j Z d d l j j Z d d
 l m Z d d l Z d d l m Z d d l m  Z  d d l! m" Z# d Z$ d d d d d g Z% d Z& d   Z' d e j( f d     YZ) d e) f d     YZ* d e) f d     YZ+ d e+ f d     YZ, d e* f d     YZ- d d e. e/ e0 d  Z1 d e0 d  Z2 d  e j3 f d!     YZ4 d" e4 f d#     YZ5 d$ e j6 f d%     YZ7 e j8 e7 e4  d S(&   sH  
This module implements standard regression models:

Generalized Least Squares (GLS)
Ordinary Least Squares (OLS)
Weighted Least Squares (WLS)
Generalized Least Squares with autoregressive error terms GLSAR(p)

Models are specified with an endogenous response variable and an
exogenous design matrix and are fit using their `fit` method.

Subclasses that have more complicated covariance matrices
should write over the 'whiten' method as the fit method
prewhitens the response by calling 'whiten'.

General reference for regression models:

D. C. Montgomery and E.A. Peck. "Introduction to Linear Regression
    Analysis." 2nd. Ed., Wiley, 1992.

Econometrics references for regression models:

R. Davidson and J.G. MacKinnon.  "Econometric Theory and Methods," Oxford,
    2004.

W. Green.  "Econometric Analysis," 5th ed., Pearson, 2003.
i(   t   print_function(   t   lranget   lzipt   rangeN(   t   toeplitz(   t   stats(   t   optimize(   t	   chain_dott   pinv_extended(   t   cache_readonlyt   cache_writable(   t
   _ELRegOpts(   t   InvalidTestWarning(   t   PredictionResultsi   (   t   _predictions   restructuredtext ent   GLSt   WLSt   OLSt   GLSARR   sI  
        Return a regularized fit to a linear regression model.

        Parameters
        ----------
        method : string
            'elastic_net' and 'sqrt_lasso' are currently implemented.
        alpha : scalar or array-like
            The penalty weight.  If a scalar, the same penalty weight
            applies to all variables in the model.  If a vector, it
            must have the same length as `params`, and contains a
            penalty weight for each coefficient.
        L1_wt: scalar
            The fraction of the penalty given to the L1 penalty term.
            Must be between 0 and 1 (inclusive).  If 0, the fit is a
            ridge fit, if 1 it is a lasso fit.
        start_params : array-like
            Starting values for ``params``.
        profile_scale : bool
            If True the penalized fit is computed using the profile
            (concentrated) log-likelihood for the Gaussian model.
            Otherwise the fit uses the residual sum of squares.
        refit : bool
            If True, the model is refit using only the variables that
            have non-zero coefficients in the regularized fit.  The
            refitted model is not regularized.
        distributed : bool
            If True, the model uses distributed methods for fitting,
            will raise an error if True and partitions is None.
        generator : function
            generator used to partition the model, allows for handling
            of out of memory/parallel computing.
        partitions : scalar
            The number of partitions desired for the distributed
            estimation.
        threshold : scalar or array-like
            The threshold below which coefficients are zeroed out,
            only used for distributed estimation

        Returns
        -------
        A RegularizedResults instance.

        Notes
        -----
        The elastic net uses a combination of L1 and L2 penalties.
        The implementation closely follows the glmnet package in R.

        The function that is minimized is:

        .. math::

            0.5*RSS/n + alpha*((1-L1\_wt)*|params|_2^2/2 + L1\_wt*|params|_1)

        where RSS is the usual regression sum of squares, n is the
        sample size, and :math:`|*|_1` and :math:`|*|_2` are the L1 and L2
        norms.

        For WLS and GLS, the RSS is calculated using the whitened endog and
        exog data.

        Post-estimation results are based on the same data used to
        select variables, hence may be subject to overfitting biases.

        The elastic_net method uses the following keyword arguments:

        maxiter : int
            Maximum number of iterations
        cnvrg_tol : float
            Convergence threshold for line searches
        zero_tol : float
            Coefficients below this threshold are treated as zero.

        The square root lasso approach is a variation of the Lasso
        that is largely self-tuning (the optimal tuning parameter
        does not depend on the standard deviation of the regression
        errors).  If the errors are Gaussian, the tuning parameter
        can be taken to be

        alpha = 1.1 * np.sqrt(n) * norm.ppf(1 - 0.05 / (2 * p))

        where n is the sample size and p is the number of predictors.

        The square root lasso uses the following keyword arguments:

        zero_tol : float
            Coefficients below this threshold are treated as zero.


        References
        ----------
        Friedman, Hastie, Tibshirani (2008).  Regularization paths for
        generalized linear models via coordinate descent.  Journal of
        Statistical Software 33(1), 1-22 Feb 2010.

        A Belloni, V Chernozhukov, L Wang (2011).  Square-root Lasso:
        pivotal recovery of sparse signals via conic programming.
        Biometrika 98(4), 791-806.
        https://arxiv.org/pdf/1009.5689.pdf
        c         C  s   |  d k r d St j |   j   }  |  j d k rI t j |  |  }  n  |  j d k r |  j | f k r t d | | | f   n  d t j |   } nR |  j | | f k r t d | | | f   n  t j	 j
 t j	 j |    j } |  | f S(   s   
    Returns sigma (matrix, nobs by nobs) for GLS and the inverse of its
    Cholesky decomposition.  Handles dimensions and checks integrity.
    If sigma is None, returns None, None. Otherwise returns sigma,
    cholsigmainv.
    i    i   sF   Sigma must be a scalar, 1d of length %s or a 2d array of shape %s x %sN(   NN(   t   Nonet   npt   asarrayt   squeezet   ndimt   repeatt   shapet
   ValueErrort   sqrtt   linalgt   choleskyt   invt   T(   t   sigmat   nobst   cholsigmainv(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt
   _get_sigma   s    !t   RegressionModelc           B  s   e  Z d  Z d   Z d   Z e d    Z e j d    Z e d    Z e j d    Z d   Z	 d d	 d d d
  Z d d  Z d d d  Z RS(   sp   
    Base class for linear regression models. Should not be directly called.

    Intended for subclassing.
    c         K  s<   t  t |   j | | |  |  j j d d d d g  d  S(   Nt
   pinv_wexogt   wendogt   wexogt   weights(   t   superR$   t   __init__t
   _data_attrt   extend(   t   selft   endogt   exogt   kwargs(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR*      s    c         C  sb   |  j  |  j  |  _ |  j  |  j  |  _ t |  j j d  |  _ d  |  _	 d  |  _
 d  |  _ d  S(   Ni    (   t   whitenR/   R'   R.   R&   t   floatR   R!   R   t	   _df_modelt	   _df_residt   rank(   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt
   initialize   s    		c         C  s\   |  j  d k rU |  j d k r9 t j j |  j  |  _ n  t |  j |  j  |  _  n  |  j  S(   s   
        The model degree of freedom, defined as the rank of the regressor
        matrix minus 1 if a constant is included.
        N(	   R3   R   R5   R   R   t   matrix_rankR/   R2   t
   k_constant(   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   df_model   s
    c         C  s   | |  _  d  S(   N(   R3   (   R-   t   value(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR9      s    c         C  sV   |  j  d k rO |  j d k r9 t j j |  j  |  _ n  |  j |  j |  _  n  |  j  S(   s   
        The residual degree of freedom, defined as the number of observations
        minus the rank of the regressor matrix.
        N(   R4   R   R5   R   R   R7   R/   R!   (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   df_resid   s
    c         C  s   | |  _  d  S(   N(   R4   (   R-   R:   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR;      s    c         C  s   t  d   d  S(   Ns   Subclasses should implement.(   t   NotImplementedError(   R-   t   X(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR1      s    t   pinvt	   nonrobustc         K  s  | d k r t  |  d  o6 t  |  d  o6 t  |  d  s t |  j  \ |  _ } t j |  j t j |  j   |  _ | |  _ t j	 j
 t j |   |  _ n  t j |  j |  j  } n#| d k rt  |  d  o t  |  d  o t  |  d  o t  |  d  st j	 j |  j  \ } }	 | |	 |  _ |  _ t j	 j t j |	 j |	   |  _ t j	 j |	 d d  |  _ t j	 j
 |	  |  _ n |  j |  j } }	 t j | j |  j  |  _ }
 t j	 j |	 |
  } n t d	   |  j d k rt |  j |  j  |  _ n  |  j d k r-|  j |  j |  _ n  t |  t  rit  |  | d |  j d
 | d | d | } n- t! |  | d |  j d
 | d | d | | } t" |  S(   s  
        Full fit of the model.

        The results include an estimate of covariance matrix, (whitened)
        residuals and an estimate of scale.

        Parameters
        ----------
        method : str, optional
            Can be "pinv", "qr".  "pinv" uses the Moore-Penrose pseudoinverse
            to solve the least squares problem. "qr" uses the QR
            factorization.
        cov_type : str, optional
            See `regression.linear_model.RegressionResults` for a description
            of the available covariance estimators
        cov_kwds : list or None, optional
            See `linear_model.RegressionResults.get_robustcov_results` for a
            description required keywords for alternative covariance estimators
        use_t : bool, optional
            Flag indicating to use the Student's t distribution when computing
            p-values.  Default behavior depends on cov_type. See
            `linear_model.RegressionResults.get_robustcov_results` for
            implementation details.

        Returns
        -------
        A RegressionResults class instance.

        See Also
        --------
        regression.linear_model.RegressionResults
        regression.linear_model.RegressionResults.get_robustcov_results

        Notes
        -----
        The fit method uses the pseudoinverse of the design/exogenous variables
        to solve the least squares minimization.
        R>   R%   t   normalized_cov_paramsR5   t   qrt   exog_Qt   exog_Ri    s   method has to be "pinv" or "qr"t   cov_typet   cov_kwdst   use_tN(#   t   hasattrR   R'   R%   R   t   dott	   transposeR@   t   wexog_singular_valuesR   R7   t   diagR5   R&   RA   RB   RC   R   R   t   svdt   effectst   solveR   R3   R   R2   R8   R4   R!   R;   t
   isinstanceR   t
   OLSResultst   RegressionResultst   RegressionResultsWrapper(   R-   t   methodRD   RE   RF   R0   t   singular_valuest   betat   Qt   RRM   t   lfit(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   fit   sN    (	!$					c         C  s(   | d k r |  j } n  t j | |  S(   s  
        Return linear predicted values from a design matrix.

        Parameters
        ----------
        params : array-like
            Parameters of a linear model
        exog : array-like, optional.
            Design / exogenous data. Model exog is used if None.

        Returns
        -------
        An array of fitted values

        Notes
        -----
        If the model has not yet been fit, params is not optional.
        N(   R   R/   R   RH   (   R-   t   paramsR/   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   predictS  s    c         C  sY   |  j  | |  } | d k r7 d d l m } | } n  | d | d t j |   } | S(   s  
        Returns a random number generator for the predictive distribution.

        Parameters
        ----------
        params : array-like
            The model parameters (regression coefficients).
        scale : scalar
            The variance parameter.
        exog : array-like
            The predictor variable matrix.
        dist_class : class
            A random number generator class.  Must take 'loc' and 'scale'
            as arguments and return a random number generator implementing
            an ``rvs`` method for simulating random values. Defaults to Gaussian.

        Returns
        -------
        gen
            Frozen random number generator object with mean and variance
            determined by the fitted linear model.  Use the ``rvs`` method
            to generate random values.

        Notes
        -----
        Due to the behavior of ``scipy.stats.distributions objects``,
        the returned random number generator must be called with
        ``gen.rvs(n)`` where ``n`` is the number of observations in
        the data set used to fit the model.  If any other value is
        used for ``n``, misleading results will be produced.
        i(   t   normt   loct   scaleN(   R[   R   t   scipy.stats.distributionsR\   R   R   (   R-   RZ   R^   R/   t
   dist_classRY   R\   t   gen(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   get_distributionn  s     	N(   t   __name__t
   __module__t   __doc__R*   R6   t   propertyR9   t   setterR;   R1   R   RY   R[   Rb   (    (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR$      s   		
		]c           B  s   e  Z d  i e j d 6e j e j d 6Z d d d d  Z d   Z	 d   Z
 d e d  Z d d	 d
 d e e d  Z e e _ RS(   s8
  
    Generalized least squares model with a general covariance structure.

    %(params)s
    sigma : scalar or array
        `sigma` is the weighting matrix of the covariance.
        The default is None for no scaling.  If `sigma` is a scalar, it is
        assumed that `sigma` is an n x n diagonal matrix with the given
        scalar, `sigma` as the value of each diagonal element.  If `sigma`
        is an n-length vector, then `sigma` is assumed to be a diagonal
        matrix with the given `sigma` on the diagonal.  This should be the
        same as WLS.
    %(extra_params)s

    Attributes
    ----------
    pinv_wexog : array
        `pinv_wexog` is the p x n Moore-Penrose pseudoinverse of `wexog`.
    cholsimgainv : array
        The transpose of the Cholesky decomposition of the pseudoinverse.
    df_model : float
        p - 1, where p is the number of regressors including the intercept.
        of freedom.
    df_resid : float
        Number of observations n less the number of parameters p.
    llf : float
        The value of the likelihood function of the fitted model.
    nobs : float
        The number of observations n.
    normalized_cov_params : array
        p x p array :math:`(X^{T}\Sigma^{-1}X)^{-1}`
    results : RegressionResults instance
        A property that returns the RegressionResults class if fit.
    sigma : array
        `sigma` is the n x n covariance structure of the error terms.
    wexog : array
        Design matrix whitened by `cholsigmainv`
    wendog : array
        Response variable whitened by `cholsigmainv`

    Notes
    -----
    If sigma is a function of the data making one of the regressors
    a constant, then the current postestimation statistics will not be correct.


    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.longley.load(as_pandas=False)
    >>> data.exog = sm.add_constant(data.exog)
    >>> ols_resid = sm.OLS(data.endog, data.exog).fit().resid
    >>> res_fit = sm.OLS(ols_resid[1:], ols_resid[:-1]).fit()
    >>> rho = res_fit.params

    `rho` is a consistent estimator of the correlation of the residuals from
    an OLS fit of the longley data.  It is assumed that this is the true rho
    of the AR process data.

    >>> from scipy.linalg import toeplitz
    >>> order = toeplitz(np.arange(16))
    >>> sigma = rho**order

    `sigma` is an n x n matrix of the autocorrelation structure of the
    data.

    >>> gls_model = sm.GLS(data.endog, data.exog, sigma=sigma)
    >>> gls_results = gls_model.fit()
    >>> print(gls_results.summary())

    RZ   t   extra_paramst   nonec         K  si   t  | t |   \ } } t t |   j | | d | d | d | d | | |  j j d d g  d  S(   Nt   missingt   hasconstR    R"   (   R#   t   lenR)   R   R*   R+   R,   (   R-   R.   R/   R    Rj   Rk   R0   R"   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR*     s
    
c         C  s   t  j |  } |  j d k s0 |  j j d k r4 | S|  j j d k r~ | j d k r` | |  j S| |  j d d  d f Sn t  j |  j |  Sd S(   s   
        GLS whiten method.

        Parameters
        ----------
        X : array-like
            Data to be whitened.

        Returns
        -------
        np.dot(cholsigmainv,X)

        See Also
        --------
        regression.GLS
        i   N(    (   R   R   R    R   R   R   R"   RH   (   R-   R=   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR1     s    !c         C  s   |  j  d } t j |  j t j |  j |  d d d } t j |  | } | d t j t j |  | 8} t j |  j	  r |  j	 j
 d k r t j j |  j	  } | d | d 8} q | d t j t j |  j	   8} n  | S(   s)  
        Returns the value of the Gaussian log-likelihood function at params.

        Given the whitened design matrix, the log-likelihood is evaluated
        at the parameter vector `params` for the dependent variable `endog`.

        Parameters
        ----------
        params : array-like
            The parameter estimates

        Returns
        -------
        loglike : float
            The value of the log-likelihood function for a GLS Model.


        Notes
        -----
        The log-likelihood function for the normal distribution is

        .. math:: -\frac{n}{2}\log\left(\left(Y-\hat{Y}\right)^{\prime}\left(Y-\hat{Y}\right)\right)-\frac{n}{2}\left(1+\log\left(\frac{2\pi}{n}\right)\right)-\frac{1}{2}\log\left(\left|\Sigma\right|\right)

        Y and Y-hat are whitened.

        g       @i   t   axisi    i   g      ?(   R!   R   t   sumR&   RH   R'   t   logt   pit   anyR    R   R   t   slogdet(   R-   RZ   t   nobs2t   SSRt   llft   det(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   loglike
  s    /"&c         C  se   |  j  d k s! |  j  j d k r8 t j |  j j d  S|  j  j d k rQ |  j St j |  j  Sd S(   s  Weights for calculating Hessian

        Parameters
        ----------
        params : ndarray
            parameter at which Hessian is evaluated
        scale : None or float
            If scale is None, then the default scale will be calculated.
            Default scale is defined by `self.scaletype` and set in fit.
            If scale is not None, then it is used as a fixed scale.
        observed : bool
            If True, then the observed Hessian is returned. If false then the
            expected information matrix is returned.

        Returns
        -------
        hessian_factor : ndarray, 1d
            A 1d weight vector used in the calculation of the Hessian.
            The hessian is obtained by `(exog.T * hessian_factor).dot(exog)`
        i    i   N(    (	   R    R   R   R   t   onesR/   R   R"   RK   (   R-   RZ   R^   t   observed(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   hessian_factor4  s
    !t   elastic_netg        g      ?c         K  s   |  j  d  k	 rB | t j d t j |  j    t |  j  } n  t |  j |  j	  j
 d | d | d | d | d | d | |  } d d	 l m }	 m }
 |	 |  | j  } |
 |  S(
   Ni   RS   t   alphat   L1_wtt   start_paramst   profile_scalet   refiti(   t   RegularizedResultst   RegularizedResultsWrapper(   R    R   R   Rn   RK   Rl   R.   R   R&   R'   t   fit_regularizedt   statsmodels.base.elastic_netR   R   RZ   (   R-   RS   R|   R}   R~   R   R   R0   t   rsltR   R   t   rrslt(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR   Q  s    3N(   Rc   Rd   t   baset   _model_params_doct   _missing_param_doct   _extra_param_docRe   R   R*   R1   Rw   t   TrueRz   t   FalseR   t   _fit_regularized_doc(    (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR     s   H		*	c           B  s   e  Z d  i e j d 6e j e j d 6Z d d d d  Z d   Z	 d   Z
 d e d  Z d	 d
 d d e e d  Z e e _ RS(   sJ  
    A regression model with diagonal but non-identity covariance structure.

    The weights are presumed to be (proportional to) the inverse of
    the variance of the observations.  That is, if the variables are
    to be transformed by 1/sqrt(W) you must supply weights = 1/W.

    %(params)s
    weights : array-like, optional
        1d array of weights.  If you supply 1/W then the variables are
        pre- multiplied by 1/sqrt(W).  If no weights are supplied the
        default value is 1 and WLS results are the same as OLS.
    %(extra_params)s

    Attributes
    ----------
    weights : array
        The stored weights supplied as an argument.

    See Also
    --------
    regression.GLS

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> Y = [1,3,4,5,2,3,4]
    >>> X = range(1,8)
    >>> X = sm.add_constant(X)
    >>> wls_model = sm.WLS(Y,X, weights=list(range(1,8)))
    >>> results = wls_model.fit()
    >>> results.params
    array([ 2.91666667,  0.0952381 ])
    >>> results.tvalues
    array([ 2.0652652 ,  0.35684428])
    >>> print(results.t_test([1, 0]))
    <T test: effect=array([ 2.91666667]), sd=array([[ 1.41224801]]), t=array([[ 2.0652652]]), p=array([[ 0.04690139]]), df_denom=5>
    >>> print(results.f_test([0, 1]))
    <F test: F=array([[ 0.12733784]]), p=[[ 0.73577409]], df_denom=5, df_num=1>

    Notes
    -----
    If the weights are a function of the data, then the post estimation
    statistics such as fvalue and mse_model might not be correct, as the
    package does not yet support no-constant regression.
    RZ   Rh   g      ?Ri   c      
   K  sL  t  j |  } | j d	 k r | d k re d | k re | d d  k	 re t  j | t | d   } q t  j | t |   } n  t |  d k r t  j | j   g  } n | j   } t t |   j	 | | d | d | d | | |  j
 j d } |  j } | t  j |  | } | j | k rH| j d | k rHt d   n  d  S(
   Nt   dropt   missing_idxi   Rj   R(   Rk   i    s/   Weights must be scalar or same length as design(    (   R   t   arrayR   R   R   Rl   R   R)   R   R*   R/   R(   Rn   t   sizeR   (   R-   R.   R/   R(   Rj   Rk   R0   R!   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR*     s     	"c         C  si   t  j |  } | j d k r2 | t  j |  j  S| j d k re t  j |  j  d d  d f | Sd S(   s  
        Whitener for WLS model, multiplies each column by sqrt(self.weights)

        Parameters
        ----------
        X : array-like
            Data to be whitened

        Returns
        -------
        whitened : array-like
            sqrt(weights)*X
        i   i   N(   R   R   R   R   R(   R   (   R-   R=   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR1     s
    c         C  s   |  j  d } t j |  j t j |  j |  d d d } t j |  | } | d t j t j |  | 8} | d t j t j |  j   7} | S(   s  
        Returns the value of the gaussian log-likelihood function at params.

        Given the whitened design matrix, the log-likelihood is evaluated
        at the parameter vector `params` for the dependent variable `Y`.

        Parameters
        ----------
        params : array-like
            The parameter estimates.

        Returns
        -------
        llf : float
            The value of the log-likelihood function for a WLS Model.

        Notes
        --------
        .. math:: -\frac{n}{2}\log\left(Y-\hat{Y}\right)-\frac{n}{2}\left(1+\log\left(\frac{2\pi}{n}\right)\right)-\frac{1}{2}log\left(\left|W\right|\right)

        where :math:`W` is a diagonal matrix
        g       @i   Rm   i    i   g      ?(	   R!   R   Rn   R&   RH   R'   Ro   Rp   R(   (   R-   RZ   Rs   Rt   Ru   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyRw     s    /"#c         C  s   |  j  S(   s  Weights for calculating Hessian

        Parameters
        ----------
        params : ndarray
            parameter at which Hessian is evaluated
        scale : None or float
            If scale is None, then the default scale will be calculated.
            Default scale is defined by `self.scaletype` and set in fit.
            If scale is not None, then it is used as a fixed scale.
        observed : bool
            If True, then the observed Hessian is returned. If false then the
            expected information matrix is returned.

        Returns
        -------
        hessian_factor : ndarray, 1d
            A 1d weight vector used in the calculation of the Hessian.
            The hessian is obtained by `(exog.T * hessian_factor).dot(exog)`
        (   R(   (   R-   RZ   R^   Ry   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyRz     s    R{   g        c         K  s   | t  j |  j  t |  j  } t |  j |  j  j d | d | d | d | d | d | |  } d d l m	 }	 m
 }
 |	 |  | j  } |
 |  S(	   NRS   R|   R}   R~   R   R   i(   R   R   (   R   Rn   R(   Rl   R   R&   R'   R   R   R   R   RZ   (   R-   RS   R|   R}   R~   R   R   R0   R   R   R   R   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR     s    #N(   Rc   Rd   R   R   R   R   Re   R   R*   R1   Rw   R   Rz   R   R   R   (    (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR   j  s   /			c           B  s   e  Z d  i e j d 6e j e j d 6Z d d d d  Z d d  Z	 d   Z
 d d  Z d   Z d d	  Z d e d
  Z d d d d e e d  Z e e _ d   Z d   Z RS(   s  
    A simple ordinary least squares model.

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    weights : scalar
        Has an attribute weights = array(1.0) due to inheritance from WLS.

    See Also
    --------
    GLS

    Examples
    --------
    >>> import numpy as np
    >>>
    >>> import statsmodels.api as sm
    >>>
    >>> Y = [1,3,4,5,2,3,4]
    >>> X = range(1,8)
    >>> X = sm.add_constant(X)
    >>>
    >>> model = sm.OLS(Y,X)
    >>> results = model.fit()
    >>> results.params
    array([ 2.14285714,  0.25      ])
    >>> results.tvalues
    array([ 1.87867287,  0.98019606])
    >>> print(results.t_test([1, 0]))
    <T test: effect=array([ 2.14285714]), sd=array([[ 1.14062282]]), t=array([[ 1.87867287]]), p=array([[ 0.05953974]]), df_denom=5>
    >>> print(results.f_test(np.identity(2)))
    <F test: F=array([[ 19.46078431]]), p=[[ 0.00437251]], df_denom=5, df_num=2>

    Notes
    -----
    No constant is added by the model unless you are using formulas.
    RZ   Rh   Ri   c         K  sN   t  t |   j | | d | d | | d |  j k rJ |  j j d  n  d  S(   NRj   Rk   R(   (   R)   R   R*   t
   _init_keyst   remove(   R-   R.   R/   Rj   Rk   R0   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR*   C  s    
c         C  s   |  j  d } t |  j   } |  j t j |  j |  } t |  d  rW | |  j 8} n  t j | d  } | d k r | t j
 d t j  | t j
 | |  | } n+ | t j
 d t j |  | d | } | S(   s  
        The likelihood function for the OLS model.

        Parameters
        ----------
        params : array-like
            The coefficients with which to estimate the log-likelihood.
        scale : float or None
            If None, return the profile (concentrated) log likelihood
            (profiled over the scale parameter), else return the
            log-likelihood using the given scale value.

        Returns
        -------
        The likelihood function evaluated at params.
        g       @t   offseti   N(   R!   R2   R.   R   RH   R/   RG   R   Rn   R   Ro   Rp   (   R-   RZ   R^   Rs   R!   t   residt   ssrRu   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyRw   J  s    7+c         C  s   | S(   s=   
        OLS model whitener does nothing: returns Y.
        (    (   R-   t   Y(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR1   i  s    c         C  s   t  |  d  s |  j   n  t j |  j |  } |  j | } | d k r |  j d t j |  j j |  } | t j | |  7} |  j	 | | S| | Sd S(   s  
        Evaluate the score function at a given point.

        The score corresponds to the profile (concentrated)
        log-likelihood in which the scale parameter has been profiled
        out.

        Parameters
        ----------
        params : array-like
            The parameter vector at which the score function is
            computed.
        scale : float or None
            If None, return the profile (concentrated) log likelihood
            (profiled over the scale parameter), else return the
            log-likelihood using the given scale value.

        Returns
        -------
        The score vector.
        t   _wexog_xprodi   N(
   RG   t   _setup_score_hessR   RH   R   t   _wexog_x_wendogR   t   _wendog_xprodR   R!   (   R-   RZ   R^   t   xtxbt   sdrR   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   scoreo  s    c         C  s{   |  j  } t |  d  r( | |  j } n  t j | |  |  _ t j |  j j |  j  |  _	 t j |  j j |  |  _
 d  S(   NR   (   R&   RG   R   R   Rn   R   RH   R'   R   R   R   (   R-   t   y(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR     s    	c         C  s   t  |  d  s |  j   n  t j |  j |  } | d k r |  j d t j |  j j |  } | t j | |  7} d |  j d | } |  j | t j	 | |  | d } |  j
 | d S|  j | Sd S(   s  
        Evaluate the Hessian function at a given point.

        Parameters
        ----------
        params : array-like
            The parameter vector at which the Hessian is computed.
        scale : float or None
            If None, return the profile (concentrated) log likelihood
            (profiled over the scale parameter), else return the
            log-likelihood using the given scale value.

        Returns
        -------
        The Hessian matrix.
        R   i   iN(   RG   R   R   RH   R   R   R   R   R   t   outerR!   (   R-   RZ   R^   R   R   t   ssrpt   hm(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   hessian  s    %c         C  s   t  j |  j j d  S(   s  Weights for calculating Hessian

        Parameters
        ----------
        params : ndarray
            parameter at which Hessian is evaluated
        scale : None or float
            If scale is None, then the default scale will be calculated.
            Default scale is defined by `self.scaletype` and set in fit.
            If scale is not None, then it is used as a fixed scale.
        observed : bool
            If True, then the observed Hessian is returned. If false then the
            expected information matrix is returned.

        Returns
        -------
        hessian_factor : ndarray, 1d
            A 1d weight vector used in the calculation of the Hessian.
            The hessian is obtained by `(exog.T * hessian_factor).dot(exog)`
        i    (   R   Rx   R/   R   (   R-   RZ   R^   Ry   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyRz     s    R{   g        g      ?c         K  sO  | d k r% d | } t  |   n  i d d 6d d 6d d	 6}	 |	 j |  | d k r d
 d l m }
 m } |  j | | |	 d	  } |
 |  |  } | |  Sd
 d l m } | d k r |  j |  S| r i  } i  } i  } n' i d d 6} i d d 6} i d d 6} | |  d | d | d | d | d | d | d | d | d t |	 	S(   NR{   t
   sqrt_lassos'   Unknown method '%s' for fit_regularizedi2   t   maxiterg|=t	   cnvrg_tolg:0yE>t   zero_toli(   R   R   (   t   fit_elasticneti    i   R^   RS   R|   R}   R~   t   loglike_kwdst
   score_kwdst	   hess_kwdsR   t
   check_step(   R{   R   (	   R   t   updateR   R   R   t   _sqrt_lassoR   t
   _fit_ridgeR   (   R-   RS   R|   R}   R~   R   R   R0   t   msgt   defaultsR   R   RZ   t   resultsR   R   R   R   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR     s>    


	c      
   C  s  y d d  l  } Wn# t k
 r5 d } t |   n Xt |  j  } |  j j d } | j d d | d d f  } | j d | d d f  }	 | j |  j | d f  |	 d d   d f <| j g  g  g  d | d d | d f  }
 x/ t	 d d | d  D] } d |
 | | f <qW| j d | d d | d f  } d | d <|  j | d d   d | d  f <|  j | d d   | d d   f <| j | | d | d d f  } d t
 j |  | d <d d l  m } t | j d <| j | d	 |
 d
 | d | g d |	 g } t
 j | d  j } | d | d !} | | d } | | } | s_| St
 j t
 j |  | k  } t |  j |  j d  d   | f  j   } | d 9} | j | | <| S(   Nis=   sqrt_lasso fitting requires the cvxopt module to be installedi   g        i   i    (   t   solverst   show_progresst   Glt   hlt   Gqt   hqt   x(   i    i    (   t   cvxoptt   ImportErrorR   Rl   R.   R/   R   t   matrixt   spmatrixR   R   R   R   R   t   optionst   socpR   t   flatt   flatnonzerot   absR   RY   RZ   (   R-   R|   R   R   R   R   t   nt   pt   h0t   h1t   G0t   it   G1t   cR   R   R   t   bpt   bnRZ   t   iit   rfr(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR   
  sB     +.$
#$$-
+
c         C  s#  t  j j |  j d  \ } } } | j } t  j | j |  j  | } | | } t  j |  r | | |  j } | | }	 t  j | |	  }	 nw |  j t  j | | d d  d f |  }
 t  j
 |
  | } t  j |
 |  t  j j |
 |  } t  j | |  }	 d d l m } | |  |	  S(   s  
        Fit a linear model using ridge regression.

        Parameters
        ----------
        alpha : scalar or array-like
            The penalty weight.  If a scalar, the same penalty weight
            applies to all variables in the model.  If a vector, it
            must have the same length as `params`, and contains a
            penalty weight for each coefficient.

        Notes
        -----
        Equivalent to fit_regularized with L1_wt = 0 (but implemented
        more efficiently).
        i    Ni(   R   (   R   R   RL   R/   R   RH   R.   t   isscalarR!   R   RK   t   fill_diagonalRN   R   R   (   R-   R|   t   ut   st   vtt   vt   qt   s2t   sdRZ   t   vtavt   dt   rR   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR   7  s    !	

-N(   Rc   Rd   R   R   R   R   Re   R   R*   Rw   R1   R   R   R   R   Rz   R   R   R   R   R   (    (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR     s   (	%	!	1		-c           B  sW   e  Z d  i e j d 6e j e j d 6Z d
 d d d  Z d d d  Z	 d	   Z
 RS(   sv  
    A regression model with an AR(p) covariance structure.

    %(params)s
    rho : int
        Order of the autoregressive covariance
    %(extra_params)s

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> X = range(1,8)
    >>> X = sm.add_constant(X)
    >>> Y = [1,3,4,5,8,10,9]
    >>> model = sm.GLSAR(Y, X, rho=2)
    >>> for i in range(6):
    ...     results = model.fit()
    ...     print("AR coefficients: {0}".format(model.rho))
    ...     rho, sigma = sm.regression.yule_walker(results.resid,
    ...                                            order=model.order)
    ...     model = sm.GLSAR(Y, X, rho)
    ...
    AR coefficients: [ 0.  0.]
    AR coefficients: [-0.52571491 -0.84496178]
    AR coefficients: [-0.6104153  -0.86656458]
    AR coefficients: [-0.60439494 -0.857867  ]
    AR coefficients: [-0.6048218  -0.85846157]
    AR coefficients: [-0.60479146 -0.85841922]
    >>> results.params
    array([-0.66661205,  1.60850853])
    >>> results.tvalues
    array([ -2.10304127,  21.8047269 ])
    >>> print(results.t_test([1, 0]))
    <T test: effect=array([-0.66661205]), sd=array([[ 0.31697526]]), t=array([[-2.10304127]]), p=array([[ 0.06309969]]), df_denom=3>
    >>> print(results.f_test(np.identity(2)))
    <F test: F=array([[ 1815.23061844]]), p=[[ 0.00002372]], df_denom=3, df_num=2>

    Or, equivalently

    >>> model2 = sm.GLSAR(Y, X, rho=2)
    >>> res = model2.iterative_fit(maxiter=6)
    >>> model2.rho
    array([-0.60479146, -0.85841922])

    Notes
    -----
    GLSAR is considered to be experimental.
    The linear autoregressive process of order p--AR(p)--is defined as:
    TODO
    RZ   Rh   i   Ri   c         K  s  t  | t j  r9 | |  _ t j |  j t j  |  _ nv t j t j |   |  _ t	 |  j j
  d k r{ t d   n  |  j j
 d k r d |  j _
 n  |  j j
 d |  _ | d  k r t t |   j | t j | j
 d d f  d | | n" t t |   j | | d | | d  S(   Ni    i   s*   AR parameters must be a scalar or a vectorRj   (   i    i   (    (   i   (   RO   R   t   intt   ordert   zerost   float64t   rhoR   R   Rl   R   R   R   R)   R   R*   Rx   (   R-   R.   R/   R   Rj   R0   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR*     s    	.i   g-C6?c         K  s  t  } d } i g  d 6|  j g d 6} x t | d  D] } t |  d  rU |  ` n  |  j   |  j   } | d j | j  | d k r | j } nK t	 j
 t	 j | | j  t	 j |   }	 |	 | k  r t } Pn  | j } t | j d |  j d d
 \ |  _ }
 | d j |  j  q7 W| rY| d k rYt |  d  rL|  ` n  |  j   n  |  j d	 | |  } | d | _ | s| j d j | j  | j d 7_ n  | | _ | S(   s  
        Perform an iterative two-stage procedure to estimate a GLS model.

        The model is assumed to have AR(p) errors, AR(p) parameters and
        regression coefficients are estimated iteratively.

        Parameters
        ----------
        maxiter : integer, optional
            the number of iterations
        rtol : float, optional
            Relative tolerance between estimated coefficients to stop the
            estimation.  Stops if

            max(abs(last - current) / abs(last)) < rtol

        iRZ   R   i   R%   i    R   t   dft   historyN(   R   R   R   RG   R%   R6   RY   t   appendRZ   R   t   maxR   R   t   yule_walkerR   R   R   t   iterR   t	   converged(   R-   R   t   rtolt   kwdsR   R   R   R   t   lastt   difft   _(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   iterative_fit  s<    	
,			c         C  sw   t  j | t  j  } | j   } xH t |  j  D]7 } | | d |  j | | d | d !| | d )q1 W| |  j S(   s  
        Whiten a series of columns according to an AR(p)
        covariance structure. This drops initial p observations.

        Parameters
        ----------
        X : array-like
            The data to be whitened,

        Returns
        -------
        whitened array

        i   i    (   R   R   R   t   copyR   R   R   (   R-   R=   t   _XR   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR1     s
    5N(   Rc   Rd   R   R   R   R   Re   R   R*   R   R1   (    (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR   \  s
   2=t   unbiasedc           s  t  |  j   } | d k r- t d   n  t j |  d t j }  | r^ |  |  j   8}  n  | pn |  j d   | d k r   f d   } n   f d   } |  j d k r |  j d d k r t d	   n  t j	 | d t j  } |  d
 j
   | d  | d <xE t d | d  D]0 } |  d | !|  | j
   | |  | | <qWt | d   }	 t j j |	 | d  }
 | d | d |
 j
   } | r|
 t j |  t j j |	  f S|
 t j |  f Sd S(   s  
    Estimate AR(p) parameters from a sequence X using Yule-Walker equation.

    Unbiased or maximum-likelihood estimator (mle)

    See, for example:

    http://en.wikipedia.org/wiki/Autoregressive_moving_average_model

    Parameters
    ----------
    X : array-like
        1d array
    order : integer, optional
        The order of the autoregressive process.  Default is 1.
    method : string, optional
       Method can be 'unbiased' or 'mle' and this determines
       denominator in estimate of autocorrelation function (ACF) at
       lag k. If 'mle', the denominator is n=X.shape[0], if 'unbiased'
       the denominator is n-k.  The default is unbiased.
    df : integer, optional
       Specifies the degrees of freedom. If `df` is supplied, then it
       is assumed the X has `df` degrees of freedom rather than `n`.
       Default is None.
    inv : bool
        If inv is True the inverse of R is also returned.  Default is
        False.
    demean : bool
        True, the mean is subtracted from `X` before estimation.

    Returns
    -------
    rho
        The autoregressive coefficients
    sigma
        TODO

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> from statsmodels.datasets.sunspots import load
    >>> data = load(as_pandas=False)
    >>> rho, sigma = sm.regression.yule_walker(data.endog,
    ...                                        order=4, method="mle")

    >>> rho
    array([ 1.28310031, -0.45240924, -0.20770299,  0.04794365])
    >>> sigma
    16.808022730464351

    R   t   mles1   ACF estimation method must be 'unbiased' or 'MLE't   dtypei    c           s     |  S(   N(    (   t   k(   R   (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   <lambda>>  t    c           s     S(   N(    (   R   (   R   (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR   @  R   i   s,   expecting a vector to estimate AR parametersi   iN(   R   R   (   t   strt   lowerR   R   R   R   t   meanR   R   R   Rn   R   R   R   RN   R   R   (   R=   R   RS   R   R   t   demeant   denomR   R   RW   R   t   sigmasq(    (   R   sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR     s,    8"."c   	      C  s   d d l  m } m } t j t j |    }  |  j d k rL t d   n  t |  } | d k  rs t d   n  | r |  |  j	   }  n  | |  | d | \ } } | |  \ } } | | d f S(   s  
    Burg's AP(p) parameter estimator

    Parameters
    ----------
    endog : array-like
        The endogenous variable
    order : int, optional
        Order of the AR.  Default is 1.
    demean : bool, optional
        Flag indicating to subtract the mean from endog before estimation

    Returns
    -------
    rho : ndarray
        AR(p) coefficients computed using Burg's algorithm
    sigma2 : float
        Estimate of the residual variance

    Notes
    -----
    AR model estimated includes a constant estimated using the sample mean.
    This value is not reported.

    References
    ----------
    .. [1] Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series
        and forecasting. Springer.
    i(   t   levinson_durbin_pacft	   pacf_burgi   s'   endog must be 1-d or squeezable to 1-d.s&   order must be an integer larger than 1R   (
   t   statsmodels.tsa.stattoolsR   R   R   R   R   R   R   R   R   (	   R.   R   R   R   R   t   pacfR    t   arR   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   burgQ  s    RQ   c           B  s  e  Z d  Z i  Z e d d e e d  Z d   Z d e d  Z e d    Z	 e d    Z
 e d	    Z e d
    Z e   d    Z e d    Z e d    Z e d    Z e d    Z e d    Z e d    Z e d    Z e d    Z e d    Z e d    Z e d    Z e d    Z e d    Z e d    Z e d    Z e d    Z d   Z e d    Z  e d    Z! e d    Z" e d     Z# e d!    Z$ e d"    Z% e d#    Z& e d$    Z' e d%    Z( d&   Z) e* e+ d'  Z, d(   Z- e+ d)  Z. d* e d+  Z/ e e* e e d,  Z0 e1 j0 j e0 _ e e e d d-  Z2 e e e d d. d/  Z3 RS(0   sK  
    This class summarizes the fit of a linear regression model.

    It handles the output of contrasts, estimates of covariance, etc.

    Attributes
    ----------
    pinv_wexog
        See specific model class docstring
    cov_HC0
        Heteroscedasticity robust covariance matrix. See HC0_se below.
    cov_HC1
        Heteroscedasticity robust covariance matrix. See HC1_se below.
    cov_HC2
        Heteroscedasticity robust covariance matrix. See HC2_se below.
    cov_HC3
        Heteroscedasticity robust covariance matrix. See HC3_se below.
    cov_type
        Parameter covariance estimator used for standard errors and t-stats
    df_model
        Model degrees of freedom. The number of regressors `p`. Does not
        include the constant if one is present
    df_resid
        Residual degrees of freedom. `n - p - 1`, if a constant is present.
        `n - p` if a constant is not included.
    het_scale
        adjusted squared residuals for heteroscedasticity robust standard
        errors. Is only available after `HC#_se` or `cov_HC#` is called.
        See HC#_se for more information.
    history
        Estimation history for iterative estimators
    HC0_se
        White's (1980) heteroskedasticity robust standard errors.
        Defined as sqrt(diag(X.T X)^(-1)X.T diag(e_i^(2)) X(X.T X)^(-1)
        where e_i = resid[i]
        HC0_se is a cached property.
        When HC0_se or cov_HC0 is called the RegressionResults instance will
        then have another attribute `het_scale`, which is in this case is just
        resid**2.
    HC1_se
        MacKinnon and White's (1985) alternative heteroskedasticity robust
        standard errors.
        Defined as sqrt(diag(n/(n-p)*HC_0)
        HC1_see is a cached property.
        When HC1_se or cov_HC1 is called the RegressionResults instance will
        then have another attribute `het_scale`, which is in this case is
        n/(n-p)*resid**2.
    HC2_se
        MacKinnon and White's (1985) alternative heteroskedasticity robust
        standard errors.
        Defined as (X.T X)^(-1)X.T diag(e_i^(2)/(1-h_ii)) X(X.T X)^(-1)
        where h_ii = x_i(X.T X)^(-1)x_i.T
        HC2_see is a cached property.
        When HC2_se or cov_HC2 is called the RegressionResults instance will
        then have another attribute `het_scale`, which is in this case is
        resid^(2)/(1-h_ii).
    HC3_se
        MacKinnon and White's (1985) alternative heteroskedasticity robust
        standard errors.
        Defined as (X.T X)^(-1)X.T diag(e_i^(2)/(1-h_ii)^(2)) X(X.T X)^(-1)
        where h_ii = x_i(X.T X)^(-1)x_i.T
        HC3_see is a cached property.
        When HC3_se or cov_HC3 is called the RegressionResults instance will
        then have another attribute `het_scale`, which is in this case is
        resid^(2)/(1-h_ii)^(2).
    model
        A pointer to the model instance that called fit() or results.
    params
        The linear coefficients that minimize the least squares
        criterion.  This is usually called Beta for the classical
        linear model.
    resid_pearson
        `wresid` normalized to have unit variance.
    g      ?R?   c         K  sE  t  t |   j | | | |  i  |  _ t | d  rF | j |  _ n	 d  |  _ | j |  _ | j	 |  _	 | d k r d |  _
 i d d d d 6|  _ | d  k r t } n  | |  _ ng | d  k r i  } n  d | k r | j d  }	 | d  k r |	 } q n  |  j d | d	 t d | |  x" | D] }
 t |  |
 | |
  q#Wd  S(
   NRJ   R?   s    Standard Errors assume that the s-   covariance matrix of the errors is correctly s
   specified.t   descriptionRF   RD   t   use_self(   R)   RQ   R*   t   _cacheRG   RJ   t   _wexog_singular_valuesR   R9   R;   RD   RE   R   RF   t   popt   get_robustcov_resultst   setattr(   R-   t   modelRZ   R@   R^   RD   RE   RF   R0   t   use_t_2t   key(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR*     s4    					
c         C  s   |  j    d  S(   N(   t   summary(   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   __str__  s    g?c         C  s%   t  t |   j d | d |  } | S(   s  
        Returns the confidence interval of the fitted parameters.

        Parameters
        ----------
        alpha : float, optional
            The `alpha` level for the confidence interval.
            ie., The default `alpha` = .05 returns a 95% confidence interval.
        cols : array-like, optional
            `cols` specifies which confidence intervals to return

        Notes
        -----
        The confidence interval is based on Student's t-distribution.
        R|   t   cols(   R)   RQ   t   conf_int(   R-   R|   R  t   ci(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR    s    !c         C  s   t  |  j j j d  S(   s   Number of observations n.i    (   R2   R  R'   R   (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR!   	  s    c         C  s   |  j  j |  j |  j  j  S(   s:   The predicted values for the original (unwhitened) design.(   R  R[   RZ   R/   (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   fittedvalues  s    c         C  s&   |  j  j |  j  j |  j |  j  j  S(   sW   
        The residuals of the transformed/whitened regressand and regressor(s)
        (   R  R&   R[   RZ   R'   (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   wresid  s    c         C  s&   |  j  j |  j  j |  j |  j  j  S(   s   The residuals of the model.(   R  R.   R[   RZ   R/   (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR     s    c         C  s    |  j  } t j | |  |  j S(   s   
        A scale factor for the covariance matrix.  Default value is
        ssr/(n-p).  Note that the square root of `scale` is often
        called the standard error of the regression.
        (   R  R   RH   R;   (   R-   R  (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR^   "  s    	c         C  s   |  j  } t j | |  S(   s$   Sum of squared (whitened) residuals.(   R  R   RH   (   R-   R  (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR   ,  s    	c         C  s  |  j  } t | d d  } t | d d  } | d k	 rm t j | j d | } t j | | j | d  S| d k	 r t j | j  } | j |  } | j	 j
 |  | j
 |  } | j | } | j |  } t j | d  S| j	 | j	 j   } t j
 | |  Sd S(   s<   The total (weighted) sum of squares centered about the mean.R(   R    i   N(   R  t   getattrR   R   t   averageR.   Rn   t	   ones_likeR1   R&   RH   R   (   R-   R  R(   R    R   t   iotat   errt   centered_endog(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   centered_tss2  s    	c         C  s   |  j  j } t j | |  S(   s   
        Uncentered sum of squares.  Sum of the squared values of the
        (whitened) endogenous response variable.
        (   R  R&   R   RH   (   R-   R&   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   uncentered_tssG  s    c         C  s)   |  j  r |  j |  j S|  j |  j Sd S(   s   Explained sum of squares. If a constant is present, the centered
        total sum of squares minus the sum of squared residuals. If there is no
        constant, the uncentered total sum of squares is used.N(   R8   R  R   R  (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   essP  s    	c         C  s1   |  j  r d |  j |  j Sd |  j |  j Sd S(   s   
        R-squared of a model with an intercept.  This is defined here
        as 1 - `ssr`/`centered_tss` if the constant is included in the
        model and 1 - `ssr`/`uncentered_tss` if the constant is
        omitted.
        i   N(   R8   R   R  R  (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   rsquared[  s    	c         C  s,   d t  j |  j |  j |  j  d |  j S(   s   
        Adjusted R-squared.  This is defined here as 1 -
        (`nobs`-1)/`df_resid` * (1-`rsquared`) if a constant is
        included and 1 - `nobs`/`df_resid` * (1-`rsquared`) if no
        constant is included.
        i   (   R   t   divideR!   R8   R;   R  (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   rsquared_adjh  s    c         C  s   |  j  |  j S(   s   
        Mean squared error the model. This is the explained sum of
        squares divided by the model degrees of freedom.
        (   R  R9   (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt	   mse_models  s    c         C  s   |  j  |  j S(   s   
        Mean squared error of the residuals.  The sum of squared
        residuals divided by the residual degrees of freedom.
        (   R   R;   (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt	   mse_resid{  s    c         C  s7   |  j  r |  j |  j |  j S|  j |  j |  j Sd S(   s   
        Total mean squared error.  Defined as the uncentered total sum
        of squares divided by n the number of observations.
        N(   R8   R  R;   R9   R  (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt	   mse_total  s    	c         C  s   t  |  d  r |  j d k r |  j j d } t j |  } |  j j j } |  j j j	 d k r | d k rt t j St |  } | j |  | | } | j d k r t j Sn  |  j |  } | j |  j d <| j S|  j |  j Sd S(   s   F-statistic of the fully specified model.  Calculated as the mean
        squared error of the model divided by the mean squared error of the
        residuals.RD   R?   i    i   t   f_pvalueN(   RG   RD   R@   R   R   t   eyeR  t   datat	   const_idxR8   R   t   nanR   R  R   t   f_testt   pvalueR  t   fvalueR  R  (   R-   t   k_paramst   matR$  t   idxt   ft(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR(    s     

c         C  s   t  j j |  j |  j |  j  S(   s   p-value of the F-statistic(   R   t   ft   sfR(  R9   R;   (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR!    s    c         C  s   t  j t  j |  j     S(   s/   The standard errors of the parameter estimates.(   R   R   RK   t
   cov_params(   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   bse  s    c         C  s   d |  j  d |  j |  j S(   s   Akaike's information criteria. For a model with a constant
        :math:`-2llf + 2(df\_model + 1)`. For a model without a constant
        :math:`-2llf + 2(df\_model)`.ii   (   Ru   R9   R8   (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   aic  s    c         C  s)   d |  j  t j |  j  |  j |  j S(   s   Bayes' information criteria. For a model with a constant
        :math:`-2llf + \log(n)(df\_model+1)`. For a model without a constant
        :math:`-2llf + \log(n)(df\_model)`i(   Ru   R   Ro   R!   R9   R8   (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   bic  s    c         C  si   |  j  d k	 r |  j  d } n0 t j j j t j |  j j j |  j j   } t j	 |  d d d  S(   s@   
        Return eigenvalues sorted in decreasing order.
        i   Ni(
   R  R   R   R   t   eigvalshRH   R  R'   R   t   sort(   R-   t   eigvals(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt	   eigenvals  s
    c         C  s"   |  j  } t j | d | d  S(   s~   
        Return condition number of exogenous matrix.

        Calculated as ratio of largest to smallest eigenvalue.
        i    i(   R6  R   R   (   R-   R5  (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   condition_number  s    	c         C  s9   t  j |  j j | d  d   d  f |  j j j  } | S(   N(   R   RH   R  R%   R   R   (   R-   R^   t   H(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   _HCCM  s    &c         C  s&   |  j  d |  _ |  j |  j  } | S(   s3   
        See statsmodels.RegressionResults
        i   (   R  t	   het_scaleR9  (   R-   t   cov_HC0(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR;    s    c         C  s4   |  j  |  j |  j d |  _ |  j |  j  } | S(   s3   
        See statsmodels.RegressionResults
        i   (   R!   R;   R  R:  R9  (   R-   t   cov_HC1(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR<    s    c         C  s[   t  j t |  j j |  j |  j j j   } |  j d d | |  _ |  j	 |  j  } | S(   s3   
        See statsmodels.RegressionResults
        i   i   (
   R   RK   R   R  R'   R@   R   R  R:  R9  (   R-   t   ht   cov_HC2(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR>    s    c         C  s[   t  j t |  j j |  j |  j j j   } |  j d | d |  _ |  j	 |  j  } | S(   s3   
        See statsmodels.RegressionResults
        i   i   (
   R   RK   R   R  R'   R@   R   R  R:  R9  (   R-   R=  t   cov_HC3(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR?    s
    	$c         C  s   t  j t  j |  j   S(   s3   
        See statsmodels.RegressionResults
        (   R   R   RK   R;  (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   HC0_se  s    c         C  s   t  j t  j |  j   S(   s3   
        See statsmodels.RegressionResults
        (   R   R   RK   R<  (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   HC1_se  s    c         C  s   t  j t  j |  j   S(   s3   
        See statsmodels.RegressionResults
        (   R   R   RK   R>  (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   HC2_se  s    c         C  s   t  j t  j |  j   S(   s3   
        See statsmodels.RegressionResults
        (   R   R   RK   R?  (   R-   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   HC3_se%  s    c         C  s   t  |  d  s t d   n  t j |  j j  j } t j |  j  d | |  j	 j
 j   k  r d d l m } | d t  |  j S|  j t j |  j  Sd S(   s   
        Residuals, normalized to have unit variance.

        Returns
        -------
        An array wresid standardized by the sqrt if scale
        R   s   Method requires residuals.i
   i(   t   warns5   All residuals are 0, cannot compute normed residuals.N(   RG   R   R   t   finfoR  R   t   epsR   R^   R  R.   R   t   warningsRD  t   RuntimeWarning(   R-   RF  RD  (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   resid_pearson,  s    
,c         C  s   |  j  j | j  j k r t S|  j  j } | j  j } | | k rD t S| j  j } |  j } | | d d  d f } t j t j	 | j	 d  d   } t j
 | d  S(   s  
        Parameters
        ----------
        restricted : Result instance
            The restricted model is assumed to be nested in the current
            model. The result instance of the restricted model is required to
            have two attributes, residual sum of squares, `ssr`, residual
            degrees of freedom, `df_resid`.

        Returns
        -------
        nested : bool
            True if nested, otherwise false

        Notes
        -----
        A most nests another model if the regressors in the smaller
        model are spanned by the regressors in the larger model and
        the regressand is identical.
        Ni    i   (   R  R!   R   R5   R'   R  R   R   R   R   t   allclose(   R-   t
   restrictedt	   full_rankt   restricted_rankt   restricted_exogt   full_wresidt   scorest   score_l2(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt
   _is_nestedB  s    	%c         C  s@  d d l  j j } d d l m } |  j |  s@ t d   n  | j } |  j j	 } | | d d  d f } |  j }	 |  j }
 | j } | |
 } | j d d  } | r | |  j d d  d f } t } n  | r | | j d  d d d  f } n  t |  d d  } | d k r[t j | d	  } t j | j |  |	 } | | |  } n | d k r| t j | j |  |	  } n| | d k r|  j d } | | j | |  |	  } nD | d k r|  j d } | | j | |   } n t d d   |	 t | | | j  } t j j | |  } | | | f S(   s'  Use Lagrange Multiplier test to test whether restricted model is correct

        Parameters
        ----------
        restricted : Result instance
            The restricted model is assumed to be nested in the
            current model. The result instance of the restricted model
            is required to have two attributes, residual sum of
            squares, `ssr`, residual degrees of freedom, `df_resid`.

        demean : bool
            Flag indicating whether the demean the scores based on the
            residuals from the restricted model.  If True, the
            covariance of the scores are used and the LM test is
            identical to the large sample version of the LR test.

        Returns
        -------
        lm_value : float
            test statistic, chi2 distributed
        p_value : float
            p-value of the test statistic
        df_diff : int
            degrees of freedom of the restriction, i.e. difference in df
            between models

        Notes
        -----
        TODO: explain LM text
        iN(   R   s-   Restricted model is not nested by full model.Rm   i    RD   R?   i   t   HC0t   HC1t   HC2t   HC3t   HACt   maxlagst   clustert   groupss(   Only nonrobust, HC, HAC and cluster are s   currently connected(   RS  RT  RU  RV  (   t%   statsmodels.stats.sandwich_covarianceR   t   sandwich_covariancet   numpy.linalgR   RR  R   R  R  R'   R   R!   R;   R   R   R  R   RH   R   RE   t   S_hac_simplet   S_crosssectionR   t   chi2R.  (   R-   RK  R   t   use_lrt   swR   R  R'   RP  R   t   df_fullt   df_restrt   df_diffR   RD   t   sigma2t   XpXt   SinvRX  RZ  t   lm_valuet   p_value(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   compare_lm_testi  sF    				
	&"
c         C  s   t  |  d d  d k } t  | d d  d k } | s< | rS t j d d t  n  |  j } | j } |  j } | j } | | } | | | | | }	 t j j |	 | |  }
 |	 |
 | f S(   sl  use F test to test whether restricted model is correct

        Parameters
        ----------
        restricted : Result instance
            The restricted model is assumed to be nested in the
            current model. The result instance of the restricted model
            is required to have two attributes, residual sum of
            squares, `ssr`, residual degrees of freedom, `df_resid`.

        Returns
        -------
        f_value : float
            test statistic, F distributed
        p_value : float
            p-value of the test statistic
        df_diff : int
            degrees of freedom of the restriction, i.e. difference in
            df between models

        Notes
        -----
        See mailing list discussion October 17,

        This test compares the residual sum of squares of the two
        models.  This is not a valid test, if there is unspecified
        heteroscedasticity or correlation. This method will issue a
        warning if this is detected but still return the results under
        the assumption of homoscedasticity and no autocorrelation
        (sphericity).
        RD   R?   s-   F test for comparison is likely invalid with s$   robust covariance, proceeding anyway(	   R  RG  RD  R   R   R;   R   R-  R.  (   R-   RK  t   has_robust1t   has_robust2t   ssr_fullt	   ssr_restrRc  Rd  Re  t   f_valueRj  (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   compare_f_test  s    !		
				
c         C  s   | r |  j  | d t St |  d d  d k } t | d d  d k } | sU | rl t j d d t  n  |  j } | j } |  j } | j } | | }	 d | | }
 t j	 j
 |
 |	  } |
 | |	 f S(   s  
        Likelihood ratio test to test whether restricted model is correct

        Parameters
        ----------
        restricted : Result instance
            The restricted model is assumed to be nested in the current model.
            The result instance of the restricted model is required to have two
            attributes, residual sum of squares, `ssr`, residual degrees of
            freedom, `df_resid`.

        large_sample : bool
            Flag indicating whether to use a heteroskedasticity robust version
            of the LR test, which is a modified LM test.

        Returns
        -------
        lr_stat : float
            likelihood ratio, chisquare distributed with df_diff degrees of
            freedom
        p_value : float
            p-value of the test statistic
        df_diff : int
            degrees of freedom of the restriction, i.e. difference in df
            between models

        Notes
        -----

        The exact likelihood ratio is valid for homoskedastic data,
        and is defined as

        .. math:: D=-2\log\left(\frac{\mathcal{L}_{null}}
           {\mathcal{L}_{alternative}}\right)

        where :math:`\mathcal{L}` is the likelihood of the
        model. With :math:`D` distributed as chisquare with df equal
        to difference in number of parameters or equivalently
        difference in residual degrees of freedom.

        The large sample version of the likelihood ratio is defined as

        .. math:: D=n s^{\prime}S^{-1}s

        where :math:`s=n^{-1}\sum_{i=1}^{n} s_{i}`

        .. math:: s_{i} = x_{i,alternative} \epsilon_{i,null}

        is the average score of the model evaluated using the
        residuals from null model and the regressors from the
        alternative model and :math:`S` is the covariance of the
        scores, :math:`s_{i}`.  The covariance of the scores is
        estimated using the same estimator as in the alternative
        model.

        This test compares the loglikelihood of the two models.  This
        may not be a valid test, if there is unspecified
        heteroscedasticity or correlation. This method will issue a
        warning if this is detected but still return the results
        without taking unspecified heteroscedasticity or correlation
        into account.

        This test compares the loglikelihood of the two models.  This
        may not be a valid test, if there is unspecified
        heteroscedasticity or correlation. This method will issue a
        warning if this is detected but still return the results
        without taking unspecified heteroscedasticity or correlation
        into account.

        is the average score of the model evaluated using the
        residuals from null model and the regressors from the
        alternative model and :math:`S` is the covariance of the
        scores, :math:`s_{i}`.  The covariance of the scores is
        estimated using the same estimator as in the alternative
        model.

        TODO: put into separate function, needs tests
        Ra  RD   R?   s-   Likelihood Ratio test is likely invalid with s$   robust covariance, proceeding anywayi(   Rk  R   R  RG  RD  R   Ru   R;   R   R`  R.  (   R-   RK  t   large_sampleRl  Rm  t   llf_fullt	   llf_restrRc  Rd  t   lrdft   lrstatt	   lr_pvalue(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   compare_lr_test  s     R	
				
RT  c         K  su  d d l  j j } d d l m } m } | |  } d | k rV | j d  | d <n  d | k r t | d  r | j | d | d <n  | j d t	  } | r |  } n* |  j
 |  j |  j d |  j d |  j } | | _ | d k r |  j } n  i | d	 6| _ | | _ t	 }	 | d/ k rL| j d d  }
 |
 t	 k	 rLt }	 qLn  |	 | j d <| d0 k r| d | j d <| j d d  | j d <} | | j | _ n| j   d1 k r| rt d   n  | | j   | j d <t |  | j   d  t |  d | j    | _ nA| j   d k r| d } | | j d <| j d | j  } | | j d <| j d t	  } | | j d <| d j d | d d d  g |  | j d <| j |  d! | d | d | | _ n| j   d
 k r| d" } t | d#  st j |  j  } n  | j! d$ k r6| j"   } n  | | j d" <| j d t  } | | j d <| j! d% k r|	 rt# t j$ |   |  _% } n  | j& |  | d | | _ n | j! d$ k rqt | d&  r| j' } n  |	 rOt# t j$ | d d  d' f   } t# t j$ | d d  d% f   } | | f |  _% t( | |  } n  | j) |  | d | d' | _ n t d(   | d
 | j d <n| j   d k rQ| j d) d  | j d) <} | j d" d  | j d" <} | d | j d <} | j d d  } | | j d <| j d | j  } | | j d <| d k	 rt j |  } t j* | d  | d% k  d' d% j+   } t# |  } na | d k	 rt j |  } t j* | d% | d  k   d' d% j+   } t# |  } n t d*   t, d' g | | | g  } t# |  |  _% } | j- |  | | d | d | | _ | d+ | j d <n
| j   d k rK| d) | j d) <} | d | j d <} | j d d
  } | | j d <| j d | j  } | | j d <|	 rt j* | d% | d  k   d' d% } t# |  d% |  _% } n  | j. |  | | d | d | | _ | d, | j d <n t d- d.   |	 rq| d% | _/ n  | S(2   s  create new results instance with robust covariance as default

        Parameters
        ----------
        cov_type : string
            the type of robust sandwich estimator to use. see Notes below
        use_t : bool
            If true, then the t distribution is used for inference.
            If false, then the normal distribution is used.
            If `use_t` is None, then an appropriate default is used, which is
            `true` if the cov_type is nonrobust, and `false` in all other
            cases.
        kwds : depends on cov_type
            Required or optional arguments for robust covariance calculation.
            see Notes below

        Returns
        -------
        results : results instance
            This method creates a new results instance with the
            requested robust covariance as the default covariance of
            the parameters.  Inferential statistics like p-values and
            hypothesis tests will be based on this covariance matrix.

        Notes
        -----
        The following covariance types and required or optional arguments are
        currently available:

        - 'fixed scale' and optional keyword argument 'scale' which uses
            a predefined scale estimate with default equal to one.
        - 'HC0', 'HC1', 'HC2', 'HC3' and no keyword arguments:
            heteroscedasticity robust covariance
        - 'HAC' and keywords

            - `maxlag` integer (required) : number of lags to use
            - `kernel` callable or str (optional) : kernel
                  currently available kernels are ['bartlett', 'uniform'],
                  default is Bartlett
            - `use_correction` bool (optional) : If true, use small sample
                  correction

        - 'cluster' and required keyword `groups`, integer group indicator

            - `groups` array_like, integer (required) :
                  index of clusters or groups
            - `use_correction` bool (optional) :
                  If True the sandwich covariance is calculated with a small
                  sample correction.
                  If False the sandwich covariance is calculated without
                  small sample correction.
            - `df_correction` bool (optional)
                  If True (default), then the degrees of freedom for the
                  inferential statistics and hypothesis tests, such as
                  pvalues, f_pvalue, conf_int, and t_test and f_test, are
                  based on the number of groups minus one instead of the
                  total number of observations minus the number of explanatory
                  variables. `df_resid` of the results instance is adjusted.
                  If False, then `df_resid` of the results instance is not
                  adjusted.

        - 'hac-groupsum' Driscoll and Kraay, heteroscedasticity and
            autocorrelation robust standard errors in panel data
            keywords

            - `time` array_like (required) : index of time periods
            - `maxlag` integer (required) : number of lags to use
            - `kernel` callable or str (optional) : kernel
                  currently available kernels are ['bartlett', 'uniform'],
                  default is Bartlett
            - `use_correction` False or string in ['hac', 'cluster'] (optional) :
                  If False the the sandwich covariance is calulated without
                  small sample correction.
                  If `use_correction = 'cluster'` (default), then the same
                  small sample correction as in the case of 'covtype='cluster''
                  is used.
            - `df_correction` bool (optional)
                  adjustment to df_resid, see cov_type 'cluster' above
                  # TODO: we need more options here

        - 'hac-panel' heteroscedasticity and autocorrelation robust standard
            errors in panel data.
            The data needs to be sorted in this case, the time series
            for each panel unit or cluster need to be stacked. The
            membership to a timeseries of an individual or group can
            be either specified by group indicators or by increasing
            time periods.

            keywords

            - either `groups` or `time` : array_like (required)
              `groups` : indicator for groups
              `time` : index of time periods
            - `maxlag` integer (required) : number of lags to use
            - `kernel` callable or str (optional) : kernel
                  currently available kernels are ['bartlett', 'uniform'],
                  default is Bartlett
            - `use_correction` False or string in ['hac', 'cluster'] (optional) :
                  If False the sandwich covariance is calculated without
                  small sample correction.
            - `df_correction` bool (optional)
                  adjustment to df_resid, see cov_type 'cluster' above
                  # TODO: we need more options here

        Reminder:
        `use_correction` in "hac-groupsum" and "hac-panel" is not bool,
        needs to be in [False, 'hac', 'cluster']

        TODO: Currently there is no check for extra or misspelled keywords,
        except in the case of cov_type `HCx`
        iN(   t   normalize_cov_typet   descriptionst   kernelt   weights_funcR  R@   R^   RF   RY  s	   hac-panels   hac-groupsumt   df_correctiont	   adjust_dfs   fixed scalet   fixed_scaleR  g      ?RS  RT  RU  RV  s:   heteroscedasticity robust covariance does not use keywordst   _set   cov_t   hacRX  t   use_correctionRW  t
   correctiont   withoutt   witht   nlagsRZ  R   i   i   t   valuesi    s   only two groups are supportedt   times'   either time or groups needs to be givens	   HAC-Panels   HAC-Groupsums+   cov_type not recognized. See docstring for s   available options and spelling(   RY  s	   hac-panels   hac-groupsum(   s   fixed scaleR  (   RS  RT  RU  RV  (0   R[  R   R\  t   statsmodels.base.covtypeRy  Rz  R  t   callablet   kernel_dictR   t	   __class__R  RZ   R@   R^   RD   R   RF   RE   t   getR   t   cov_params_defaultt   upperR   R  R   t   weights_bartlettt   formatt   cov_hac_simpleRG   R   R   R   R   R   Rl   t   uniquet   n_groupst   cov_clusterR  t   mint   cov_cluster_2groupst   nonzerot   tolistR   t   cov_nw_panelt   cov_nw_groupsumt   df_resid_inference(   R-   RD   RF   R   Rb  Ry  Rz  R  t   resR~  R}  R^   RX  R|  R  RZ  R  t	   n_groups0t	   n_groups1R  t   ttt   nobs_t   groupidx(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR  W  s    p				

%%++%
c         K  s(   t  j |  d | d | d | d | | S(   NR/   t	   transformR(   t
   row_labels(   t   predt   get_prediction(   R-   R/   R  R(   R  R   (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR  o	  s    c         C  sE  d d l  m } m } m } | |  j  \ } }	 }
 } | |  j  \ } } |  j } |  j } t d | d |	 d |
 d | d | d | d	 | d
 | d  |  _ dH dI d d g f dJ dK dL dM dN g } t
 |  d  r | j d |  j g f  n  |  j r d n d } d | d d |  j g f d | d d |  j g f d d |  j g f d d |  j g f dO d! d |  j g f d" d |  j g f g } d# d$ | g f d% d$ | g f d& d$ |
 g f d' d$ | g f g } d( d | |  j  g f d) d | g f d* d+ |	 g f d, d+ | g f g } | dG k rK|  j j j d- d. } n  d d/ l m } |   } | j |  d0 | d1 | d2 | d3 | d4 | | j |  d2 | d3 | d5 | d6 |  j | j |  d0 | d1 | d2 | d3 | d4 d g  } t
 |  d  r| j |  j d7  n  |  j j j d8 |  j j j d9 k  rJd: } | j |  n  | d d; k  rd< } | d= 7} | d> 7} | d? 7} | | d } | j |  nJ | d@ k rdA } | dB 7} | dC 7} | dD 7} | | } | j |  n  | rAg  t |  D]" \ } } dE j  | d9 |  ^ q} | j! d8 dF  | j" |  n  | S(P   s  Summarize the Regression Results

        Parameters
        ----------
        yname : string, optional
            Default is `y`
        xname : list of strings, optional
            Default is `var_##` for ## in p the number of regressors
        title : string, optional
            Title for the top table. If not None, then this replaces the
            default title
        alpha : float
            significance level for the confidence intervals

        Returns
        -------
        smry : Summary instance
            this holds the summary tables and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary.Summary : class to hold summary results
        i(   t   jarque_berat   omni_normtestt   durbin_watsont   jbt   jbpvt   skewt   kurtosist   omnit   omnipvt   condnot	   mineigvals   Dep. Variable:s   Model:s   Method:s   Least Squaress   Date:s   Time:s   No. Observations:s   Df Residuals:s	   Df Model:RD   s   Covariance Type:R   s    (uncentered)s	   R-squaredt   :s   %#8.3fs   Adj. R-squareds   F-statistic:s   %#8.4gs   Prob (F-statistic):s   %#6.3gs   Log-Likelihood:s   AIC:s   BIC:s   Omnibus:s   %#6.3fs   Prob(Omnibus):s   Skew:s	   Kurtosis:s   Durbin-Watson:s   Jarque-Bera (JB):s	   Prob(JB):s   %#8.3gs	   Cond. No.t    s   Regression Results(   t   Summaryt   gleftt   grightt   ynamet   xnamet   titleR|   RF   R  i    i   s9   The input rank is higher than the number of observations.g|=s6   The smallest eigenvalue is %6.3g. This might indicate s   that there are
s5   strong multicollinearity problems or that the design s   matrix is singular.i  s1   The condition number is large, %6.3g. This might s   indicate that there are
s,   strong multicollinearity or other numerical s	   problems.s	   [{0}] {1}s	   Warnings:N(   s   Dep. Variable:N(   s   Model:N(   s   Date:N(   s   Time:N(   s   No. Observations:N(   s   Df Residuals:N(   s	   Df Model:N(   s   Log-Likelihood:N(#   t   statsmodels.stats.stattoolsR  R  R  R  R6  R7  t   dictt   diagnR   RG   R   RD   R8   R  R  R(  R!  R1  R2  R  R  Rc   t   statsmodels.iolib.summaryR  t   add_table_2colst   add_table_paramsRF   RE   R/   R   t	   enumerateR  t   insertt   add_extra_txt(   R-   R  R  R  R|   R  R  R  R  R  R  R  R  R  R5  R  t   top_leftt   rsquared_typet	   top_rightt
   diagn_leftt   diagn_rightR  t   smryt   etextt   wstrR   t   text(    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR  x	  s    		
		
&






2s   %.4fc         C  s  d d l  m } m } m } d d l m }	 | |  j  \ }
 } } } | |  j  \ } } | |  j  } |  j } |  j } t	 j
 |  } |	 d d | f d d | f d d | f d d | f d	 d | f d
 d |
 f d d | f d d | f g  } d d l m } | j   } | j d |  d | d | d | d | d |  | j |  | d d k  rd | d } | j |  n  | d k rd | } | j |  n  | S(   s  Experimental summary function to summarize the regression results

        Parameters
        ----------
        xname : List of strings of length equal to the number of parameters
            Names of the independent variables (optional)
        yname : string
            Name of the dependent variable (optional)
        title : string, optional
            Title for the top table. If not None, then this replaces the
            default title
        alpha : float
            significance level for the confidence intervals
        float_format: string
            print format for floats in parameters summary

        Returns
        -------
        smry : Summary instance
            this holds the summary tables and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary2.Summary : class to hold summary results

        i(   R  R  R  (   t   OrderedDicts   Omnibus:s   %.3fs   Prob(Omnibus):s   Skew:s	   Kurtosis:s   Durbin-Watson:s   Jarque-Bera (JB):s	   Prob(JB):s   Condition No.:s   %.0f(   t   summary2R   R|   t   float_formatR  R  R  g|=s   The smallest eigenvalue is %6.3g. This might indicate that            there are strong multicollinearity problems or that the design            matrix is singular.i  s|   * The condition number is large (%.g). This might indicate             strong multicollinearity or other numerical problems.(   R  R  R  R  t   collectionsR  R  R6  R7  R   R4  t   statsmodels.iolibR  R  t   add_baset   add_dictt   add_text(   R-   R  R  R  R|   R  R  R  R  R  R  R  R  R  R  R  t   dwR5  R  t
   diagnosticR  R  RD  (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR  	  s:    		
(4   Rc   Rd   Re   R  R   R*   R  R  R	   R!   R  R  R   R
   R^   R   R  R  R  R  R  R  R  R   R(  R!  R0  R1  R2  R6  R7  R9  R;  R<  R>  R?  R@  RA  RB  RC  RI  RR  R   R   Rk  Rq  Rx  R  R  R  R  R  (    (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyRQ     sb   J$	
	 	

	'Q	4i 	RP   c           B  s_   e  Z d  Z d   Z d d d
 e d
 d  Z d d d d d d  Z d d
 d
 d d d	  Z RS(   s2  
    Results class for for an OLS model.

    Most of the methods and attributes are inherited from RegressionResults.
    The special methods that are only available for OLS are:

    - get_influence
    - outlier_test
    - el_test
    - conf_int_el

    See Also
    --------
    RegressionResults

    c         C  s   d d l  m } | |   S(   so  
        get an instance of Influence with influence and outlier measures

        Returns
        -------
        infl : Influence instance
            the instance has methods to calculate the main influence and
            outlier measures for the OLS regression

        See Also
        --------
        statsmodels.stats.outliers_influence.OLSInfluence
        i(   t   OLSInfluence(   t$   statsmodels.stats.outliers_influenceR  (   R-   R  (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   get_influenceT
  s    t   bonfg?c      
   C  s2   d d l  m } | |  | | d | d | d | S(   sm  
        Test observations for outliers according to method

        Parameters
        ----------
        method : str

            - `bonferroni` : one-step correction
            - `sidak` : one-step correction
            - `holm-sidak` :
            - `holm` :
            - `simes-hochberg` :
            - `hommel` :
            - `fdr_bh` : Benjamini/Hochberg
            - `fdr_by` : Benjamini/Yekutieli

            See `statsmodels.stats.multitest.multipletests` for details.
        alpha : float
            familywise error rate
        labels : None or array_like
            If `labels` is not None, then it will be used as index to the
            returned pandas DataFrame. See also Returns below
        order : bool
            Whether or not to order the results by the absolute value of the
            studentized residuals. If labels are provided they will also be sorted.
        cutoff : None or float in [0, 1]
            If cutoff is not None, then the return only includes observations with
            multiple testing corrected p-values strictly below the cutoff. The
            returned array or dataframe can be empty if t

        Returns
        -------
        table : ndarray or DataFrame
            Returns either an ndarray or a DataFrame if labels is not None.
            Will attempt to get labels from model_results if available. The
            columns are the Studentized residuals, the unadjusted p-value,
            and the corrected p-value according to method.

        Notes
        -----
        The unadjusted p-value is stats.t.sf(abs(resid), df) where
        df = df_resid - 1.
        i(   t   outlier_testt   labelsR   t   cutoff(   R  R  (   R-   RS   R|   R  R   R  R  (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR  e
  s    -i    t   nmi   c         C  s  t  j |  j  } t   }	 t |  t |  k r |	 j g  d | d |  j j d |  j j d |  j j	 d |  j j j
 d d | d | d	 | }
 d t j j |
 t |   } | r |
 | |	 j f S|
 | f Sn  t  j | |  } | |  j j |  j j |  j j	 |  j j j
 d | | | f } | d
 k rht j |	 j | d d d d d d d d d | d }
 n  | d k rt j |	 j | d d d d d | d }
 n  d t j j |
 t |   } | r|
 | |	 j |	 j f S| r|
 | |	 j f S|
 | f Sd S(   s  
        Tests single or joint hypotheses of the regression parameters using
        Empirical Likelihood.

        Parameters
        ----------

        b0_vals : 1darray
            The hypothesized value of the parameter to be tested

        param_nums : 1darray
            The parameter number to be tested

        print_weights : bool
            If true, returns the weights that optimize the likelihood
            ratio at b0_vals.  Default is False

        ret_params : bool
            If true, returns the parameter vector that maximizes the likelihood
            ratio at b0_vals.  Also returns the weights.  Default is False

        method : string
            Can either be 'nm' for Nelder-Mead or 'powell' for Powell.  The
            optimization method that optimizes over nuisance parameters.
            Default is 'nm'

        stochastic_exog : bool
            When TRUE, the exogenous variables are assumed to be stochastic.
            When the regressors are nonstochastic, moment conditions are
            placed on the exogenous variables.  Confidence intervals for
            stochastic regressors are at least as large as non-stochastic
            regressors.  Default = TRUE

        Returns
        -------

        res : tuple
            The p-value and -2 times the log-likelihood ratio for the
            hypothesized values.

        Examples
        --------
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.stackloss.load(as_pandas=False)
        >>> endog = data.endog
        >>> exog = sm.add_constant(data.exog)
        >>> model = sm.OLS(endog, exog)
        >>> fitted = model.fit()
        >>> fitted.params
        >>> array([-39.91967442,   0.7156402 ,   1.29528612,  -0.15212252])
        >>> fitted.rsquared
        >>> 0.91357690446068196
        >>> # Test that the slope on the first variable is 0
        >>> fitted.el_test([0], [1])
        >>> (27.248146353888796, 1.7894660442330235e-07)
        t
   param_numsR.   R/   R!   t   nvari   RZ   t   b0_valst   stochastic_exogR  t   maxfuni'  R   t   full_outputt   dispi    t   argst   powellN(   R   R   RZ   R   Rl   t   _opt_nuis_regressR  R.   R/   R!   R   R   R`  t   cdft   new_weightst   deleteR   t   fmint   fmin_powellt
   new_params(   R-   R  R  t   return_weightst
   ret_paramsRS   R  t   return_paramsRZ   t   opt_fun_instt   llrt   pvalt   x0R  (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   el_test
  sF    ;		c   
        s   t  j j d | d   | d k r?  j d   d } n  | d k re  j d   d } n        f d   } t j | |  j   } t j |  j  |  }	 | |	 f S(   s  
        Computes the confidence interval for the parameter given by param_num
        using Empirical Likelihood

        Parameters
        ----------

        param_num : float
            The parameter for which the confidence interval is desired

        sig : float
            The significance level.  Default is .05

        upper_bound : float
            The maximum value the upper limit can be.  Default is the
            99.9% confidence value under OLS assumptions.

        lower_bound : float
            The minimum value the lower limit can be.  Default is the 99.9%
            confidence value under OLS assumptions.

        method : string
            Can either be 'nm' for Nelder-Mead or 'powell' for Powell.  The
            optimization method that optimizes over nuisance parameters.
            Default is 'nm'

        Returns
        -------

        ci : tuple
            The confidence interval

        See Also
        --------

        el_test

        Notes
        -----

        This function uses brentq to find the value of beta where
        test_beta([beta], param_num)[1] is equal to the critical
        value.

        The function returns the results of each iteration of brentq at
        each value of beta.

        The current function value of the last printed optimization
        should be the critical value at the desired significance level.
        For alpha=.05, the value is 3.841459.

        To ensure optimization terminated successfully, it is suggested to
        do el_test([lower_limit], [param_num])

        If the optimization does not terminate successfully, consider switching
        optimization algorithms.

        If optimization is still not successful, try changing the values of
        start_int_params.  If the current function value repeatedly jumps
        from a number between 0 and the critical value and a very large number
        (>50), the starting parameters of the interior minimization need
        to be changed.
        i   g{Gz?i    c           s<    j  t j |  g  t j  g  d   d  d  S(   NRS   R  i    (   R  R   R   (   t   b0(   RS   t	   param_numt   r0R-   R  (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyR   >  s   'N(   R   R`  t   ppfR   R  R   t   brenthRZ   (
   R-   R  t   sigt   upper_boundt   lower_boundRS   R  R-  t   lowerlt   upperl(    (   RS   R  R  R-   R  sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   conf_int_el
  s    A	N(	   Rc   Rd   Re   R  R   R   R  R  R  (    (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyRP   B
  s   		0`RR   c           B  s   e  Z i d  d 6d d 6d d 6d d 6d d 6d d 6d  d	 6d  d
 6d  d 6d  d 6d d 6Z e j e j j e  Z i  Z e j e j j	 e  Z	 RS(   t   columnst   chisqt   rowst   sresidR(   R  t   covt   bcov_unscaledt   bcov_scaledR@  RA  RB  RC  t
   norm_resid(
   Rc   Rd   t   _attrst   wrapt   union_dictsR   t   LikelihoodResultsWrappert   _wrap_attrst   _methodst   _wrap_methods(    (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyRR   I  s$   
		(9   Re   t
   __future__R    t   statsmodels.compat.pythonR   R   R   t   numpyR   t   scipy.linalgR   t   scipyR   R   t   statsmodels.tools.toolsR   R   t   statsmodels.tools.decoratorsR	   R
   t   statsmodels.base.modelR   R  t   statsmodels.base.wrappert   wrapperR  t   statsmodels.emplike.elregressR   RG  t   statsmodels.tools.sm_exceptionsR   t"   statsmodels.regression._predictionR   R   R   R  t   __docformat__t   __all__R   R#   t   LikelihoodModelR$   R   R   R   R   R   R   R   R   R   t   LikelihoodModelResultsRQ   RP   t   ResultsWrapperRR   t   populate_wrapper(    (    (    sB   lib/python2.7/site-packages/statsmodels/regression/linear_model.pyt   <module>    sL   g	 FS.     	