ó
áp7]c           @   s<  d  Z  d d l m Z d d l Z d d l m Z m Z m Z m	 Z	 m
 Z
 m Z d d l m Z d d l Z d d l m Z d d l m Z d d l m Z d d	 l m Z d
 Z d Z d d d „ Z d d „ Z d „  Z d „  Z d e d e _  d e f d „  ƒ  YZ d e f d „  ƒ  YZ  d e f d „  ƒ  YZ! d S(   s*   General linear model

author: Yichuan Liu
iÿÿÿÿ(   t   divisionN(   t   eigvalst   invt   solvet   matrix_rankt   pinvt   svd(   t   stats(   t
   DesignInfo(   t   string_types(   t   Model(   t   summary2s   restructuredtext ens9  hypotheses: A list of tuples
    Hypothesis `L*B*M = C` to be tested where B is the parameters in
    regression Y = X*B. Each element is a tuple of length 2, 3, or 4:

      * (name, contrast_L)
      * (name, contrast_L, transform_M)
      * (name, contrast_L, transform_M, constant_C)

    containing a string `name`, the contrast matrix L, the transform
    matrix M (for transforming dependent variables), and right-hand side
    constant matrix constant_C, respectively.

    contrast_L : 2D array or an array of strings
        Left-hand side contrast matrix for hypotheses testing.
        If 2D array, each row is an hypotheses and each column is an
        independent variable. At least 1 row
        (1 by k_exog, the number of independent variables) is required.
        If an array of strings, it will be passed to
        patsy.DesignInfo().linear_constraint.

    transform_M : 2D array or an array of strings or None, optional
        Left hand side transform matrix.
        If `None` or left out, it is set to a k_endog by k_endog
        identity matrix (i.e. do not transform y matrix).
        If an array of strings, it will be passed to
        patsy.DesignInfo().linear_constraint.

    constant_C : 2D array or None, optional
        Right-hand side constant matrix.
        if `None` or left out it is set to a matrix of zeros
        Must has the same number of rows as contrast_L and the same
        number of columns as transform_M

    If `hypotheses` is None: 1) the effect of each independent variable
    on the dependent variables will be tested. Or 2) if model is created
    using a formula,  `hypotheses` will be created according to
    `design_info`. 1) and 2) is equivalent if no additional variables
    are created by the formula (e.g. dummy variables for categorical
    variables and interaction terms)
R   g:Œ0âŽyE>c         C   s,  |  } | } | j  \ } } | j  \ } }	 | | k rO t d | | f ƒ ‚ n  | |	 }
 | d k rt | ƒ } | j | ƒ } | j | j ƒ } t | d | ƒ|	 k  r¹ t d ƒ ‚ n  | j | ƒ } t j | j j | ƒ | j j | ƒ ƒ } | |
 | | f S| d k rt | d ƒ \ } } } | | k j	 ƒ  t
 | ƒ k  rSt d ƒ ‚ n  d | } | j j t j | ƒ ƒ j | j ƒ j | ƒ } | j j t j t j | d ƒ ƒ ƒ j | ƒ } t j | ƒ j | ƒ j | ƒ } t j | j j | ƒ | j j | ƒ ƒ } | |
 | | f St d	 | ƒ ‚ d
 S(   s(  
    Solve multivariate linear model y = x * params
    where y is dependent variables, x is independent variables

    Parameters
    ----------
    endog : array_like
        each column is a dependent variable
    exog : array_like
        each column is a independent variable
    method : string
        'svd' - Singular value decomposition
        'pinv' - Moore-Penrose pseudoinverse
    tolerance : float, a small positive number
        Tolerance for eigenvalue. Values smaller than tolerance is considered
        zero.
    Returns
    -------
    a tuple of matrices or values necessary for hypotheses testing

    .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm
    Notes
    -----
    Status: experimental and incomplete

    s8   x(n=%d) and y(n=%d) should have the same number of rows!R   t   tols   Covariance of x singular!R   i    g      ð?i   s   %s is not a supported method!N(   t   shapet
   ValueErrorR   t   dott   TR   t   npt   subtractR   t   sumt   lent   diagt   power(   t   endogt   exogt   methodt	   tolerancet   yt   xt   nobst   k_endogt   nobs1t   k_exogt   df_residt   pinv_xt   paramst   inv_covt   tt   sscprt   ut   st   vt   invs(    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyt   _multivariate_ols_fit?   s8    
*
00!*c   !      C   s¿  | } | } | } t  j | | g ƒ } |  | k }	 |	 j ƒ  }
 |  |	 } t  j g  | D] } | d | ^ qV ƒ } t  j | | ƒ d d } | | d d } d d d d d g } d d	 d
 d g } t j d | d | ƒ } d „  } | t  j d | ƒ ƒ | j d <| | j ƒ  ƒ | j d <| | j ƒ  ƒ | j d <| | j	 ƒ  ƒ | j d <| | | d d } | | d d } | | } | | | | d d k r×t  j
 | | | | d | | | | d ƒ } n d } | | d | } | j d } t  j | d | ƒ } d | | | | } | | j d <| | j d <| | j d <t j j | | | ƒ } | | j d <| j d } | d | | d } | d | | d } | | | | | } | | j d <| | j d <| | j d <t j j | | | ƒ } | | j d <| j d  } | d k r¥| d | | d | d d | d | d } | | } d | | d | d } | d d | } | | | | } n: | d | | d } | | | d } | | | | } | | j d! <| | j d" <| | j d# <t j j | | | ƒ } | | j d$ <| j d% }  t  j	 | | g ƒ } | } | | | } | | |  } | | j d& <| | j d' <| | j d( <t j j | | | ƒ } | | j d) <| S(*   sT  
    For multivariate linear model Y = X * B
    Testing hypotheses
        L*B*M = 0
    where L is contrast matrix, B is the parameters of the
    multivariate linear model and M is dependent variable transform matrix.
        T = L*inv(X'X)*L'
        H = M'B'L'*inv(T)*LBM
        E =  M'(Y'Y - B'X'XB)M

    Parameters
    ----------
    eigenvals : array
        The eigenvalues of inv(E + H)*H
    r_err_sscp : int
        Rank of E + H
    r_contrast : int
        Rank of T matrix
    df_resid : int
        Residual degree of freedom (n_samples minus n_variables of X)
    tolerance : float
        smaller than which eigenvalue is considered 0

    Returns
    -------
    A DataFrame

    References
    ----------
    .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm
    i   i   t   Values   Num DFs   Den DFs   F Values   Pr > Fs   Wilks' lambdas   Pillai's traces   Hotelling-Lawley traces   Roy's greatest roott   columnst   indexc         S   s   t  j |  g ƒ d S(   Ni    (   R   t   real(   R   (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyt   fnµ   s    i   i   i    (   s   Wilks' lambdaR,   (   s   Pillai's traceR,   (   s   Hotelling-Lawley traceR,   (   s   Roy's greatest rootR,   (   s   Wilks' lambdaR,   (   s   Wilks' lambdas   Num DF(   s   Wilks' lambdas   Den DF(   s   Wilks' lambdas   F Value(   s   Wilks' lambdas   Pr > F(   s   Pillai's traceR,   (   s   Pillai's traces   Num DF(   s   Pillai's traces   Den DF(   s   Pillai's traces   F Value(   s   Pillai's traces   Pr > F(   s   Hotelling-Lawley traceR,   (   s   Hotelling-Lawley traces   Num DF(   s   Hotelling-Lawley traces   Den DF(   s   Hotelling-Lawley traces   F Value(   s   Hotelling-Lawley traces   Pr > F(   s   Roy's greatest rootR,   (   s   Roy's greatest roots   Num DF(   s   Roy's greatest roots   Den DF(   s   Roy's greatest roots   F Value(   s   Roy's greatest roots   Pr > F(   R   t   minR   t   arrayt   abst   pdt	   DataFramet   prodt   loct   maxt   sqrtR   R   t   ft   sf(!   t	   eigenvalst
   r_err_sscpt
   r_contrastR!   R   R)   t   pt   qR(   t   indt   n_et   eigv2t   it   eigv1t   mt   nt   colsR.   t   resultsR0   t   rR'   t   df1R%   t   df2t   lmdt   Ft   pvalt   Vt   Ut   bt   ct   sigma(    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyt   multivariate_stats‚   sŒ    "
*		 
62
c            s"   ‡  f d †  } t  |  | | | ƒ S(   Nc            s¡   ˆ  \ } } } } |  j  | ƒ j  | ƒ | } |  j  | ƒ j  |  j ƒ } t | ƒ }	 | j j  t | ƒ ƒ j  | ƒ }
 | j j  | ƒ j  | ƒ } | |
 |	 | f S(   N(   R   R   R   R   (   t   Lt   Mt   CR#   R!   R$   R&   t   t1t   t2R@   t   Ht   E(   t   fit_results(    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyR0   û   s    !(   t   _multivariate_test(   t
   hypothesesR]   t
   exog_namest   endog_namesR0   (    (   R]   sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyt   _multivariate_ols_testù   s    c         C   s—  t  | ƒ } t  | ƒ } i  } xr|  D]j} t  | ƒ d k rX | \ } }	 d  }
 d  } ng t  | ƒ d k r‚ | \ } }	 }
 d  } n= t  | ƒ d k r© | \ } }	 }
 } n t d t  | ƒ ƒ ‚ t d „  |	 Dƒ ƒ rð t | ƒ j |	 ƒ j }	 nj t |	 t j	 ƒ st  |	 j
 ƒ d k r't d ƒ ‚ n  |	 j
 d | k rZt d |	 j
 d | f ƒ ‚ n  |
 d  k rxt j | ƒ }
 n­ t d	 „  |
 Dƒ ƒ r¬t | ƒ j |
 ƒ j j }
 ny |
 d  k	 r%t |
 t j	 ƒ sàt  |
 j
 ƒ d k rït d
 ƒ ‚ n  |
 j
 d | k r%t d |
 j
 d | f ƒ ‚ q%n  | d  k rWt j |	 j
 d |
 j
 d g ƒ } n! t | t j	 ƒ sxt d ƒ ‚ n  | j
 d |	 j
 d k r¹t d |	 j
 d | j
 d f ƒ ‚ n  | j
 d |
 j
 d k rút d |
 j
 d | j
 d f ƒ ‚ n  | |	 |
 | ƒ \ } } } } t j | | ƒ } t | ƒ } t j t t | | ƒ ƒ ƒ } t | | | | ƒ } i | d 6|	 d 6|
 d 6| d 6| | <q% W| S(   Ni   i   i   sB   hypotheses must be a tuple of length 2, 3 or 4. len(hypotheses)=%dc         s   s   |  ] } t  | t ƒ Vq d  S(   N(   t
   isinstanceR	   (   t   .0t   j(    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pys	   <genexpr>  s    s&   Contrast matrix L must be a 2-d array!i   sJ   Contrast matrix L should have the same number of columns as exog! %d != %dc         s   s   |  ] } t  | t ƒ Vq d  S(   N(   Rc   R	   (   Rd   Re   (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pys	   <genexpr>(  s    s'   Transform matrix M must be a 2-d array!i    sb   Transform matrix M should have the same number of rows as the number of columns of endog! %d != %ds&   Constant matrix C must be a 2-d array!sC   contrast L and constant C must have the same number of rows! %d!=%dsG   transform M and constant C must have the same number of columns! %d!=%dt   statt
   contrast_Lt   transform_Mt
   constant_C(   R   t   NoneR   t   anyR   t   linear_constraintt   coefsRc   R   t   ndarrayR   t   eyeR   t   zerost   addR   t   sortR   R   RU   (   R_   R`   Ra   R0   t   k_xvart   k_yvarRI   t   hypot   nameRV   RW   RX   R\   R[   R@   R!   t   EHR?   RC   t
   stat_table(    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyR^     sd    		((&!!s´  
        Multivariate linear model hypotheses testing

        For y = x * params, where y are the dependent variables and x are the
        independent variables, testing L * params * M = 0 where L is the contrast
        matrix for hypotheses testing and M is the transformation matrix for
        transforming the dependent variables in y.

        Algorithm:
            T = L*inv(X'X)*L'
            H = M'B'L'*inv(T)*LBM
            E =  M'(Y'Y - B'X'XB)M
        And then finding the eigenvalues of inv(H + E)*H

        .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm

        Parameters
        ----------
        sg  
        k_xvar : int
            The number of independent variables
        k_yvar : int
            The number of dependent variables
        fn : function
            a function fn(contrast_L, transform_M) that returns E, H, q, df_resid
            where q is the rank of T matrix

        Returns
        -------
        results : MANOVAResults

        t   _MultivariateOLSc           B   s)   e  Z d  Z d d d „ Z d d „ Z RS(   sÎ  
    Multivariate linear model via least squares


    Parameters
    ----------
    endog : array_like
        Dependent variables. A nobs x k_endog array where nobs is
        the number of observations and k_endog is the number of dependent
        variables
    exog : array_like
        Independent variables. A nobs x k_exog array where nobs is the
        number of observations and k_exog is the number of independent
        variables. An intercept is not included by default and should be added
        by the user (models specified using a formula include an intercept by
        default)

    Attributes
    ----------
    endog : array
        See Parameters.
    exog : array
        See Parameters.
    t   nonec         K   sc   t  | j ƒ d k s( | j d d k r7 t d ƒ ‚ n  t t |  ƒ j | | d | d | | d  S(   Ni   sG   There must be more than one dependent variable to fit multivariate OLS!t   missingt   hasconst(   R   R   R   t   superRy   t   __init__(   t   selfR   R   R{   R|   t   kwargs(    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyR~   ˆ  s    (R   c         C   s(   t  |  j |  j d | ƒ|  _ t |  ƒ S(   NR   (   R+   R   R   t
   _fittedmodt   _MultivariateOLSResults(   R   R   (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyt   fit  s    N(   t   __name__t
   __module__t   __doc__Rj   R~   Rƒ   (    (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyRy   o  s   R‚   c           B   sF   e  Z d  Z d „  Z d „  Z d d „ Z d e d e _ d „  Z RS(   s)   
    _MultivariateOLS results class

    c         C   sd   t  | d ƒ r3 t  | j d ƒ r3 | j j |  _ n	 d  |  _ | j |  _ | j |  _ | j |  _ d  S(   Nt   datat   design_info(   t   hasattrR‡   Rˆ   Rj   R`   Ra   R   (   R   t   fitted_mv_ols(    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyR~   š  s    	c         C   s   |  j  ƒ  j ƒ  S(   N(   t   summaryt   __str__(   R   (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyRŒ   ¤  s    c   
      C   s  t  |  j ƒ } | d  k rè |  j d  k	 r‰ |  j j } g  } x¦ | D]? } t j | ƒ | | d  d  … f } | j | | d  g ƒ qC Wqè g  } xV t | ƒ D]E } d | } t j	 d | g ƒ } d | | <| j | | d  g ƒ qœ Wn  t
 | |  j |  j |  j ƒ }	 t |	 |  j |  j ƒ S(   Ns   x%di   (   R   R`   Rj   Rˆ   t   term_name_slicesR   Ro   t   appendt   rangeRp   Rb   R   Ra   t   MultivariateTestResults(
   R   R_   Rs   t   termst   keyt
   L_contrastRD   Rv   RV   RI   (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyt   mv_test§  s&    #

s2   
Linear hypotheses testing

Parameters
----------
s8  

Returns
-------
results: _MultivariateOLSResults

Notes
-----
Tests hypotheses of the form

    L * params * M = C

where `params` is the regression coefficient matrix for the
linear model y = x * params, `L` is the contrast matrix, `M` is the
dependent variable transform matrix and C is the constant matrix.
c         C   s
   t  ‚ d  S(   N(   t   NotImplementedError(   R   (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyR‹   Ô  s    N(	   R„   R…   R†   R~   RŒ   Rj   R”   t   _hypotheses_docR‹   (    (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyR‚   •  s   	
	
R   c           B   sJ   e  Z d  Z d „  Z d „  Z d „  Z e d „  ƒ Z e e e d „ Z	 RS(   sa   Multivariate test results class
    Returned by `mv_test` method of `_MultivariateOLSResults` class

    Attributes
    ----------
    results : dict
       For hypothesis name `key`:
           results[key]['stat'] contains the multivaraite test results
           results[key]['contrast_L'] contains the contrast_L matrix
           results[key]['transform_M'] contains the transform_M matrix
           results[key]['constant_C'] contains the constant_C matrix
    endog_names : string
    exog_names : string
    summary_frame : multiindex dataframe
        Returns results as a multiindex dataframe
    c         C   s   | |  _  | |  _ | |  _ d  S(   N(   RI   Ra   R`   (   R   t
   mv_test_dfRa   R`   (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyR~   é  s    		c         C   s   |  j  ƒ  j ƒ  S(   N(   R‹   RŒ   (   R   (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyRŒ   î  s    c         C   s   |  j  | S(   N(   RI   (   R   t   item(    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyt   __getitem__ñ  s    c         C   s§   g  } xT |  j  D]I } |  j  | d j ƒ  } | | j d d … d f <| j | j ƒ  ƒ q Wt j | d d ƒ} | j d d g ƒ } | j j	 d d g d t
 ƒ| S(	   s:   
        Return results as a multiindex dataframe
        Rf   Nt   Effectt   axisi    R.   t	   Statistict   inplace(   RI   t   copyR7   RŽ   t   reset_indexR4   t   concatt	   set_indexR.   t	   set_namest   True(   R   t   dfR’   t   tmp(    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyt   summary_frameô  s    c         C   s‡  t  j ƒ  } | j d ƒ xg|  j D]\} | j i d d 6ƒ |  j | d j ƒ  } | j ƒ  } | j j } | | d <| | _ d d d d g | _	 | j
 | ƒ | rî | j i d | 6ƒ t j |  j | d d |  j ƒ} | j
 | ƒ n  | r;| j i d | 6ƒ t j |  j | d	 d
 |  j ƒ} | j
 | ƒ n  | r# | j i d | 6ƒ t j |  j | d ƒ } | j
 | ƒ q# q# W| S(   sÔ   

        Parameters
        ----------
        contrast_L : True or False
            Whether to show contrast_L matrix
        transform_M : True or False
            Whether to show transform_M matrix
        s   Multivariate linear modelt    Rf   i    s    contrast L=Rg   R-   s    transform M=Rh   R.   s    constant C=Ri   (   R   t   Summaryt	   add_titleRI   t   add_dictRž   RŸ   R-   t   valuesR.   t   add_dfR4   R5   R`   Ra   (   R   t   show_contrast_Lt   show_transform_Mt   show_constant_Ct   summR’   R¤   RS   (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyR‹     s4    
	(
   R„   R…   R†   R~   RŒ   R™   t   propertyR¦   t   FalseR‹   (    (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyR   Ø  s   			("   R†   t
   __future__R    t   numpyR   t   numpy.linalgR   R   R   R   R   R   t   scipyR   t   pandasR4   t   patsyR   t   statsmodels.compatR	   t   statsmodels.base.modelR
   t   statsmodels.iolibR   t   __docformat__R–   R+   RU   Rb   R^   Ry   t   objectR‚   R   (    (    (    sH   lib/python2.7/site-packages/statsmodels/multivariate/multivariate_ols.pyt   <module>   s(   .*Eu		S
&C