B
    ZM                 @   s   d Z ddlmZ ddlZddlmZmZmZm	Z	m
Z
mZ ddlmZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ d
ZdZdddZdddZdd Zdd Zde d e_ G dd deZG dd deZ G dd deZ!dS ) z*General linear model

author: Yichuan Liu
    )divisionN)eigvalsinvsolvematrix_rankpinvsvd)stats)
DesignInfo)string_types)Model)summary2zrestructuredtext ena=  hypotheses: A list of tuples
    Hypothesis `L*B*M = C` to be tested where B is the parameters in
    regression Y = X*B. Each element is a tuple of length 2, 3, or 4:
    
      * (name, contrast_L)
      * (name, contrast_L, transform_M)
      * (name, contrast_L, transform_M, constant_C)

    containing a string `name`, the contrast matrix L, the transform
    matrix M (for transforming dependent variables), and right-hand side
    constant matrix constant_C, respectively.

    contrast_L : 2D array or an array of strings
        Left-hand side contrast matrix for hypotheses testing.
        If 2D array, each row is an hypotheses and each column is an
        independent variable. At least 1 row
        (1 by k_exog, the number of independent variables) is required.
        If an array of strings, it will be passed to
        patsy.DesignInfo().linear_constraint.

    transform_M : 2D array or an array of strings or None, optional
        Left hand side transform matrix.
        If `None` or left out, it is set to a k_endog by k_endog
        identity matrix (i.e. do not transform y matrix).
        If an array of strings, it will be passed to
        patsy.DesignInfo().linear_constraint.

    constant_C : 2D array or None, optional
        Right-hand side constant matrix.
        if `None` or left out it is set to a matrix of zeros
        Must has the same number of rows as contrast_L and the same
        number of columns as transform_M

    If `hypotheses` is None: 1) the effect of each independent variable
    on the dependent variables will be tested. Or 2) if model is created
    using a formula,  `hypotheses` will be created according to
    `design_info`. 1) and 2) is equivalent if no additional variables
    are created by the formula (e.g. dummy variables for categorical
    variables and interaction terms)
r   :0yE>c             C   sx  | }|}|j \}}|j \}}	||kr4td||f ||	 }
|dkrt|}||}||j}t||d|	k rztd||}t|j||j|}||
||fS |dkrht|d\}}}||k	 t
|k rtdd| }|jt||j|}|jtt|d|}t|||}t|j||j|}||
||fS td	| d
S )a(  
    Solve multivariate linear model y = x * params
    where y is dependent variables, x is independent variables

    Parameters
    ----------
    endog : array_like
        each column is a dependent variable
    exog : array_like
        each column is a independent variable
    method : string
        'svd' - Singular value decomposition
        'pinv' - Moore-Penrose pseudoinverse
    tolerance : float, a small positive number
        Tolerance for eigenvalue. Values smaller than tolerance is considered
        zero.
    Returns
    -------
    a tuple of matrices or values necessary for hypotheses testing

    .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm
    Notes
    -----
    Status: experimental and incomplete

    z8x(n=%d) and y(n=%d) should have the same number of rows!r   )ZtolzCovariance of x singular!r   r   g      ?   z%s is not a supported method!N)shape
ValueErrorr   dotTr   npsubtractr   sumlenZdiagpower)endogexogmethod	toleranceyxZnobsZk_endogZnobs1Zk_exogdf_residZpinv_xparamsinv_covtsscprusvZinvs r'   Hlib/python3.7/site-packages/statsmodels/multivariate/multivariate_ols.py_multivariate_ols_fit?   s8    




  r)   c              C   s  |}|}|}t ||g}| |k}	|	 }
| |	 }t dd |D }t || d d }|| d d }ddddd	g}d
dddg}tj||d}dd }|t d| |jd< || |jd< || |jd< ||	 |jd< ||| d d  }|| d d }|| }|| ||  d dkr`t 
|| | | d || ||  d  }nd}|| d|  }|jd }t |d| }d| | | | }||jd< ||jd< ||jd< tj|||}||jd< |jd }|d| | d  }|d| | d  }|| | ||  }||jd< ||jd< ||jd< tj|||}||jd< |jd }|dkr|d|  |d|   d d| d  |d  }|| }d|| d |d   }|d d | }|| | | }n4|d| | d  }||| d  }|| | | }||jd < ||jd!< ||jd"< tj|||}||jd#< |jd }t 	||g}|}|| | }|| | }||jd$< ||jd%< ||jd&< tj|||}||jd'< |S )(aT  
    For multivariate linear model Y = X * B
    Testing hypotheses
        L*B*M = 0
    where L is contrast matrix, B is the parameters of the
    multivariate linear model and M is dependent variable transform matrix.
        T = L*inv(X'X)*L'
        H = M'B'L'*inv(T)*LBM
        E =  M'(Y'Y - B'X'XB)M

    Parameters
    ----------
    eigenvals : array
        The eigenvalues of inv(E + H)*H
    r_err_sscp : int
        Rank of E + H
    r_contrast : int
        Rank of T matrix
    df_resid : int
        Residual degree of freedom (n_samples minus n_variables of X)
    tolerance : float
        smaller than which eigenvalue is considered 0

    Returns
    -------
    A DataFrame

    References
    ----------
    .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm
    c             S   s   g | ]}|d |  qS )   r'   ).0ir'   r'   r(   
<listcomp>   s    z&multivariate_stats.<locals>.<listcomp>r*   r   ValuezNum DFzDen DFzF ValuezPr > FzWilks' lambdazPillai's tracezHotelling-Lawley tracezRoy's greatest root)columnsindexc             S   s   t | gd S )Nr   )r   real)r   r'   r'   r(   fn   s    zmultivariate_stats.<locals>.fn)zWilks' lambdar.   )zPillai's tracer.   )zHotelling-Lawley tracer.   )zRoy's greatest rootr.         r   )zWilks' lambdazNum DF)zWilks' lambdazDen DF)zWilks' lambdazF Value)zWilks' lambdazPr > F)zPillai's tracezNum DF)zPillai's tracezDen DF)zPillai's tracezF Value)zPillai's tracezPr > F)zHotelling-Lawley tracezNum DF)zHotelling-Lawley tracezDen DF)zHotelling-Lawley tracezF Value)zHotelling-Lawley tracezPr > F)zRoy's greatest rootzNum DF)zRoy's greatest rootzDen DF)zRoy's greatest rootzF Value)zRoy's greatest rootzPr > F)r   minr   Zarrayabspd	DataFrameZprodlocmaxZsqrtr   r	   fZsf) Z	eigenvalsZ
r_err_sscpZ
r_contrastr   r   r&   pqr%   ZindZn_eeigv2Zeigv1mnZcolsr0   resultsr2   rr$   Zdf1r"   Zdf2ZlmdFZpvalVUbcZsigmar'   r'   r(   multivariate_stats   s    "0











0








rH   c                s    fdd}t | |||S )Nc                sn    \}}}}|  | || }|  | | j}t|}	|j t| |}
|j | |}||
|	|fS )N)r   r   r   r   )LMCr    r   r!   r#   Zt1Zt2r=   HE)fit_resultsr'   r(   r2      s    z"_multivariate_ols_test.<locals>.fn)_multivariate_test)
hypothesesrN   
exog_namesendog_namesr2   r'   )rN   r(   _multivariate_ols_test   s    rS   c             C   s  t |}t |}i }xl| D ]b}t |dkr@|\}}	d }
d }nFt |dkr\|\}}	}
d }n*t |dkrv|\}}	}
}ntdt | tdd |	D rt||	j}	nFt|	tjrt |	j	dkrtd|	j	d |krtd	|	j	d |f |
d krt
|}
n~td
d |
D r.t||
jj}
nV|
d k	rt|
tjrVt |
j	dkr^td|
j	d |krtd|
j	d |f |d krt|	j	d |
j	d g}nt|tjstd|j	d |	j	d krtd|	j	d |j	d f |j	d |
j	d kr$td|
j	d |j	d f ||	|
|\}}}}t||}t|}ttt||}t||||}||	|
|d||< qW |S )Nr      r3   zBhypotheses must be a tuple of length 2, 3 or 4. len(hypotheses)=%dc             s   s   | ]}t |tV  qd S )N)
isinstancer   )r+   jr'   r'   r(   	<genexpr>  s    z%_multivariate_test.<locals>.<genexpr>z&Contrast matrix L must be a 2-d array!r*   zJContrast matrix L should have the same number of columns as exog! %d != %dc             s   s   | ]}t |tV  qd S )N)rU   r   )r+   rV   r'   r'   r(   rW   (  s    z'Transform matrix M must be a 2-d array!r   zbTransform matrix M should have the same number of rows as the number of columns of endog! %d != %dz&Constant matrix C must be a 2-d array!zCcontrast L and constant C must have the same number of rows! %d!=%dzGtransform M and constant C must have the same number of columns! %d!=%d)stat
contrast_Ltransform_M
constant_C)r   r   anyr
   Zlinear_constraintZcoefsrU   r   Zndarrayr   eyer   zerosaddr   sortr   r   rH   )rP   rQ   rR   r2   k_xvarZk_yvarrA   ZhyponamerI   rJ   rK   rM   rL   r=   r   ZEHr<   r>   Z
stat_tabler'   r'   r(   rO     sd    



rO   a  
        Multivariate linear model hypotheses testing

        For y = x * params, where y are the dependent variables and x are the
        independent variables, testing L * params * M = 0 where L is the contrast
        matrix for hypotheses testing and M is the transformation matrix for
        transforming the dependent variables in y.

        Algorithm:
            T = L*inv(X'X)*L'
            H = M'B'L'*inv(T)*LBM
            E =  M'(Y'Y - B'X'XB)M
        And then finding the eigenvalues of inv(H + E)*H

        .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm

        Parameters
        ----------
        ag  
        k_xvar : int
            The number of independent variables
        k_yvar : int
            The number of dependent variables
        fn : function
            a function fn(contrast_L, transform_M) that returns E, H, q, df_resid
            where q is the rank of T matrix

        Returns
        -------
        results : MANOVAResults

        c                   s,   e Zd ZdZd	 fdd	Zd
ddZ  ZS )_MultivariateOLSa  
    Multivariate linear model via least squares


    Parameters
    ----------
    endog : array_like
        Dependent variables. A nobs x k_endog array where nobs is
        the number of observations and k_endog is the number of dependent
        variables
    exog : array_like
        Independent variables. A nobs x k_exog array where nobs is the
        number of observations and k_exog is the number of independent
        variables. An intercept is not included by default and should be added
        by the user (models specified using a formula include an intercept by
        default)

    Attributes
    -----------
    endog : array
        See Parameters.
    exog : array
        See Parameters.
    noneNc                sH   t |jdks|jd dkr$tdtt| j||f||d| d S )Nr*   zGThere must be more than one dependent variable to fit multivariate OLS!)missinghasconst)r   r   r   superrc   __init__)selfr   r   re   rf   kwargs)	__class__r'   r(   rh     s    z_MultivariateOLS.__init__r   c             C   s   t | j| j|d| _t| S )N)r   )r)   r   r   
_fittedmod_MultivariateOLSResults)ri   r   r'   r'   r(   fit  s    z_MultivariateOLS.fit)rd   N)r   )__name__
__module____qualname____doc__rh   rn   __classcell__r'   r'   )rk   r(   rc   o  s   rc   c               @   s@   e Zd ZdZdd Zdd ZdddZd	e d
 e_dd ZdS )rm   z)
    _MultivariateOLS results class

    c             C   sD   t |dr"t |jdr"|jj| _nd | _|j| _|j| _|j| _d S )Ndatadesign_info)hasattrrt   ru   rQ   rR   rl   )ri   Zfitted_mv_olsr'   r'   r(   rh     s    
z _MultivariateOLSResults.__init__c             C   s   |    S )N)summary__str__)ri   r'   r'   r(   rx     s    z_MultivariateOLSResults.__str__Nc       
      C   s   t | j}|d kr| jd k	rb| jj}g }x||D ].}t||| d d f }|||d g q.W nDg }x>t|D ]2}d| }td|g}d||< |||d g qpW t	|| j
| j| j}	t|	| j| jS )Nzx%dr*   )r   rQ   ru   Zterm_name_slicesr   r]   appendranger^   rS   rl   rR   MultivariateTestResults)
ri   rP   ra   ZtermskeyZ
L_contrastr,   rb   rI   rA   r'   r'   r(   mv_test  s&    


z_MultivariateOLSResults.mv_testz2
Linear hypotheses testing

Parameters
----------
a9  

Returns
-------
results: _MultivariateOLSResults

Notes
-----
Tests hypotheses of the form 

    L * params * M = C

where `params` is the regression coefficient matrix for the
linear model y = x * params, `L` is the contrast matrix, `M` is the
dependent variable transform matrix and C is the constant matrix.
c             C   s   t d S )N)NotImplementedError)ri   r'   r'   r(   rw     s    z_MultivariateOLSResults.summary)N)	ro   rp   rq   rr   rh   rx   r}   _hypotheses_docrw   r'   r'   r'   r(   rm     s   

rm   c               @   s>   e Zd ZdZdd Zdd Zdd Zedd	 ZdddZ	dS )r{   ab   Multivariate test results class
    Returned by `mv_test` method of `_MultivariateOLSResults` class

    Attributes
    -----------
    results : dict
       For hypothesis name `key`:
           results[key]['stat'] contains the multivaraite test results
           results[key]['contrast_L'] contains the contrast_L matrix
           results[key]['transform_M'] contains the transform_M matrix
           results[key]['constant_C'] contains the constant_C matrix
    endog_names : string
    exog_names : string
    summary_frame : multiindex dataframe
        Returns results as a multiindex dataframe
    c             C   s   || _ || _|| _d S )N)rA   rR   rQ   )ri   Z
mv_test_dfrR   rQ   r'   r'   r(   rh     s    z MultivariateTestResults.__init__c             C   s   |    S )N)rw   rx   )ri   r'   r'   r(   rx     s    zMultivariateTestResults.__str__c             C   s
   | j | S )N)rA   )ri   itemr'   r'   r(   __getitem__  s    z#MultivariateTestResults.__getitem__c             C   sz   g }x@| j D ]6}| j | d  }||jdddf< ||  qW tj|dd}|ddg}|jj	ddgdd	 |S )
z:
        Return results as a multiindex dataframe
        rX   NZEffectr   )Zaxisr0   Z	StatisticT)Zinplace)
rA   copyr9   ry   reset_indexr7   concatZ	set_indexr0   Z	set_names)ri   dfr|   Ztmpr'   r'   r(   summary_frame  s    z%MultivariateTestResults.summary_frameFc             C   s  t  }|d x | jD ]}|ddi | j| d  }| }|jj}||d< ||_ddddg|_	|
| |r||di tj| j| d | jd}|
| |r||di tj| j| d	 | jd
}|
| |r||di t| j| d }|
| qW |S )z

        Parameters
        ----------
        contrast_L : True or False
            Whether to show contrast_L matrix
        transform_M : True or False
            Whether to show transform_M matrix
        zMultivariate linear model rX   r   z contrast L=rY   )r/   z transform M=rZ   )r0   z constant C=r[   )r   ZSummaryZ	add_titlerA   Zadd_dictr   r   r/   valuesr0   Zadd_dfr7   r8   rQ   rR   )ri   Zshow_contrast_LZshow_transform_MZshow_constant_CZsummr|   r   rG   r'   r'   r(   rw     s4    





zMultivariateTestResults.summaryN)FFF)
ro   rp   rq   rr   rh   rx   r   propertyr   rw   r'   r'   r'   r(   r{     s    r{   )r   r   )r   )"rr   Z
__future__r   Znumpyr   Znumpy.linalgr   r   r   r   r   r   Zscipyr	   Zpandasr7   Zpatsyr
   Zstatsmodels.compatr   Zstatsmodels.base.modelr   Zstatsmodels.iolibr   Z__docformat__r   r)   rH   rS   rO   rc   objectrm   r{   r'   r'   r'   r(   <module>   s(    *
E
uS&C