B
    Zؚ                 @   s  d Z ddlmZmZmZ ddlmZ ddlmZ ddl	Z
ddlZddlmZmZ ddlmZmZmZ dd	lmZmZmZmZ dd
lmZ ddlmZ ddlmZ ddlm   m!Z" ddl#m$Z$m%Z% G dd deZ&G dd deZ'G dd deZ(e")e(e' dS )zs
Vector Autoregressive Moving Average with eXogenous regressors model

Author: Chad Fulton
License: Simplified-BSD
    )divisionabsolute_importprint_function)warn)OrderedDictN   )INVERT_UNIVARIATESOLVE_LU)MLEModel
MLEResultsMLEResultsWrapper)is_invertibleprepare_exog!constrain_stationary_multivariate#unconstrain_stationary_multivariate)Bunch)_is_using_pandas)	var_model)EstimationWarningValueWarningc                   sb   e Zd ZdZd fdd		Zed
d Zedd Zedd Zdd Z	dd Z
 fddZ  ZS )VARMAXu  
    Vector Autoregressive Moving Average with eXogenous regressors model

    Parameters
    ----------
    endog : array_like
        The observed time-series process :math:`y`, , shaped nobs x k_endog.
    exog : array_like, optional
        Array of exogenous regressors, shaped nobs x k.
    order : iterable
        The (p,q) order of the model for the number of AR and MA parameters to
        use.
    trend : {'nc', 'c'}, optional
        Parameter controlling the deterministic trend polynomial.
        Can be specified as a string where 'c' indicates a constant intercept
        and 'nc' indicates no intercept term.
    error_cov_type : {'diagonal', 'unstructured'}, optional
        The structure of the covariance matrix of the error term, where
        "unstructured" puts no restrictions on the matrix and "diagonal"
        requires it to be a diagonal matrix (uncorrelated errors). Default is
        "unstructured".
    measurement_error : boolean, optional
        Whether or not to assume the endogenous observations `endog` were
        measured with error. Default is False.
    enforce_stationarity : boolean, optional
        Whether or not to transform the AR parameters to enforce stationarity
        in the autoregressive component of the model. Default is True.
    enforce_invertibility : boolean, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.
    kwargs
        Keyword arguments may be used to provide default values for state space
        matrices or for Kalman filtering options. See `Representation`, and
        `KalmanFilter` for more details.

    Attributes
    ----------
    order : iterable
        The (p,q) order of the model for the number of AR and MA parameters to
        use.
    trend : {'nc', 'c'}, optional
        Parameter controlling the deterministic trend polynomial.
        Can be specified as a string where 'c' indicates a constant intercept
        and 'nc' indicates no intercept term.
    error_cov_type : {'diagonal', 'unstructured'}, optional
        The structure of the covariance matrix of the error term, where
        "unstructured" puts no restrictions on the matrix and "diagonal"
        requires it to be a diagonal matrix (uncorrelated errors). Default is
        "unstructured".
    measurement_error : boolean, optional
        Whether or not to assume the endogenous observations `endog` were
        measured with error. Default is False.
    enforce_stationarity : boolean, optional
        Whether or not to transform the AR parameters to enforce stationarity
        in the autoregressive component of the model. Default is True.
    enforce_invertibility : boolean, optional
        Whether or not to transform the MA parameters to enforce invertibility
        in the moving average component of the model. Default is True.

    Notes
    -----
    Generically, the VARMAX model is specified (see for example chapter 18 of
    [1]_):

    .. math::

        y_t = \nu + A_1 y_{t-1} + \dots + A_p y_{t-p} + B x_t + \epsilon_t +
        M_1 \epsilon_{t-1} + \dots M_q \epsilon_{t-q}

    where :math:`\epsilon_t \sim N(0, \Omega)`, and where :math:`y_t` is a
    `k_endog x 1` vector. Additionally, this model allows considering the case
    where the variables are measured with error.

    Note that in the full VARMA(p,q) case there is a fundamental identification
    problem in that the coefficient matrices :math:`\{A_i, M_j\}` are not
    generally unique, meaning that for a given time series process there may
    be multiple sets of matrices that equivalently represent it. See Chapter 12
    of [1]_ for more informationl. Although this class can be used to estimate
    VARMA(p,q) models, a warning is issued to remind users that no steps have
    been taken to ensure identification in this case.

    References
    ----------
    .. [1] Lütkepohl, Helmut. 2007.
       New Introduction to Multiple Time Series Analysis.
       Berlin: Springer.

    Nr   r   cunstructuredFTc	                s  | _ | _| _| _| _| _t|d  _t|d  _t jdk _	|dkr`t
d|dkrpt
d jdkr jdkrt
d jdkr jdkrtd	t t|\ _} jdk _t|d st|}t jd}
|
 j  _|jd }|}| j }|	d
d |	dttB  tt j|f|||d|	  jdks\ j	dkrdd j_t  _ j  j	  jd<  j d  j  jd<  j d  j  jd<  j  j  jd<  j dkrֈ j  jd< n* j dkr t j  j d  d  jd<  j  j  jd< t! j"  _# jdkrFt$ j% j&f jd< t' j }d jd| <  jdkrt' jd  j  }|d  j  |d f}d jd| < t' jd  j  }|d |
d  j   |d |
 j   f}d jd| < t' j }d jd| < |d |
 j   |d f} jdkr@d jd| <  jdkrn jdkrntj(dd |f  _)n& jdkrtj(dd |d d f  _) jdkrtj(dd |d d f  _*ntj(dd ||d f  _* j dkrdt' j   _+n j dkrt, j  _- jr*dt' j   _. fdd }d}|d|\ _/}|d|\ _0}|d|\ _1}|d|\ _2}|d|\ _3}|d|\ _4}d S )!Nr   r   r   )r   nczInvalid trend specification.)diagonalr   z3Invalid error covariance matrix type specification.zNInvalid VARMAX(p,q) specification; at least one p,q must be greater than zero.zcEstimation of VARMA(p,q) models is not generically robust, due especially to identification issues.Zinitialization
stationaryZinversion_method)exogk_statesk_posdefFtrend   armaZ
regressionr   	state_covr   obs_covZstate_intercept)Zdesign)
transition)Z	selectionr&   )r$   )r%   c                s,    j |  }tj|||  }||7 }||fS )N)
parametersnps_)keyoffsetlengthZparam_slice)self @lib/python3.7/site-packages/statsmodels/tsa/statespace/varmax.py_slice  s    
zVARMAX.__init__.<locals>._slice)5error_cov_typemeasurement_errorenforce_stationarityenforce_invertibilityorderr    intk_ark_mak_trend
ValueErrorr   r   r   k_exogmle_regressionr   r(   Z
asanyarraymaxZ_k_ordershape
setdefaultr   r	   superr   __init__ssmZ_time_invariantr   r'   k_endogsumvaluesk_paramszerosr   nobsZdiag_indicesr)   _idx_state_intercept_idx_transition_idx_state_covZtril_indices_idx_lower_state_cov_idx_obs_cov_params_trend
_params_ar
_params_ma_params_regression_params_state_cov_params_obs_cov)r-   endogr   r5   r    r1   r2   r3   r4   kwargsZ	_min_k_arrC   r   r   idxr0   r+   )	__class__)r-   r/   rA   v   s    




zVARMAX.__init__c             C   s   dt tfiS )Nfit)VARMAXResultsVARMAXResultsWrapper)r-   r.   r.   r/   _res_classes  s    zVARMAX._res_classesc             C   sD  t j| jt jd}t| j }y| }W n t	k
rB   Y nX |j
ddj}| jdkrf| j nd }t t |rt jt |dd }|| }|d k	r|| }t d}| jdkrt j||j}|t ||j8 }g }| jdkr| jnd}t|}|j|d | jd}	t |	jj}| jdkrl|d d df }
| jdkrf|d d dd f  }ng }n| jdkr| }ng }|	j}| jdkr| jr|| j| j | jj| j| j| jj}t dgt!|  }|st"d	g }| j#dkr~t|}|j| j#d d
d}t |jj }| j$r~|| j| j# | jj| j| j| j#j}t dgt!|  }|s~t"d| jdkr|
|| j%< ||| j&< ||| j'< | j(r| || j)< | j*dkr|	j+, || j-< n.| j*dkr
t j.|	j+}|| j/  || j-< | j0r@| j#dkr0|j+, || j1< n|	j+, || j1< |S )N)dtypeZbackfill)methodr   r   )Zaxis)ZmaxlagsZicr    r   z`Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.r   z`Non-invertible starting moving-average parameters found with `enforce_stationarity` set to True.r   r   )2r(   rG   rF   Zfloat64pdZ	DataFramerT   copyZinterpolate	TypeErrorZfillnarE   r;   r   anyZisnanZlinalgZpinvdotTr7   r   VARrX   r    arrayparamsravelZresidr3   reshaperC   r   listr:   r8   r4   rN   rO   rP   r<   rQ   r1   Zsigma_ur   rR   ZcholeskyrL   r2   rS   )r-   rf   rT   r   maskexog_params	ar_paramsr7   Zmod_arZres_arZtrend_paramscoefficient_matricesr   	ma_paramsZmod_maZres_maZ
invertibleZ
cov_factorr.   r.   r/   start_params  s    







zVARMAX.start_paramsc                s   g } j dkr*| fddt jD 7 }| fddt jD 7 }| fddt jD 7 }| fddt jD 7 } jdkr| fddt jD 7 }n& jd	kr| fd
dt jD 7 } jr| fddt jD 7 }|S )Nr   c                s   g | ]}d  j |  qS )zconst.%s)endog_names).0i)r-   r.   r/   
<listcomp>  s   z&VARMAX.param_names.<locals>.<listcomp>c          	      sJ   g | ]B}t  jD ]2}t  jD ]"}d |d  j|  j| f q qqS )z	L%d.%s.%sr   )ranger7   rC   rp   )rq   jrr   k)r-   r.   r/   rs     s   c          	      sJ   g | ]B}t  jD ]2}t  jD ]"}d |d  j|  j| f q qqS )zL%d.e(%s).%sr   )rt   r8   rC   rp   )rq   ru   rr   rv   )r-   r.   r/   rs     s   c                s4   g | ],}t  jD ]}d  j|  j| f qqS )z
beta.%s.%s)rt   r;   Z
exog_namesrp   )rq   rr   ru   )r-   r.   r/   rs     s   r   c                s   g | ]}d  j |  qS )z	sigma2.%s)rp   )rq   rr   )r-   r.   r/   rs     s   r   c                sL   g | ]D}t |d  D ]2}||kr.d j|  nd j|  j| f qqS )r   zsqrt.var.%szsqrt.cov.%s.%s)rt   rp   )rq   rr   ru   )r-   r.   r/   rs     s   c                s   g | ]}d  j |  qS )zmeasurement_variance.%s)rp   )rq   rr   )r-   r.   r/   rs     s   )r    rt   rC   r1   r2   )r-   param_namesr.   )r-   r/   rw     s6    









zVARMAX.param_namesc             C   s  t j|dd}t j|j|jd}|| j || j< | jdkr| jr| jdkr`t 	|| j
 d }n@| jdkrt j| jd j|jd}|| j
 || j< t ||j}|| j | j| j| j }t||\}}| || j< n|| j || j< | jdkrJ| jrJt j| j|jd}|| j | j| j| j }t||\}}| || j< n|| j || j< || j || j< | jdkr|| j
 d || j
< n| jdkr|| j
 || j
< | jr|| j d || j< |S )	aZ  
        Transform unconstrained parameters used by the optimizer to constrained
        parameters used in likelihood evaluation

        Parameters
        ----------
        unconstrained : array_like
            Array of unconstrained parameters used by the optimizer, to be
            transformed.

        Returns
        -------
        constrained : array_like
            Array of constrained parameters which may be used in likelihood
            evalation.

        Notes
        -----
        Constrains the factor transition to be stationary and variances to be
        positive.
        r   )ndmin)r\   r   r   r!   r   r$   )r(   re   rG   r>   r\   rN   r7   r3   r1   diagrR   rB   rL   rb   rc   rO   rh   rC   r   rg   r8   r4   eyerP   rQ   r2   rS   )r-   unconstrainedconstrainedr$   state_cov_lowercoefficientsrm   variancer.   r.   r/   transform_params  s>    




zVARMAX.transform_paramsc             C   s  t j|dd}t j|j|jd}|| j || j< | jdkr| jr| jdkr\t 	|| j
 }n@| jdkrt j| jd j|jd}|| j
 || j< t ||j}|| j | j| j| j }t||\}}| || j< n|| j || j< | jdkrF| jrFt j| j|jd}|| j | j| j| j }t||\}}| || j< n|| j || j< || j || j< | jdkr|| j
 d || j
< n| jdkr|| j
 || j
< | jr|| j d || j< |S )	a  
        Transform constrained parameters used in likelihood evaluation
        to unconstrained parameters used by the optimizer.

        Parameters
        ----------
        constrained : array_like
            Array of constrained parameters used in likelihood evalution, to be
            transformed.

        Returns
        -------
        unconstrained : array_like
            Array of unconstrained parameters used by the optimizer.
        r   )rx   )r\   r   r   r   r$   g      ?)r(   re   rG   r>   r\   rN   r7   r3   r1   ry   rR   rB   rL   rb   rc   rO   rh   rC   r   rg   r8   r4   rz   rP   rQ   r2   rS   )r-   r|   r{   r$   r}   r~   Zunconstrained_matricesr   r.   r.   r/   untransform_params  s>    




zVARMAX.untransform_paramsc                sV  t t| j|f|}| jrh|| j | j| jj}t	
| j|}| jdkrX||| j 7 }|j| j| j< n| jdkr|| j | j| j< || j | j| j| j }|| j | j| j| j }t	j||f | j| j< | jdkr|| j | j| j< nH| jdkr8t	j| jd j|jd}|| j || j< t	
||j| jd< | jrR|| j | j| j< d S )Nr   r   r   r$   )r\   ) r@   r   updater<   rQ   rh   rC   r;   rc   r(   rb   r   r    rN   rB   rI   rO   r7   rP   r8   c_rJ   r1   rR   rK   rG   r>   r\   rL   r2   rS   rM   )r-   rf   rU   rk   	interceptr"   r#   r}   )rW   r.   r/   r   b  s0    






zVARMAX.update)Nr   r   r   FTT)__name__
__module____qualname____doc__rA   propertyr[   ro   rw   r   r   r   __classcell__r.   r.   )rW   r/   r      s   X   r9SMr   c                   sH   e Zd ZdZd fdd	Zd fdd	Zd fdd	Zejje_  ZS )rY   a  
    Class to hold results from fitting an VARMAX model.

    Parameters
    ----------
    model : VARMAX instance
        The fitted model instance

    Attributes
    ----------
    specification : dictionary
        Dictionary including all attributes from the VARMAX model instance.
    coefficient_matrices_var : array
        Array containing autoregressive lag polynomial coefficient matrices,
        ordered from lowest degree to highest.
    coefficient_matrices_vma : array
        Array containing moving average lag polynomial coefficients,
        ordered from lowest degree to highest.

    See Also
    --------
    statsmodels.tsa.statespace.kalman_filter.FilterResults
    statsmodels.tsa.statespace.mlemodel.MLEResults
    opgc                s  t t| j||||f| tj| _tf | jj| jj	| jj
| jj| jj| jj| jj| jj| jj| jjd
| _d | _d | _| jjdkrt| j| jj }| jj}| jj}||| |j|||j| _| jjdkrt| j| jj }	| jj}| jj}
|	||
 |j|||
j| _d S )N)
r1   r2   r3   r4   r5   r7   r8   r    r9   r;   r   )r@   rY   rA   r(   infZdf_residr   modelr1   r2   r3   r4   r5   r7   r8   r    r9   r;   specificationZcoefficient_matrices_varZcoefficient_matrices_vmare   rf   rO   rC   rh   rc   rP   )r-   r   rf   filter_resultsZcov_typerU   rl   rC   r7   rn   r8   )rW   r.   r/   rA     s8    
zVARMAXResults.__init__NFc          
      s  |dkr| j jd }| j j|||dd\}}}	}
|	r| j j| j j dkr| j jjjd |	 }t	|| j j
f}| j jdkr|dkrtdt|}|	| j jf}|j|kstdt|t|jf tj| j jjj|jf j}t||| j j| j j| j j| j j| j j| j jd}|| j x| jj D ]|}|dkr>q,t|j|}|jd	 d
kr,t|jdkr|dd|	 df ||< n |dddd|	 df ||< q,W n"| j jdkr|dk	rt dt! t"t#| j$f |||||d|S )aO  
        In-sample prediction and out-of-sample forecasting

        Parameters
        ----------
        start : int, str, or datetime, optional
            Zero-indexed observation number at which to start forecasting, ie.,
            the first forecast is start. Can also be a date string to
            parse or a datetime type. Default is the the zeroth observation.
        end : int, str, or datetime, optional
            Zero-indexed observation number at which to end forecasting, ie.,
            the first forecast is start. Can also be a date string to
            parse or a datetime type. However, if the dates index does not
            have a fixed frequency, end must be an integer index if you
            want out of sample prediction. Default is the last observation in
            the sample.
        exog : array_like, optional
            If the model includes exogenous regressors, you must provide
            exactly enough out-of-sample values for the exogenous variables if
            end is beyond the last observation in the sample.
        dynamic : boolean, int, str, or datetime, optional
            Integer offset relative to `start` at which to begin dynamic
            prediction. Can also be an absolute date string to parse or a
            datetime type (these are not interpreted as offsets).
            Prior to this observation, true endogenous values will be used for
            prediction; starting with this observation and continuing through
            the end of prediction, forecasted endogenous values will be used
            instead.
        kwargs
            Additional arguments may required for forecasting beyond the end
            of the sample. See `FilterResults.predict` for more details.

        Returns
        -------
        forecast : array
            Array of out of sample forecasts.
        Nr   T)Zsilentz~Out-of-sample forecasting in a model with a regression component requires additional exogenous values via the `exog` argument.zPProvided exogenous values are not of the appropriate shape. Required %s, got %s.)r   r5   r    r1   r2   r3   r4   Zobsr   r!   z_Exogenous array provided to predict, but additional data not required. `exog` argument ignored.)startenddynamicindexr   )%r   Z_indexZ_get_prediction_indexr;   r9   dataZ
orig_endogr>   r(   rG   rC   r:   re   strr   Z	orig_exogrc   r   r5   r    r1   r2   r3   r4   r   rf   r   ZshapeskeysgetattrrB   lenr   r   r@   rY   get_prediction)r-   r   r   r   r   r   rU   Z_startZ_endZ_out_of_sampleZprediction_indexrH   rT   Zrequired_exog_shaper   nameZmat)rW   r.   r/   r     sP    '


(zVARMAXResults.get_prediction皙?Tc          	      sP  ddl m | j}|jdkr<|jdkr<d}d|j|jf }n(|jdkrVd}d|j }nd}d|j }|jdkrv|d7 }|| g}|jd	kr|d
 |jr|d t	t
| j ||| d}|rLtt| j}d fdd	}	| jj}
| jj}| jj}| jj}g }x|t|
D ]n}g }d}| jjd	krL|tj|dd ||
7 }|dkr||
 | }|d |
 | }||t||  |||
d  7 }|dkr||
 | }|d |
 | }||t||  |||
d  7 }|dkr||t|| |d |   ||
| 7 }| jjrF|tj| jj| d dd t|}|| d| jj|  }|	| ||}|j| qW tt| j| jj }|	| |ddd}|j| g }x:||gfD ],}t| }t|dkr|| qW t|}ttt|t|}t|dkrL|	| |ddd}|j| |S )Nr   )summary_paramsZVARMAz(%s,%s)rd   z(%s)ZVMAXr   r   zmeasurement error)alphar   
model_nameZdisplay_paramsTc                sj   | | j | | j| | j| | j| | | f} fddt| jj| 	 D }|d |d|dS )Nc                s,   g | ]$} r$d  |d dd n|qS ).Nr   )joinsplit)rq   r   )	strip_endr.   r/   rs   V  s   z=VARMAXResults.summary.<locals>.make_table.<locals>.<listcomp>F)ZynameZxnamer   Zuse_ttitle)
rf   ZbseZzvaluesZpvaluesZconf_intr(   re   r   rw   tolist)r-   rj   r   r   Zresrw   )r   r   )r   r/   
make_tableP  s    
z)VARMAXResults.summary.<locals>.make_tabler   )rx   r!   zResults for equation %szError covariance matrixF)r   zOther parameters)T)Zstatsmodels.iolib.summaryr   r   r7   r8   r;   r    appendr2   r@   rY   summaryr(   Zaranger   rf   r   rC   rt   re   rF   Zconcatenaterp   ZtablesrR   Zflattenri   set
difference)r-   r   r   Zseparate_paramsspecr   r5   r   indicesr   rC   r7   r8   r;   Zendog_masksrr   Zmasksr+   r   rj   r   tableZstate_cov_maskmZinverse_mask)rW   )r   r   r/   r   0  s    















zVARMAXResults.summary)r   )NNFNN)r   NT)	r   r   r   r   rA   r   r   r   r   r.   r.   )rW   r/   rY     s   , burY   c               @   s0   e Zd Zi ZeejeZi Zeej	eZ	dS )rZ   N)
r   r   r   Z_attrswrapZunion_dictsr   Z_wrap_attrsZ_methodsZ_wrap_methodsr.   r.   r.   r/   rZ     s   rZ   )*r   Z
__future__r   r   r   warningsr   Zstatsmodels.compat.collectionsr   Zpandasr^   Znumpyr(   Zkalman_filterr   r	   Zmlemodelr
   r   r   Ztoolsr   r   r   r   Zstatsmodels.tools.toolsr   Zstatsmodels.tools.datar   Zstatsmodels.tsa.vector_arr   Zstatsmodels.base.wrapperbasewrapperr   Zstatsmodels.tools.sm_exceptionsr   r   r   rY   rZ   Zpopulate_wrapperr.   r.   r.   r/   <module>   s.       p  "