B
    Z_              
   @   s`  d dl mZmZmZmZmZ d dlmZ d dlZ	d dl
m  mZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d<ddZd=ddZd>ddZd?ddZd@ddZdd Zdd Z dd Z!d d! Z"d"d# Z#d$d% Z$d&d' Z%d(d) Z&d*d+ Z'd,d- Z(d.d/ Z)d0d1 Z*d2d3 Z+d4d5 Z,d6d7 Z-d8d9 Z.d:d; Z/dddd)d+d-ddd%d'g
Z0dS )A    )rangelrangelziplongPY3)recarray_selectN)	DataFrame)offsets)	to_offset)ValueWarning)_is_using_pandas_is_recarraycFskipc             C   s  |  }dddg}|dkr,|dd }d}nP|dks<|d	krb|dd
 }|d	kr\|dd
 }d}n|dkrpd
}ntd| t| }t| dp|}|s|r|r| jj}tj| } qt	| tj
rt| } q|  } n
t| } t| }	ttjd|	d tjd|d }
t|
}
|d	kr.|
dddf }
d|kr|sD|rZdd }| |d}n8ttjtjt| dddkddtj| dkdd}t|r|dkrtdn*|dkr|dd }|
ddddf }
|rdnd}|s|r(tj|
| j|d}
|
| g} t| dd| d} n|
| g} t| dd| } |r| jddd} | jj}t|t| }|r|d| | }n||| d  }tsdd |D }dd |D }dd |D }tt||}| t|} | S )a  
    Adds a trend and/or constant to an array.

    Parameters
    ----------
    X : array-like
        Original array of data.
    trend : str {"c","t","ct","ctt"}
        "c" add constant only
        "t" add trend only
        "ct" add constant and linear trend
        "ctt" add constant and linear and quadratic trend.
    prepend : bool
        If True, prepends the new data to the columns of X.
    has_constant : str {'raise', 'add', 'skip'}
        Controls what happens when trend is 'c' and a constant already
        exists in X. 'raise' will raise an error. 'add' will duplicate a
        constant. 'skip' will return the data without change. 'skip' is the
        default.

    Returns
    -------
    y : array, recarray or DataFrame
        The original data with the additional trend columns.  If x is a
        recarray or pandas Series or DataFrame, then the trend column names
        are 'const', 'trend' and 'trend_squared'.

    Notes
    -----
    Returns columns as ["ctt","ct","c"] whenever applicable. There is currently
    no checking for an existing trend.

    See also
    --------
    statsmodels.tools.tools.add_constant
    ZconsttrendZtrend_squaredr   N   r   Zctt   Zcttztrend %s not understood)dtypec             S   s,   yt | dkot | dkS    dS d S )Ng        F)npptpany)s r   7lib/python3.7/site-packages/statsmodels/tsa/tsatools.pysafe_is_const\   s    z add_trend.<locals>.safe_is_const)axisg        raisezx already contains a constantr   )indexcolumnsF)r   Zconvert_datetime64c             S   s   g | ]}|d  qS )r   r   ).0entryr   r   r   
<listcomp>   s    zadd_trend.<locals>.<listcomp>c             S   s   g | ]}|d  qS )r   r   )r!   r"   r   r   r   r#      s    c             S   s   g | ]}t |qS r   )bytes)r!   namer   r   r   r#      s    ) lower
ValueErrorr   r   r   descrpdr   Zfrom_records
isinstanceZSeriescopyr   
asanyarraylenvanderarangeZfloat64ZfliplrZapplyZlogical_andr   r   allr   concatcolumn_stackZ
to_recordsr   listzipZastype)xr   ZprependZhas_constantr    Z
trendorderZis_recarray	is_pandasr(   nobsZtrendarrr   Z	col_constorderZ	new_descrZ	extra_colnamesZdtypesr   r   r   	add_trend   sz    &


"


$


r:   r   Tc                sf  j jrj j} s0tjdkr0tdnt|dkrD|d  t tt	fr^j j   t
sdd |D }|j _t trt    } fddtdd D }t|dd}|d	krt| d }	n<|d
krt|d }	n&|t|krddl}
|
dt |}	t|d|	 }t||	d }|rf |krV||  n||  |rt|}tj|d ||jd
d}nTtjt t|  j f d}x*t|D ]\}}|dd|f ||< qW |rtj||fdd|D d
dS |S nJjdkr4dddf  dkrBd  dk rZjd    dd f }|d	kr~ d }	nd|d
krjd }	nN|dk rjd | d }|jd krވjd }ddl}
|
dt |}	t|dd}t|	}t|	jd }|r: |kr*||  n||  td|f |d|f fS dS )a  
    Returns an array with lags included given an array.

    Parameters
    ----------
    x : array
        An array or NumPy ndarray subclass. Can be either a 1d or 2d array with
        observations in columns.
    col : 'string', int, or None
        If data is a structured array or a recarray, `col` can be a string
        that is the name of the column containing the variable. Or `col` can
        be an int of the zero-based column index. If it's a 1d array `col`
        can be None.
    lags : int
        The number of lags desired.
    drop : bool
        Whether to keep the contemporaneous variable for the data.
    insert : bool or int
        If True, inserts the lagged values after `col`. If False, appends
        the data. If int inserts the lags at int.

    Returns
    -------
    array : ndarray
        Array with lags

    Examples
    --------

    >>> import statsmodels.api as sm
    >>> data = sm.datasets.macrodata.load()
    >>> data = data.data[['year','quarter','realgdp','cpi']]
    >>> data = sm.tsa.add_lag(data, 'realgdp', lags=2)

    Notes
    -----
    Trims the array both forward and backward, so that the array returned
    so that the length of the returned array is len(`X`) - lags. The lags are
    returned in increasing order, ie., t-1,t-2,...,t-lags
    r   z)col is None and the input array is not 1dr   c             S   s"   g | ]}t |trt|n|qS r   )r*   unicoder$   )r!   r%   r   r   r   r#      s    zadd_lag.<locals>.<listcomp>c                s   g | ]} d  d|  qS )_zL(%i)r   )r!   i)colr   r   r#      s    ZBoth)maxlagtrimTFNz<insert > number of variables, inserting at the last position)Zusemask)r   c                s   g | ]}|  d  qS )Nr   )r!   r%   )lagsr5   r   r   r#      s    )r@   )r   r9   r   Zsqueezendim
IndexErrorr-   r*   intr   r   r;   r$   r   lagmatr3   r   warningswarnr   popr   nprfZappend_fieldsTzerosr   	enumerateshaper   r2   )r5   r>   rA   dropinsertr9   ZcontempZ	tmp_namesZndlagsZins_idxrF   Zfirst_namesZ
last_namesZ_xZ	first_arrr=   r%   Z
first_colsZ	last_colsr   )r>   rA   r5   r   add_lag   s    )













rP   c             C   s   | j dkrt|dkr| j} n| j dkr0td| jd }|dkrT| | jdd }n>tjtt	||d d}tj
|| }| t|| }| j dkrt|dkr|j}|S )a  
    Detrend an array with a trend of given order along axis 0 or 1

    Parameters
    ----------
    x : array_like, 1d or 2d
        data, if 2d, then each row or column is independently detrended with the
        same trendorder, but independent trend estimates
    order : int
        specifies the polynomial order of the trend, zero is constant, one is
        linear trend, two is quadratic trend
    axis : int
        axis can be either 0, observations by rows,
        or 1, observations by columns

    Returns
    -------
    detrended data series : ndarray
        The detrended series is the residual of the linear regression of the
        data on the trend of given order.
    r   r   z0x.ndim > 2 is not implemented until it is neededr   )r   )N)rB   rD   rJ   NotImplementedErrorrM   Zmeanr   r.   r/   floatZlinalgZpinvdot)r5   r8   r   r7   ZresidZtrendsZbetar   r   r   detrend  s    

rU   forwardexc                s*  t | do|}|dkrdn|}| }|r:|dkr:tdt| }d}|jdkrb|dddf }|j\}}	|dkrx|	}||krtdt|| |	|d  f}
xNtdt	|d D ]8}||
|| || | |	||  |	|| d  f< qW |d	krd}n|d
kr|}ntd|dkr.t
|
}n|}|rt| trJ| jn| jg}dd |D }x6t|D ]*}t|d  | fdd|D  qjW t|
d| | j|d}
|
j|d }|dkr|| }||d}n2|
|||df }|dkr|
||d|f }|dkr"||fS |S dS )a  
    Create 2d array of lags

    Parameters
    ----------
    x : array_like, 1d or 2d
        data; if 2d, observation in rows and variables in columns
    maxlag : int
        all lags from zero to maxlag are included
    trim : str {'forward', 'backward', 'both', 'none'} or None
        * 'forward' : trim invalid observations in front
        * 'backward' : trim invalid initial observations
        * 'both' : trim invalid observations on both sides
        * 'none', None : no trimming of observations
    original : str {'ex','sep','in'}
        * 'ex' : drops the original array returning only the lagged values.
        * 'in' : returns the original array and the lagged values as a single
          array.
        * 'sep' : returns a tuple (original array, lagged values). The original
                  array is truncated to have the same number of rows as
                  the returned lagmat.
    use_pandas : bool, optional
        If true, returns a DataFrame when the input is a pandas
        Series or DataFrame.  If false, return numpy ndarrays.

    Returns
    -------
    lagmat : 2d array
        array with lagged observations
    y : 2d array, optional
        Only returned if original == 'sep'

    Examples
    --------
    >>> from statsmodels.tsa.tsatools import lagmat
    >>> import numpy as np
    >>> X = np.arange(1,7).reshape(-1,2)
    >>> lagmat(X, maxlag=2, trim="forward", original='in')
    array([[ 1.,  2.,  0.,  0.,  0.,  0.],
       [ 3.,  4.,  1.,  2.,  0.,  0.],
       [ 5.,  6.,  3.,  4.,  1.,  2.]])

    >>> lagmat(X, maxlag=2, trim="backward", original='in')
    array([[ 5.,  6.,  3.,  4.,  1.,  2.],
       [ 0.,  0.,  5.,  6.,  3.,  4.],
       [ 0.,  0.,  0.,  0.,  5.,  6.]])

    >>> lagmat(X, maxlag=2, trim="both", original='in')
    array([[ 5.,  6.,  3.,  4.,  1.,  2.]])

    >>> lagmat(X, maxlag=2, trim="none", original='in')
    array([[ 1.,  2.,  0.,  0.,  0.,  0.],
       [ 3.,  4.,  1.,  2.,  0.,  0.],
       [ 5.,  6.,  3.,  4.,  1.,  2.],
       [ 0.,  0.,  5.,  6.,  3.,  4.],
       [ 0.,  0.,  0.,  0.,  5.,  6.]])

    Notes
    -----
    When using a pandas DataFrame or Series with use_pandas=True, trim can only
    be 'forward' or 'both' since it is not possible to consistently extend index
    values.
    Nnone)rX   backwardzDtrim cannot be 'none' or 'forward' when used on Series or DataFramesr   r   )rW   sepzmaxlag should be < nobs)rX   rV   )rY   Zbothztrim option not validc             S   s   g | ]}t |qS r   )str)r!   r>   r   r   r   r#     s    zlagmat.<locals>.<listcomp>c                s   g | ]}t |d    qS )z.L.)r[   )r!   r>   )lag_strr   r   r#     s    )r   r    )rZ   rW   rZ   )r   r&   r'   r   asarrayrB   rM   rK   r   rD   r-   r*   r   r    r%   r[   extendr   ilocrN   )r5   r?   r@   original
use_pandasr6   ZxaZdropidxr7   nvarZlmkZstartobsZstopobsZ	x_columnsr    ZlagrA   Zleadsr   )r\   r   rE   A  sT    A


8






rE   c          	   C   s  |dkr|}t ||}t| d}| jdkrL|r:t| } qh| dddf } n| jdks`| jdkrhtd| j\}}	|r|rt| jdddf ||ddd}
|
jddd|d f g}xRt	d|	D ]D}t| jdd|f ||ddd}
|
|
jdd||d f  qW tj|dd	S |r,t| } t| dddf ||dd
ddd|d f g}xJt	d|	D ]<}|
t| dd|f ||dd
dd||d f  qjW t|S )a  
    Generate lagmatrix for 2d array, columns arranged by variables

    Parameters
    ----------
    x : array_like, 2d
        2d data, observation in rows and variables in columns
    maxlag0 : int
        for first variable all lags from zero to maxlag are included
    maxlagex : None or int
        max lag for all other variables all lags from zero to maxlag are included
    dropex : int (default is 0)
        exclude first dropex lags from other variables
        for all variables, except the first, lags from dropex to maxlagex are
        included
    trim : string
        * 'forward' : trim invalid observations in front
        * 'backward' : trim invalid initial observations
        * 'both' : trim invalid observations on both sides
        * 'none' : no trimming of observations
    use_pandas : bool, optional
        If true, returns a DataFrame when the input is a pandas
        Series or DataFrame.  If false, return numpy ndarrays.

    Returns
    -------
    lagmat : 2d array
        array with lagged observations, columns ordered by variable

    Notes
    -----
    Inefficient implementation for unequal lags, implemented for convenience
    Nr   r   r   z'Only supports 1 and 2-dimensional data.inT)r@   r`   ra   )r   )r@   r`   )maxr   rB   r)   r   	TypeErrorrM   rE   r_   r   appendr1   r   r,   r2   )r5   Zmaxlag0ZmaxlagexZdropexr@   ra   r?   r6   r7   rb   rA   Zlagslirc   r   r   r   	lagmat2ds  s4    $





$
2<rh   c             C   s
   |  dS )NF)ravel)matr   r   r   vec  s    rl   c             C   s   | j tt| S )N)rJ   take_triu_indicesr-   )rk   r   r   r   vech   s    ro   c             C   s   t | \}}||  | S )N)r   Ztril_indices)nrowscolsr   r   r   _tril_indices  s    rs   c             C   s   t | \}}||  | S )N)r   triu_indices)rp   rq   rr   r   r   r   rn   
  s    rn   c             C   s   t | \}}||  | S )N)r   diag_indices)rp   rq   rr   r   r   r   _diag_indices  s    rv   c             C   s8   t tt| }|| t| ks&t| j||fddS )Nri   )r8   )rD   r   sqrtr-   AssertionErrorreshape)vrc   r   r   r   unvec  s    r{   c             C   sl   ddt ddt|     }tt |}t ||f}| |t |< ||j }|t |  d  < |S )Ng      ?r   r      r   )	r   rw   r-   rD   roundrK   rt   rJ   ru   )rz   rq   resultr   r   r   unvech  s    
r   c             C   s,   t | | d  d }t dd |D jS )z
    Create duplication matrix D_n which satisfies vec(S) = D_n vech(S) for
    symmetric matrix S

    Returns
    -------
    D_n : ndarray
    r   r   c             S   s   g | ]}t | qS r   )r   rj   )r!   r5   r   r   r   r#   /  s    z&duplication_matrix.<locals>.<listcomp>)r   eyeZarrayrJ   )rp   tmpr   r   r   duplication_matrix%  s    	r   c             C   s.   t tt| | f}t| |  |dk S )z
    Create the elimination matrix L_n which satisfies vech(M) = L_n vec(M) for
    any matrix M

    Parameters
    ----------

    Returns
    -------

    r   )rl   r   ZtrilZonesr   )rp   Zvech_indicesr   r   r   elimination_matrix1  s    r   c             C   s<   t | | }t | | j| |fdd}|j| ddS )z
    Create the commutation matrix K_{p,q} satisfying vec(A') = K_{p,q} vec(A)

    Parameters
    ----------
    p : int
    q : int

    Returns
    -------
    K : ndarray (pq x pq)
    ri   )r8   r   )r   )r   r   r/   ry   rm   rj   )pqKindicesr   r   r   commutation_matrix@  s    r   c          	   C   s   dt |   dt |     }dt |   dt |     }xdtdt| D ]R}|| }x0t|D ]$}||  |||| d   8  < qnW |d| |d|< qXW |S )z
    Transforms params to induce stationarity/invertability.

    Parameters
    ----------
    params : array
        The AR coefficients

    Reference
    ---------
    Jones(1980)
    r   N)r   expr+   r   r-   )params	newparamsr   jakiterr   r   r   _ar_transparamsQ  s    $r   c             C   s   |   }xvtt| d ddD ]^}| | }x<t|D ]0}| | || || d    d|d   ||< q4W |d| | d|< qW td|  d|    }|S )z
    Inverse of the Jones reparameterization

    Parameters
    ----------
    params : array
        The transformed AR coefficients
    r   r   r   r   N)r+   r   r-   r   log)r   r   r   r   r   Z
invarcoefsr   r   r   _ar_invtransparamsi  s    
r   c          	   C   s   dt |   dt |     }dt |   dt |     }xdtdt| D ]R}|| }x0t|D ]$}||  |||| d   7  < qnW |d| |d|< qXW |S )z
    Transforms params to induce stationarity/invertability.

    Parameters
    ----------
    params : array
        The ma coeffecients of an (AR)MA model.

    Reference
    ---------
    Jones(1980)
    r   N)r   r   r+   r   r-   )r   r   r   r   br   r   r   r   _ma_transparams}  s    $$$r   c             C   s   |   }xvtt| d ddD ]^}| | }x<t|D ]0}| | || || d    d|d   ||< q4W |d| | d|< qW td|  d|    }|S )z
    Inverse of the Jones reparameterization

    Parameters
    ----------
    params : array
        The transformed MA coefficients
    r   r   r   r   N)r+   r   r-   r   r   )Zmacoefsr   r   r   r   Z
invmacoefsr   r   r   _ma_invtransparams  s    	0r   c                s.   d  t  fddt ddD S )a  
    Returns the successive differences needed to unintegrate the series.

    Parameters
    ----------
    x : array-like
        The original series
    d : int
        The number of differences of the differenced series.

    Returns
    -------
    y : array-like
        The increasing differences from 0 to d-1 of the first d elements
        of x.

    See Also
    --------
    unintegrate
    Nc                s    g | ]}t  | d  qS )r   )r   Zdiff)r!   r=   )dr5   r   r   r#     s    z&unintegrate_levels.<locals>.<listcomp>r   r   )r   r]   r   )r5   r   r   )r   r5   r   unintegrate_levels  s    r   c             C   s\   t |dd }t|dkr@|d}tttj|| f |S |d }ttj|| f S )ay  
    After taking n-differences of a series, return the original series

    Parameters
    ----------
    x : array-like
        The n-th differenced series
    levels : list
        A list of the first-value in each differenced series, for
        [first-difference, second-difference, ..., n-th difference]

    Returns
    -------
    y : array-like
        The original series de-differenced

    Examples
    --------
    >>> x = np.array([1, 3, 9., 19, 8.])
    >>> levels = unintegrate_levels(x, 2)
    >>> levels
    array([ 1.,  2.])
    >>> unintegrate(np.diff(x, 2), levels)
    array([  1.,   3.,   9.,  19.,   8.])
    Nr   r   r   )r3   r-   rH   unintegrater   ZcumsumZr_)r5   ZlevelsZx0r   r   r   r     s    
r   c             C   s   t | tjst| } | j } | dks0| dr4dS | dksF| drJdS | dks\| dr`d	S | d
ksr| drvdS | dkrdS | dkrdS | dkrdS td| dS )a-  
    Convert a pandas frequency to a periodicity

    Parameters
    ----------
    freq : str or offset
        Frequency to convert

    Returns
    -------
    period : int
        Periodicity of freq

    Notes
    -----
    Annual maps to 1, quarterly maps to 4, monthly to 12, weekly to 52.
    A)zA-zAS-r   Q)zQ-zQS-   M)zM-ZMS   WzW-4   D   B   H   zDfreq {} not understood. Please report if you think this is in error.N)	r*   r	   Z
DateOffsetr
   Z	rule_codeupper
startswithr'   format)Zfreqr   r   r   freq_to_period  s&    
r   )r   Fr   )Nr   FT)r   r   )rV   rW   F)Nr   rV   F)1Zstatsmodels.compat.pythonr   r   r   r   r   Zstatsmodels.compat.numpyr   Znumpyr   Znumpy.lib.recfunctionslibZrecfunctionsrI   Zpandasr)   r   Zpandas.tseriesr	   Zpandas.tseries.frequenciesr
   Zstatsmodels.tools.sm_exceptionsr   Zstatsmodels.tools.datar   r   r:   rP   rU   rE   rh   rl   ro   rs   rn   rv   r{   r   r   r   r   r   r   r   r   r   r   r   __all__r   r   r   r   <module>   sH   
{
 
*
x 
C")