B
    ZU\                 @   s   d Z ddlmZmZmZmZmZ ddlmZ ddl	Z
ddlmZmZmZ ddlmZmZmZ ddlm  mZ ddlmZ dd	 Zd
d Zdd ZG dd deZG dd deZG dd deZdd Z dd Z!dddZ"dd Z#d ddZ$dS )!zr
Base tools for handling various kinds of data structures, attaching metadata to
results, and doing data cleaning
    )reduce	iteritemslmapziprange)np_matrix_rankN)	DataFrameSeriesisnull)resettable_cachecache_readonlycache_writable)MissingDataErrorc             C   s,   t | jdkr(t |  jdkr(d S d S )N   )npasarrayndimsqueeze)x r   4lib/python3.7/site-packages/statsmodels/base/data.py_asarray_2dcolumns   s    $r   c             C   sB   t | } | jdkr$| dddf } t jt| dddddf S )zy
    Makes sure input is an array and is 2d. Makes sure output is 2d. True
    indicates a null in the rows of 2d x.
    r   N)axis)r   r   r   anyr
   )r   r   r   r   _asarray_2d_null_rows   s    

r   c              G   s0   t | dkr| dggf7 } dd }t||  S )z
    Returns a boolean array which is True where any of the rows in any
    of the _2d_ arrays in arrs are NaNs. Inputs can be any mixture of Series,
    DataFrames or array-like.
    r   Fc             S   s0   t | do| jtko| }tt| |t|B S )Ndtype)hasattrr   boolr   Z
logical_orr   )r   yZx_is_boolean_arrayr   r   r   _nan_row_maybe_two_inputs*   s    
z,_nan_rows.<locals>._nan_row_maybe_two_inputs)lenr   r   )Zarrsr   r   r   r   	_nan_rows!   s    r!   c               @   s  e Zd ZdZdZd<ddZdd Zdd	 Zd
d Ze	dd Z
e	dd Ze	dd Zdd Ze dd Ze dd Zedd Zejdd Zedd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd=d(d)Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Z d4d5 Z!d6d7 Z"d8d9 Z#d:d; Z$dS )>	ModelDatazi
    Class responsible for handling input data and extracting metadata into the
    appropriate form
    Nnonec             K   s   d|kr| d| _d|kr(| d| _|dkr| j|||f|\}}|| _| j| | j| _| j	| _
| | j| j	\| _| _	n,| j| || _|| _
| ||\| _| _	| | |   t | _d S )Ndesign_infoformular#   )popr$   r%   handle_missingZmissing_row_idx__dict__updateendog
orig_endogexog	orig_exog_convert_endog_exog_handle_constant_check_integrityr   _cache)selfr*   r,   missinghasconstkwargsZarraysZnan_idxr   r   r   __init__9   s(    

zModelData.__init__c             C   s0   ddl m } || j}d|kr,|d= d|d< |S )Nr   )copyr$   Trestore_design_info)r7   r(   )r2   r7   dr   r   r   __getstate__S   s    
zModelData.__getstate__c       
      C   s   d|krddl m}m} g }y|d }W n& tk
rN   |d |d }Y nX xpdD ]`}y||d ||d	d
\}}P W qV t|fk
r }	 ztd|  ||	 W d d }	~	X Y qVX qVW |d |j| _|d= | j	
| d S )Nr8   r   )	dmatrices
PatsyErrorframer+   r-   )      r   r      r%   Z	dataframe)Zeval_envZreturn_typeznot in depth %d)Zpatsyr;   r<   KeyErrorjoin	NameErrorprintappendr$   r(   r)   )
r2   r9   r;   r<   excdataZdepth_Zdesigner   r   r   __setstate__[   s(    

zModelData.__setstate__c             C   s  |d k	r*|rd| _ d | _nd| _ d | _nx| jd krDd | _d| _ n^d}| jjdd}t| sltdt|dkd 	 }|j
| _ | j dkr| jd d |f  dkr|| _nd}n| j dkrFg }x|D ]<}| jd d |f  }|dkrd| _ || _P || qW t|dk}| r@d| _ ||  | _nd}n| j dkrXd}n |rtt| jjd | jf}	t|	}
t| j}t||
k| _ d | _d S )Nr   r   F)r   zexog contains inf or nansT)Z
k_constant	const_idxr,   Zptpr   Zisfiniteallr   wherer   sizeZmeanrF   arrayr   ZargmaxZcolumn_stackZonesshaper   int)r2   r4   Zcheck_implicitZptp_rL   valuesidxvalueposZaugmented_exogZ	rank_augmZ	rank_origr   r   r   r/   u   sT    







zModelData._handle_constantc             C   s   || S )Nr   )clsr   nan_maskr   r   r   
_drop_nans   s    zModelData._drop_nansc             C   s   || d d |f S )Nr   )rW   r   rX   r   r   r   _drop_nans_2d   s    zModelData._drop_nans_2dc                s  g }| dd}|dk	r4d}g }|dkrd|dg7 }n0|dk	rN||f}ddg}n|f}dg}|dg7 }d}	g }
t|r*xt|D ]\}}|dks|jdkr||g7 }q|jdkr|t|f7 }||g7 }q| jdkr|t|f7 }||g7 }q|jdkr|	t|f7 }	|
|g7 }
qtd	qW |dk	r|d}|rzt| }|j	d j	d krhtd
|  }|O |	rt|	}|j	d j	d krtd|dk	r||  O }n
|  }|O n(t| |	rtdddf f|	  t
stt||}|	r0|tt|
|	 |rT|tt|dgt|  |dk	r|d|i |dk	r|d|i |g fS |dkrtdn
|dkr  fdd} fdd}tt|t||}|dk	rD|dk	r| } ||}|dk	r ||}|d|i |dk	rD|d|i |	rd|tt|
t||	 |r|tt|dgt|  |t d  fS td| dS )zu
        This returns a dictionary with keys endog, exog and the keys of
        kwargs. It preserves Nones.
        missing_idxNr   r,   r*   r   r   r>   z5Arrays with more than 2 dimensions aren't yet handledzBShape mismatch between endog/exog and extra arrays given to model.zEShape mismatch between endog/exog and extra 2d arrays given to model.raisez!NaNs were encountered in the dataZdropc                s     | S )N)rY   )r   )rW   rX   r   r   <lambda>  s    z*ModelData.handle_missing.<locals>.<lambda>c                s     | S )N)rZ   )r   )rW   rX   r   r   r]     s    z missing option %s not understood)r&   r    r   r   r   r   r   
ValueErrorr!   rQ   r   dictr   r)   r   r   rY   rN   tolist)rW   r*   r,   r3   r5   Znone_array_namesr[   ZcombinedZcombined_namesZcombined_2dZcombined_2d_nameskeyZvalue_arrayZupdated_row_maskZcombined_nansZcombined_2d_nansZ	drop_nansZdrop_nans_2dr   )rW   rX   r   r'      s    




















zModelData.handle_missingc             C   sT   |  |}d }|d k	rL| |}|jdkr:|d d d f }|jdkrLtd||fS )Nr   r>   zexog is not 1d or 2d)	_get_yarr	_get_xarrr   r^   )r2   r*   r,   ZyarrZxarrr   r   r   r.   8  s    



zModelData._convert_endog_exogc             C   s>   | j }| |}|st| j}t|dkr2|d S t|S d S )Nr   r   )r+   
_get_names_make_endog_namesr*   r    list)r2   r*   ynamesr   r   r   rg   F  s    

zModelData.ynamesc             C   s2   | j }|d k	r.| |}|s&t| j}t|S d S )N)r-   rd   _make_exog_namesr,   rf   )r2   r,   xnamesr   r   r   ri   R  s    

zModelData.xnamesc             C   s   | j p
| jS )N)_param_namesri   )r2   r   r   r   param_names\  s    zModelData.param_namesc             C   s
   || _ d S )N)rj   )r2   rS   r   r   r   rk   a  s    c             C   s.   | j }|d k	r| |}n| j}| |}|S )N)r-   _get_row_labelsr+   )r2   r,   
row_labelsr*   r   r   r   rm   e  s    
zModelData.row_labelsc             C   s   d S )Nr   )r2   arrr   r   r   rl   o  s    zModelData._get_row_labelsc             C   sT   t |trt|jS t |tr2|jr,|jgS d S ny|jjS  tk
rN   Y nX d S )N)	
isinstancer   rf   columnsr	   namer   namesAttributeError)r2   rn   r   r   r   rd   r  s    


zModelData._get_namesc             C   sZ   t |rt |}t|}t|dkrR|jdkr8|S |jdkrRt| gS | S )Nr   )	data_util_is_structured_ndarraystruct_to_ndarrayr   r   r    r   r   )r2   r*   r   r   r   rb     s    




zModelData._get_yarrc             C   s   t |rt |}t|S )N)rt   ru   rv   r   r   )r2   r,   r   r   r   rc     s    

zModelData._get_xarrc             C   s*   | j d k	r&t| j t| jkr&tdd S )Nz+endog and exog matrices are different sizes)r,   r    r*   r^   )r2   r   r   r   r0     s    
zModelData._check_integrityrp   c             C   s   |dkr|  |S |dkr$| |S |dkr6| |S |dkrH| |S |dkrZ| |S |dkrl| |S |dkr| ||S |dkr| ||S |d	kr| |S |S d S )
Nrp   ZrowsZcovZdatesZ
columns_eqZcov_eqZgeneric_columnsZgeneric_columns_2drg   )	attach_columnsattach_rows
attach_covattach_datesattach_columns_eqattach_cov_eqattach_generic_columnsattach_generic_columns_2dattach_ynames)r2   objZhowrr   r   r   r   wrap_output  s&    






zModelData.wrap_outputc             C   s   |S )Nr   )r2   resultr   r   r   rw     s    zModelData.attach_columnsc             C   s   |S )Nr   )r2   r   r   r   r   r{     s    zModelData.attach_columns_eqc             C   s   |S )Nr   )r2   r   r   r   r   ry     s    zModelData.attach_covc             C   s   |S )Nr   )r2   r   r   r   r   r|     s    zModelData.attach_cov_eqc             C   s   |S )Nr   )r2   r   r   r   r   rx     s    zModelData.attach_rowsc             C   s   |S )Nr   )r2   r   r   r   r   rz     s    zModelData.attach_datesc             O   s   |S )Nr   )r2   r   argsr5   r   r   r   r}     s    z ModelData.attach_generic_columnsc             O   s   |S )Nr   )r2   r   r   r5   r   r   r   r~     s    z#ModelData.attach_generic_columns_2dc             C   s   |S )Nr   )r2   r   r   r   r   r     s    zModelData.attach_ynames)Nr#   N)rp   N)%__name__
__module____qualname____doc__rj   r6   r:   rK   r/   classmethodrY   rZ   r'   r.   r   rg   ri   propertyrk   setterr   rm   rl   rd   rb   rc   r0   r   rw   r{   ry   r|   rx   rz   r}   r~   r   r   r   r   r   r"   2   s<   
@{


r"   c               @   s   e Zd Zdd ZdS )	PatsyDatac             C   s   |j jS )N)r$   column_names)r2   rn   r   r   r   rd     s    zPatsyData._get_namesN)r   r   r   rd   r   r   r   r   r     s   r   c                   s   e Zd ZdZd fdd	Ze fddZe fddZ fd	d
Zdd Z	dd Z
d ddZdd Zdd Zdd Zdd Zdd Zdd Zdd Z  ZS )!
PandasDataz^
    Data handling class which knows how to reattach pandas metadata to model
    results
    Nc                sV   t |}|d kr|nt |}|jtks<|d k	rD|jtkrDtdtt| ||S )NzRPandas data cast to numpy dtype of object. Check input data with np.asarray(data).)r   r   r   objectr^   superr   r.   )r2   r*   r,   )	__class__r   r   r.     s
    
zPandasData._convert_endog_exogc                s*   t |dr|j| S tt| ||S d S )Nix)r   locr   r   rY   )rW   r   rX   )r   r   r   rY     s    

zPandasData._drop_nansc                s8   t |dr"|j| jd d |f S tt| ||S d S )Nr   )r   r   r   r   rZ   )rW   r   rX   )r   r   r   rZ     s    
zPandasData._drop_nans_2dc                sV   | j | j }}|d k	rDt|drDt|drD| j j| jjsDtdtt|   d S )Nindexz.The indices for endog and exog are not aligned)	r+   r-   r   r   Zequalsr^   r   r   r0   )r2   r*   r,   )r   r   r   r0     s    zPandasData._check_integrityc             C   s$   y|j S  tk
r   | jj S X d S )N)r   rs   r+   )r2   rn   r   r   r   rl     s    zPandasData._get_row_labelsc             C   s   t | |d }t||dS )N)r   )getattrr	   )r2   r   rr   r   r   r   r   r}     s    z!PandasData.attach_generic_columnsc             C   s.   |p|}t | |d }t | |d }t|||dS )N)r   rp   )r   r   )r2   r   ZrownamesZcolnamesr   r   r   r~     s    z$PandasData.attach_generic_columns_2dc             C   s*   |j dkrt|| jdS t|| jdS d S )Nr   )r   )r   r	   rk   r   )r2   r   r   r   r   rw     s    
zPandasData.attach_columnsc             C   s   t || j| jdS )N)r   rp   )r   ri   rg   )r2   r   r   r   r   r{     s    zPandasData.attach_columns_eqc             C   s   t || j| jdS )N)r   rp   )r   rk   )r2   r   r   r   r   ry     s    zPandasData.attach_covc             C   s   t || j| jdS )N)r   rp   )r   rg   )r2   r   r   r   r   r|     s    zPandasData.attach_cov_eqc             C   s   |  }tj| jddjd }|dkrB|j|fkrB|d d d f }|jdk rht|| jt| d  dS t	|| jt| d  | jdS d S )Nr   )ndminr   r>   )r   )r   rp   )
r   r   rP   rg   rQ   r   r	   rm   r    r   )r2   r   squeezedk_endogr   r   r   rx     s    
zPandasData.attach_rowsc             C   sp   |  }tj| jddjd }|dkrB|j|fkrB|d d d f }|jdk rZt|| jdS t|| j| jdS d S )Nr   )r   r   r>   )r   )r   rp   )	r   r   rP   rg   rQ   r   r	   Zpredict_datesr   )r2   r   r   r   r   r   r   rz   )  s    
zPandasData.attach_datesc             C   s2   |  }|jdk r t|| jdS t|| jdS d S )Nr>   )rq   )rp   )r   r   r	   rg   r   )r2   r   r   r   r   r   r   5  s    
zPandasData.attach_ynames)N)N)r   r   r   r   r.   r   rY   rZ   r0   rl   r}   r~   rw   r{   ry   r|   rx   rz   r   __classcell__r   r   )r   r   r     s   		
	r   c             C   s<   | j dks| jd dkr dg}ndd t| jd D }|S )Nr   r   c             S   s   g | ]}d |d  qS )zy%dr   r   ).0ir   r   r   
<listcomp>A  s    z%_make_endog_names.<locals>.<listcomp>)r   rQ   r   )r*   rg   r   r   r   re   =  s    re   c             C   sh   |  d}|dk rF| }dd td| jd D }||d ndd td| jd d D }|S )Nr   c             S   s   g | ]}d | qS )zx%dr   )r   r   r   r   r   r   L  s    z$_make_exog_names.<locals>.<listcomp>r   Zconstc             S   s   g | ]}d | qS )zx%dr   )r   r   r   r   r   r   O  s    )varr   Zargminr   rQ   insert)r,   Zexog_varrL   Z
exog_namesr   r   r   rh   F  s    
rh   r#   c             K   sH   t | |}|dkr0t| |d}|| |d fS |j| |fd|i|S )Nr#   )r*   r,   r3   )handle_data_class_factoryr_   r)   r'   )r*   r,   r3   r5   klassZret_dictr   r   r   r'   T  s    

r'   c             C   sd   t | |rt}nNt | |r$t}n<t | |r6t}n*t | |rHt}ntdt	| t	|f |S )z
    Given inputs
    z%unrecognized data structures: %s / %s)
rt   Z_is_using_ndarray_typer"   Z_is_using_pandasr   Z_is_using_patsyr   Z_is_using_ndarrayr^   type)r*   r,   r   r   r   r   r   ]  s    r   c             K   sR   t | ttfrt| } t |ttfr0t|}t| |}|| f|||d|S )N)r,   r3   r4   )ro   rf   tupler   r   r   )r*   r,   r3   r4   r5   r   r   r   r   handle_datap  s    


r   )Nr#   )r#   N)%r   Zstatsmodels.compat.pythonr   r   r   r   r   Zstatsmodels.compat.numpyr   Znumpyr   Zpandasr   r	   r
   Zstatsmodels.tools.decoratorsr   r   r   Zstatsmodels.tools.dataZtoolsrH   rt   Zstatsmodels.tools.sm_exceptionsr   r   r   r!   r   r"   r   r   re   rh   r'   r   r   r   r   r   r   <module>   s*      n	
	