B
    ¤ŠãZâ=  ã               @   s  d Z ddlmZ ddlZddlZddlmZ ddlm	Z	 ddlm
Z
 ddlmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZmZ G dd„ deƒZdd„ Zi Zdd„ ed< dd„ ed< dd„ ed< dd„ ed< eed< d"dd„Zdd„ Zd#dd„ZG d d!„ d!eƒZdS )$aM  
Quantile regression model

Model parameters are estimated using iterated reweighted least squares. The
asymptotic covariance matrix estimated using kernel density estimation.

Author: Vincent Arel-Bundock
License: BSD-3
Created: 2013-03-19

The original IRLS function was written for Matlab by Shapour Mohammadi,
University of Tehran, 2008 (shmohammadi@gmail.com), with some lines based on
code written by James P. Lesage in Applied Econometrics Using MATLAB(1999).PP.
73-4.  Translated to python with permission from original author by Christian
Prinoth (christian at prinoth dot name).
é    )ÚrangeN)Úpinv)Únorm)Ú	chain_dot)Únp_matrix_rank)Úcache_readonly)ÚRegressionModelÚRegressionResultsÚRegressionResultsWrapper)ÚConvergenceWarningÚIterationLimitWarningc                   s2   e Zd ZdZ‡ fdd„Zdd„ Zddd„Z‡  ZS )ÚQuantRegaË  Quantile Regression

    Estimate a quantile regression model using iterative reweighted least
    squares.

    Parameters
    ----------
    endog : array or dataframe
        endogenous/response variable
    exog : array or dataframe
        exogenous/explanatory variable(s)

    Notes
    -----
    The Least Absolute Deviation (LAD) estimator is a special case where
    quantile is set to 0.5 (q argument of the fit method).

    The asymptotic covariance matrix is estimated following the procedure in
    Greene (2008, p.407-408), using either the logistic or gaussian kernels
    (kernel argument of the fit method).

    References
    ----------
    General:

    * Birkes, D. and Y. Dodge(1993). Alternative Methods of Regression, John Wiley and Sons.
    * Green,W. H. (2008). Econometric Analysis. Sixth Edition. International Student Edition.
    * Koenker, R. (2005). Quantile Regression. New York: Cambridge University Press.
    * LeSage, J. P.(1999). Applied Econometrics Using MATLAB,

    Kernels (used by the fit method):

    * Green (2008) Table 14.2

    Bandwidth selection (used by the fit method):

    * Bofinger, E. (1975). Estimation of a density function using order statistics. Australian Journal of Statistics 17: 1-17.
    * Chamberlain, G. (1994). Quantile regression, censoring, and the structure of wages. In Advances in Econometrics, Vol. 1: Sixth World Congress, ed. C. A. Sims, 171-209. Cambridge: Cambridge University Press.
    * Hall, P., and S. Sheather. (1988). On the distribution of the Studentized quantile. Journal of the Royal Statistical Society, Series B 50: 381-391.

    Keywords: Least Absolute Deviation(LAD) Regression, Quantile Regression,
    Regression, Robust Estimation.
    c                s   t t| ƒj||f|Ž d S )N)Úsuperr   Ú__init__)ÚselfÚendogÚexogÚkwargs)Ú	__class__© úIlib/python3.7/site-packages/statsmodels/regression/quantile_regression.pyr   P   s    zQuantReg.__init__c             C   s   |S )zE
        QuantReg model whitener does nothing: returns data.
        r   )r   Údatar   r   r   ÚwhitenS   s    zQuantReg.whitenç      à?ÚrobustÚepaÚ	hsheatheréè  çíµ ÷Æ°>c       !      K   sÎ  |dk s|dkrt dƒ‚dddddg}||krBt d	d
 |¡ ƒ‚nt| }|dkrXt}n$|dkrft}n|dkrtt}nt dƒ‚| j}	| j}
| j}t	| jƒ}|| _
t| j
| j ƒ| _| j| j
 | _d}|
}t |¡}d}d}tg g d}x^||k rD||krD|sD|d7 }|}t |j|
¡}t |j|	¡}t t|ƒ|¡}|	t |
|¡ }t |¡dk }|| dkd d d ||< t |dk || d| | ¡}t |¡}|
|dd…tjf  }t t || ¡¡}|d  |¡ |d  t || ¡¡ |dkrè|d dkrèx@tddƒD ]2}t ||d |  k¡rd}t dt¡ P qW qèW ||krht dt |ƒ d t!¡ |	t |
|¡ }t" #|d¡t" #|d¡ }|||ƒ}t$t %|	¡|d ƒt& '|| ¡t& '|| ¡  }d ||  t (||| ƒ¡ }|d!krXt |dk|| d d| | d ¡}tt |
j|
¡ƒ}t |
j|tjdd…f  |
¡}t)|||ƒ}n>|d"krŽd | d | d|  tt |
j|
¡ƒ }nt d#ƒ‚t*| ||d$} || _+|| _,d | | _-|| _.|| _/t0| ƒS )%aì  Solve by Iterative Weighted Least Squares

        Parameters
        ----------
        q : float
            Quantile must be between 0 and 1
        vcov : string, method used to calculate the variance-covariance matrix
            of the parameters. Default is ``robust``:

            - robust : heteroskedasticity robust standard errors (as suggested
              in Greene 6th edition)
            - iid : iid errors (as in Stata 12)

        kernel : string, kernel to use in the kernel density estimation for the
            asymptotic covariance matrix:

            - epa: Epanechnikov
            - cos: Cosine
            - gau: Gaussian
            - par: Parzene

        bandwidth: string, Bandwidth selection method in kernel density
            estimation for asymptotic covariance estimate (full
            references in QuantReg docstring):

            - hsheather: Hall-Sheather (1988)
            - bofinger: Bofinger (1975)
            - chamberlain: Chamberlain (1994)
        r   é   zp must be between 0 and 1ÚbiwÚcosr   ÚgauÚparzkernel must be one of z, r   ÚbofingerÚchamberlainz;bandwidth must be in 'hsheather', 'bofinger', 'chamberlain'é
   F)ÚparamsÚmsegíµ ÷Æ°>é   Nr'   r(   i,  éd   TzConvergence cycle detectedzMaximum number of iterations (z
) reached.éK   é   gq=
×£põ?g      ð?r   Ziidzvcov must be 'robust' or 'iid')Znormalized_cov_params)1Ú	ExceptionÚjoinÚkernelsÚhall_sheatherr$   r%   r   r   Únobsr   ZrankÚfloatZ
k_constantZdf_modelZdf_residÚnpZonesÚdictÚdotÚTr   ÚabsÚwhereZnewaxisÚmaxÚappendZmeanr   ÚallÚwarningsÚwarnr   Ústrr   ÚstatsÚscoreatpercentileÚminZstdr   ÚppfÚsumr   ÚQuantRegResultsÚqZ
iterationsÚsparsityÚ	bandwidthÚhistoryr
   )!r   rE   ZvcovZkernelrG   Zmax_iterZp_tolr   Z
kern_namesr   r   r1   Z	exog_rankZn_iterZxstarZbetaZdiffÚcyclerH   Zbeta0ZxtxZxtyÚresidÚmaskZiiÚeZiqreÚhZfhat0ÚdZxtxiZxtdxZlfitr   r   r   ÚfitY   sŽ     






&
& 
,
zQuantReg.fit)r   r   r   r   r   r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   rO   Ú__classcell__r   r   )r   r   r   #   s
   + r   c          	   C   sb   t  t  | ¡dkdd| d   dt  | ¡d   ddt  | ¡ d  d ¡}d|t  | ¡dk< |S )	Ng      à?gUUUUUUõ?g       @r)   é   r   g      @r   )r3   r8   r7   )ÚuÚzr   r   r   Ú_parzenã   s    0rX   c             C   s,   dd| d  d  t  t  | ¡dkdd¡ S )Ng      î?r   r)   r   )r3   r8   r7   )rV   r   r   r   Ú<lambda>ë   s    rY   r    c             C   s,   t  t  | ¡dkdt  dt j |  ¡ d¡S )Ng      à?r   r)   r   )r3   r8   r7   r!   Zpi)rV   r   r   r   rY   ì   s    r!   c             C   s(   dd| d   t  t  | ¡dkdd¡ S )Ng      è?r   r)   r   )r3   r8   r7   )rV   r   r   r   rY   í   s    r   c             C   s
   t  | ¡S )N)r   Úpdf)rV   r   r   r   rY   î   s    r"   r#   çš™™™™™©?c             C   sZ   t  |¡}dt  |¡d  }d|d  d }| d t  d|d  ¡d  || d  }|S )Ng      ø?g       @g      ð?gUUUUUUÕ¿gUUUUUUå?gUUUUUUÕ?)r   rB   rZ   )ÚnrE   ÚalpharW   ÚnumÚdenrM   r   r   r   r0   ÷   s
    
*r0   c             C   sN   dt  dt  |¡ ¡d  }dt  |¡d  d d }| d || d  }|S )Ng      @r)   é   r   gš™™™™™É¿gš™™™™™É?)r   rZ   rB   )r\   rE   r^   r_   rM   r   r   r   r$   ÿ   s    r$   c             C   s(   t  d|d  ¡t |d|  |  ¡ S )Nr   r)   )r   rB   r3   Zsqrt)r\   rE   r]   r   r   r   r%     s    r%   c               @   sÖ   e Zd ZdZedd„ ƒZdd„ Zedd„ ƒZedd	„ ƒZed
d„ ƒZ	edd„ ƒZ
edd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZedd„ ƒZed d!„ ƒZd&d$d%„Zd"S )'rD   z'Results instance for the QuantReg modelc             C   s   | j }| jj}| j}t |dk d| | || ¡}t |¡}|t ||d ¡ }t |dk d| | || ¡}t |¡}dt 	|¡t 	|¡  S )Nr   r   r*   )
rE   Úmodelr   rJ   r3   r8   r7   r?   r@   rC   )r   rE   r   rL   Zeredr   r   r   Ú	prsquared  s    

zQuantRegResults.prsquaredc             C   s   dS )Ng      ð?r   )r   r   r   r   Úscale  s    zQuantRegResults.scalec             C   s   t jS )N)r3   Únan)r   r   r   r   Úbic  s    zQuantRegResults.bicc             C   s   t jS )N)r3   rd   )r   r   r   r   Úaic!  s    zQuantRegResults.aicc             C   s   t jS )N)r3   rd   )r   r   r   r   Úllf%  s    zQuantRegResults.llfc             C   s   t jS )N)r3   rd   )r   r   r   r   Úrsquared)  s    zQuantRegResults.rsquaredc             C   s   t jS )N)r3   rd   )r   r   r   r   Úrsquared_adj-  s    zQuantRegResults.rsquared_adjc             C   s   t jS )N)r3   rd   )r   r   r   r   r(   1  s    zQuantRegResults.msec             C   s   t jS )N)r3   rd   )r   r   r   r   Ú	mse_model5  s    zQuantRegResults.mse_modelc             C   s   t jS )N)r3   rd   )r   r   r   r   Ú	mse_total9  s    zQuantRegResults.mse_totalc             C   s   t jS )N)r3   rd   )r   r   r   r   Úcentered_tss=  s    zQuantRegResults.centered_tssc             C   s   t jS )N)r3   rd   )r   r   r   r   Úuncentered_tssA  s    zQuantRegResults.uncentered_tssc             C   s   t ‚d S )N)ÚNotImplementedError)r   r   r   r   ÚHC0_seE  s    zQuantRegResults.HC0_sec             C   s   t ‚d S )N)rn   )r   r   r   r   ÚHC1_seI  s    zQuantRegResults.HC1_sec             C   s   t ‚d S )N)rn   )r   r   r   r   ÚHC2_seM  s    zQuantRegResults.HC2_sec             C   s   t ‚d S )N)rn   )r   r   r   r   ÚHC3_seQ  s    zQuantRegResults.HC3_seNçš™™™™™©?c             C   s:  | j }| j}ddddgfddg}dd| j gfd	d| j gfd
d| j gfdddg}|dkrn| jjjd d }ddlm	}	 |	ƒ }
|
j
| |||||d |
j| ||ddd g }|d dk rîd}|d7 }|d7 }|d7 }||d  }| |¡ n8|dkr&d}|d7 }|d 7 }|d!7 }|| }| |¡ |r6|
 |¡ |
S )"a
  Summarize the Regression Results

        Parameters
        -----------
        yname : string, optional
            Default is `y`
        xname : list of strings, optional
            Default is `var_##` for ## in p the number of regressors
        title : string, optional
            Title for the top table. If not None, then this replaces the
            default title
        alpha : float
            significance level for the confidence intervals

        Returns
        -------
        smry : Summary instance
            this holds the summary tables and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary.Summary : class to hold summary
            results

        )zDep. Variable:N)zModel:NzMethod:zLeast Squares)zDate:N)zTime:NzPseudo R-squared:z%#8.4gz
Bandwidth:z	Sparsity:)zNo. Observations:N)zDf Residuals:N)z	Df Model:NNú zRegression Resultsr   )ÚSummary)ZgleftZgrightÚynameÚxnameÚtitlegš™™™™™©?T)rv   rw   r]   Zuse_téÿÿÿÿg»½×Ùß|Û=z6The smallest eigenvalue is %6.3g. This might indicate zthat there are
z5strong multicollinearity problems or that the design zmatrix is singular.iè  z1The condition number is large, %6.3g. This might zindicate that there are
z,strong multicollinearity or other numerical z	problems.)Z	eigenvalsZcondition_numberrb   rG   rF   ra   r   rP   Zstatsmodels.iolib.summaryru   Zadd_table_2colsZadd_table_paramsr:   Zadd_extra_txt)r   rv   rw   rx   r]   ZeigvalsZcondnoZtop_leftZ	top_rightru   ZsmryZetextZwstrr   r   r   ÚsummaryU  sN    "



zQuantRegResults.summary)NNNrs   )rP   rQ   rR   rS   r   rb   rc   re   rf   rg   rh   ri   r(   rj   rk   rl   rm   ro   rp   rq   rr   rz   r   r   r   r   rD   
  s$   rD   )r[   )r[   )rS   Zstatsmodels.compat.pythonr   Znumpyr3   r<   Zscipy.statsr?   Zscipy.linalgr   r   Zstatsmodels.tools.toolsr   Zstatsmodels.compat.numpyr   Zstatsmodels.tools.decoratorsr   Z#statsmodels.regression.linear_modelr   r	   r
   Zstatsmodels.tools.sm_exceptionsr   r   r   rX   r/   r0   r$   r%   rD   r   r   r   r   Ú<module>   s0    A

