B
    Z4                 @   s  d Z ddlmZ ddlZddlmZ ddlmZ dd Z	dd	 Z
d
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZedkrdZerddZedZejd d!Zeddd"df Zeeeeedddf   Zd"d"d"gZe eeee eeee dddf e Z!e e! e eeeeed"e e eeee  dd#lm"Z"m#Z# d$d% Z$d&d' Z%d(d) Z&d*d+ Z'd,d- Z(d.d/ Z)er e d0 e e#j*e$d"d1d"d2d!d3 e d4ed"dd"ed  e d5ed"dd"ed  e e#j*e$d6d7d"d8d!d3 e d5ed6dd"ed9 e e#j*e$d6d7d"d:d!d3 e d5ed6dded9 e e#j*e$d6d7d"d;d!d3 e d5ed6d"ded9 e e#j*e%d"d7d"d<d!d3 e e#j*e&dd7d"d=d!d3 d>\ZZ+Z,Z-e d5eee+e,ed9 e e#j*e%e+d7d"ee,e-fd!d3 e e#j*e&e,d7d"ee+e-fd!d3 e d? e e#j*e'd"d1d"d@d!d3 e ed"dd"e dA\ZZ+Z,e d5eee+e,e e e#j*e(e+d7d"ee,fd!d3 e e#j*e)e,d7d"ee+fd!d3 dB\ZZ+Z,e d5eee+e,e e e#j*e(e+d7d"ee,fd!d3 e e#j*e)e,d7d"ee+fd!d3 e dC e ed"e.dd"gdDe$d"dd"dDdE e ed"e.dd"gd e$d"dd"d dE e ed"e.dd"gd e$d"dd"d  e ed"e.ddFgd e$dGddGd  e ed"e.dd"gdDe$d"dd"dD e ed"e.dd"gd e$d"ddGe/dH d  ddIl0m1Z1 d@dJdKdLgZe2dMdNdOZ3x|eD ]t\Z+Z,e#j*e(e+d7d"e3e,fd!d3Z4e#j*e)e,d7d"e3e+fd!d3Z5ee3e+e,eZ6e1e4e6d dPdQdR e1e5e6d" dPdSdR q,W xdTD ]Z-xeD ]\Z+Z,e#j*e%e+d7d"e3e,e-fd!d3Z4e#j*e&e,d7d"e3e+e-fd!d3Z5ee3e+e,ee-Z6e1e4e6d dUdQdR e1e5e6d" dUdSdR e1ee3e.e+e,d ge-e$e3e+e,e-dPdVdR e1ee3e.e+e,d ge-e$e3e+e,e/e-dN e-  e-dPdVdR qW qW dS )Wa  gradient/Jacobian of normal and t loglikelihood

use chain rule

normal derivative wrt mu, sigma and beta

new version: loc-scale distributions, derivative wrt loc, scale

also includes "standardized" t distribution (for use in GARCH)

TODO:
* use sympy for derivative of loglike wrt shape parameters
  it works for df of t distribution dlog(gamma(a))da = polygamma(0,a) check
  polygamma is available in scipy.special
* get loc-scale example to work with mean = X*b
* write some full unit test examples

A: josef-pktd

    )print_functionN)special)gammalnc             C   s<   |j \}}dtdtj t| | | d |   }|S )ar  normal loglikelihood given observations and mean mu and variance sigma2

    Parameters
    ----------
    y : array, 1d
        normally distributed random variable
    params: array, (nobs, 2)
        array of mean, variance (mu, sigma2) with observations in rows

    Returns
    -------
    lls : array
        contribution to loglikelihood for each observation
    g         )Tnplogpi)yparamsmusigma2lls r   Clib/python3.7/site-packages/statsmodels/sandbox/regression/tools.pynorm_lls   s    
.r   c             C   sB   |j \}}| | | }| | d | d t| }t||fS )a=  Jacobian of normal loglikelihood wrt mean mu and variance sigma2

    Parameters
    ----------
    y : array, 1d
        normally distributed random variable
    params: array, (nobs, 2)
        array of mean, variance (mu, sigma2) with observations in rows

    Returns
    -------
    grad : array (nobs, 2)
        derivative of loglikelihood for each observation wrt mean in first
        column, and wrt variance in second column

    Notes
    -----
    this is actually the derivative wrt sigma not sigma**2, but evaluated
    with parameter sigma2 = sigma**2

    r      )r   r   sqrtcolumn_stack)r
   r   r   r   ZdllsdmuZdllsdsigma2r   r   r   norm_lls_grad0   s    
r   c             C   s   | S )z-gradient/Jacobian for d (x*beta)/ d beta
    r   )xbetar   r   r   	mean_gradL   s    r   c       
      C   s   |dd }|d t t| df }t||}t ||}t ||f}t| |}t |ddddf | |ddddf f}	|	S )a  Jacobian of normal loglikelihood wrt mean mu and variance sigma2

    Parameters
    ----------
    y : array, 1d
        normally distributed random variable with mean x*beta, and variance sigma2
    x : array, 2d
        explanatory variables, observation in rows, variables in columns
    params: array_like, (nvars + 1)
        array of coefficients and variance (beta, sigma2)

    Returns
    -------
    grad : array (nobs, 2)
        derivative of loglikelihood for each observation wrt mean in first
        column, and wrt scale (sigma) in second column
    assume params = (beta, sigma2)

    Notes
    -----
    TODO: for heteroscedasticity need sigma to be a 1d array

    Nr   )r   oneslenr   dotr   r   )
r
   r   r   r   r   Zdmudbetar   Zparams2ZdllsdmsZgradr   r   r   normgradQ   s    

2r   c             C   s   |j \}}|d }t|d d t|d  dt|d tj   }||d d td| | d |d  |   dt|  8 }|S )a  t loglikelihood given observations and mean mu and variance sigma2 = 1

    Parameters
    ----------
    y : array, 1d
        normally distributed random variable
    params: array, (nobs, 2)
        array of mean, variance (mu, sigma2) with observations in rows
    df : integer
        degrees of freedom of the t distribution

    Returns
    -------
    lls : array
        contribution to loglikelihood for each observation

    Notes
    -----
    parameterized for garch
    g      ?r   g       @g      ?r   )r   r   r   r   r	   )r
   r   dfr   r   r   r   r   r   tstd_llsu   s
    
4@r   c             C   s   |  S )z?derivative of log pdf of standard normal with respect to y
    r   )r
   r   r   r   
norm_dlldy   s    r    c             C   s"   |d  | d| d |   |  S )zderivative of log pdf of standardized (?) t with respect to y

    Notes
    -----
    parameterized for garch, with mean 0 and variance 1
    r   r   r   )r
   r   r   r   r   ts_dlldy   s    	r!   c             C   sn   t |d }t t|d d t|d  t |d t  }|d| d |d   |d d   }|S )zIpdf for standardized (not standard) t distribution, variance is one

    g      ?r   g       @r   )r   arrayZexpr   r   r   r	   )r   r   rZPxr   r   r   tstd_pdf   s    8$r$   c             C   s   t | || |j\}}|d }t|d d t|d  dt|tj   }||d d td| | d | |   dt|  8 }|S )a  t loglikelihood given observations and mean mu and variance sigma2 = 1

    Parameters
    ----------
    y : array, 1d
        normally distributed random variable
    params: array, (nobs, 2)
        array of mean, variance (mu, sigma2) with observations in rows
    df : integer
        degrees of freedom of the t distribution

    Returns
    -------
    lls : array
        contribution to loglikelihood for each observation

    Notes
    -----
    parameterized for garch
    normalized/rescaled so that sigma2 is the variance

    >>> df = 10; sigma = 1.
    >>> stats.t.stats(df, loc=0., scale=sigma.*np.sqrt((df-2.)/df))
    (array(0.0), array(1.0))
    >>> sigma = np.sqrt(2.)
    >>> stats.t.stats(df, loc=0., scale=sigma*np.sqrt((df-2.)/df))
    (array(0.0), array(2.0))
    g      ?r   g       @g      ?r   )printr   r   r   r   r	   )r
   r   r   r   r   r   r   r   r   ts_lls   s    
0<r&   c             C   s*   |d }|d  | d| d |   |  S )a  derivative of log pdf of standard t with respect to y

    Parameters
    ----------
    y : array_like
        data points of random variable at which loglike is evaluated
    df : array_like
        degrees of freedom,shape parameters of log-likelihood function
        of t distribution

    Returns
    -------
    dlldy : array
        derivative of loglikelihood wrt random variable y evaluated at the
        points given in y

    Notes
    -----
    with mean 0 and scale 1, but variance is df/(df-2)

    g      ?r   r   r   )r
   r   r   r   r   r!      s    c             C   s*   |d  |d  d| d |d    |  S )a  derivative of log pdf of standardized t with respect to y

        Parameters
    ----------
    y : array_like
        data points of random variable at which loglike is evaluated
    df : array_like
        degrees of freedom,shape parameters of log-likelihood function
        of t distribution

    Returns
    -------
    dlldy : array
        derivative of loglikelihood wrt random variable y evaluated at the
        points given in y


    Notes
    -----
    parameterized for garch, standardized to variance=1
    r   g       @r   r   )r
   r   r   r   r   
tstd_dlldy   s    r'   c             G   sN   | | | }||f|  | }d| ||f| | |  |d   }||fS )a  derivative of log-likelihood with respect to location and scale

    Parameters
    ----------
    y : array_like
        data points of random variable at which loglike is evaluated
    loc : float
        location parameter of distribution
    scale : float
        scale parameter of distribution
    dlldy : function
        derivative of loglikelihood fuction wrt. random variable x
    args : array_like
        shape parameters of log-likelihood function

    Returns
    -------
    dlldloc : array
        derivative of loglikelihood wrt location evaluated at the
        points given in y
    dlldscale : array
        derivative of loglikelihood wrt scale evaluated at the
        points given in y

    g      r   r   )r
   locscaleZdlldyargsZystZdlldlocZ	dlldscaler   r   r   locscale_grad
  s    &r+   __main__g?r   
      r   )statsmiscc             C   s   t tjj| |||dS )N)r(   r)   )r   r   r/   tpdf)r
   r(   r)   r   r   r   r   llt=  s    r3   c             C   s   t tjj||| |dS )N)r(   r)   )r   r   r/   r1   r2   )r(   r
   r)   r   r   r   r   lltloc?  s    r4   c             C   s   t tjj|||| dS )N)r(   r)   )r   r   r/   r1   r2   )r)   r
   r(   r   r   r   r   lltscaleA  s    r5   c             C   s   t tjj| ||dS )N)r(   r)   )r   r   r/   normr2   )r
   r(   r)   r   r   r   llnormD  s    r7   c             C   s   t tjj|| |dS )N)r(   r)   )r   r   r/   r6   r2   )r(   r
   r)   r   r   r   	llnormlocF  s    r8   c             C   s   t tjj||| dS )N)r(   r)   )r   r   r/   r6   r2   )r)   r
   r(   r   r   r   llnormscaleH  s    r9   z
gradient of tgư>)r   r   r-   )Zdxnr*   orderzt Ztsg      ?g|=)r   r      r<   )r   r   r<   )r   r   r<   )g      ?r   r<   )g      ?r   r<   )g      ?r   r   r<   z
gradient of norm)r   r   )g      ?r   r   )g      ?r   r   z
loglike of td   zdifferently standardizedg      ?g      ?g?)assert_almost_equal)g      ?g      ?)g        g       @)g      ?g       @g       g       @      z	deriv loc)Zerr_msgzderiv scale)r.   r-   r=      Zloglike)7__doc__Z
__future__r   Znumpyr   Zscipyr   Zscipy.specialr   r   r   r   r   r   r    r!   r$   r&   r'   r+   __name__verboseZsigr   r   ZrandomZrandnZrvsr   r   r
   r   r%   Z	dllfdbetar/   r0   r3   r4   r5   r7   r8   r9   Z
derivativer(   r)   r   r"   r   Znumpy.testingr>   ZlinspaceZytZdlldloZdlldscZgrr   r   r   r   <module>   s   $
'

 
   

((&&&0 