B
    Z(B              4   @   s  d Z ddlmZmZmZ ddlmZ ddlZddl	m
Z ddlmZ dddZd	d
 ZdddZdd Zdd Zdd ZdddZdddZdddZdddZdddZddd Zdd#d$Zdd&d'Zed(kred) ed* d+d+d+d+d+gZd,d-d.d/d0gZxeD ]Z eee  qW xeD ]Z eee  q W eee eee d1d2d3d4d5d6d+d7d8d9d:d;d<gZ!e"d= e#d> e$d? e%dd!d@Z&e'e&ee& e"d= e#dA e$d? e%dd!dBZ&e'e&eeee&d!e&  e(dddCgdDdDdDgdEdDdFggZ)e)*dZ+e)*d!Z,ee+Z-ee,Z.ee)Z/ee+e,e)Z0ee,e+e)Z1edej2e3e+e, Z4edej2e3e,e+ Z5ee+e,e)Z6edG ee-e.e/e0e1e4e5e6 edH e(dIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\dYd]d^d_d`dadbdcdddedfdgdhdidjdkdld^dmdndodpdqdrdsdtdudvdwdxg2ZeeZ7e edy edz ed{ ed| ed} ed~e+  ed d!e+d  Z8ede8  ee8d!e8 gZ9ede9  ede9e8e:d  e0f  ed ede8e-d! e:d f  ed ed e(dddgdCdCdCgdFdCd<ggZ;ee; ed ede<dee;d! ee;d gd! e:d   ed e(dddgdddgdd,dggZ=dS )a:  
Information Theoretic and Entropy Measures

References
----------
Golan, As. 2008. "Information and Entropy Econometrics -- A Review and
    Synthesis." Foundations And Trends in Econometrics 2(1-2), 1-145.

Golan, A., Judge, G., and Miller, D.  1996.  Maximum Entropy Econometrics.
    Wiley & Sons, Chichester.
    )rangelziplmap)statsN)pyplot)	logsumexpc             C   sf   |dkrt | S t| } t| j}d||< | j|d}tt| || j	|d}|| }|S )a-  
    Compute the log of the sum of exponentials log(e^{a_1}+...e^{a_n}) of a

    Avoids numerical overflow.

    Parameters
    ----------
    a : array-like
        The vector to exponentiate and sum
    axis : int, optional
        The axis along which to apply the operation.  Defaults is None.

    Returns
    -------
    sum(log(exp(a)))

    Notes
    -----
    This function was taken from the mailing list
    http://mail.scipy.org/pipermail/scipy-user/2009-October/022931.html

    This should be superceded by the ufunc when it is finished.
    N   )axis)
sp_logsumexpnpasarraylistshapemaxlogZexpZreshapesum)ar	   ZshpZa_maxsZlse r   ;lib/python3.7/site-packages/statsmodels/sandbox/infotheo.pyr   -   s    

"r   c             C   sD   t | } t t | dr8t | dkr8t | dks<dS dS dS )zC
    Checks to see if `X` is a proper probability distribution
    r   r   FTN)r   r   Zallcloser   all)Xr   r   r   _isproperdistQ   s    
.r   efc             C   s   t | }|dkr tt|}|dkr@t|t|  | }|dkrt| t|  }t|| }t	| \}}t
|}d}|d }	|||d < xLtd|D ]>}
||
 |	| k r||||
 < q||
 }	|d7 }||||
 < qW |S )z
    Discretize `X`

    Parameters
    ----------
    bins : int, optional
        Number of bins.  Default is floor(sqrt(N))
    method : string
        "ef" is equal-frequency binning
        "ew" is equal-width binning

    Examples
    --------
    Nr   Zewr   r   )lenr   ZfloorZsqrtZceilr   Zrankdatar   minZfastsortZzerosr   )r   methodZnbinsZnobsZdiscretewidthZsvecZivecZbinnumbaseir   r   r   
discretize[   s(    
r    c             C   s   t |t |  S )z
    There is a one-to-one transformation of the entropy value from
    a log base b to a log base a :

    H_{b}(X)=log_{b}(a)[H_{a}(X)]

    Returns
    -------
    log_{b}(a)
    )r   r   )r   br   r   r   logbasechange   s    r"   c             C   s   t tjd|  S )z$
    Converts from nats to bits
       )r"   r   e)r   r   r   r   
natstobits   s    r%   c             C   s   t dtj|  S )z$
    Converts from bits to nats
    r#   )r"   r   r$   )r   r   r   r   
bitstonats   s    r&   r#   c             C   sh   t | } t | dkr&t | dks.tdt t | t |   }|dkr`td|| S |S dS )aD  
    This is Shannon's entropy

    Parameters
    -----------
    logbase, int or np.e
        The base of the log
    px : 1d or 2d array_like
        Can be a discrete probability distribution, a 2d joint distribution,
        or a sequence of probabilities.

    Returns
    -----
    For log base 2 (bits) given a discrete distribution
        H(p) = sum(px * log2(1/px) = -sum(pk*log2(px)) = E[log2(1/p(X))]

    For log base 2 (bits) given a joint distribution
        H(px,py) = -sum_{k,j}*w_{kj}log2(w_{kj})

    Notes
    -----
    shannonentropy(0) is defined as 0
    r   r   z&px does not define proper distributionr#   N)r   r   r   
ValueErrorr   
nan_to_numlog2r"   )pxlogbaseentropyr   r   r   shannonentropy   s    
r-   c             C   s\   t | } t | dkr&t | dks.td|dkrLtd| t |  S t |  S dS )z
    Shannon's information

    Parameters
    ----------
    px : float or array-like
        `px` is a discrete probability distribution

    Returns
    -------
    For logbase = 2
    np.log2(px)
    r   r   z&px does not define proper distributionr#   N)r   r   r   r'   r"   r)   )r*   r+   r   r   r   shannoninfo   s    
r.   c          	   C   s   t | rt |std|dkr0t |s0td|dkrDt|| }t|tt||  }|dkrn|S td|| S dS )a  
    Return the conditional entropy of X given Y.

    Parameters
    ----------
    px : array-like
    py : array-like
    pxpy : array-like, optional
        If pxpy is None, the distributions are assumed to be independent
        and conendtropy(px,py) = shannonentropy(px)
    logbase : int or np.e

    Returns
    -------
    sum_{kj}log(q_{j}/w_{kj}

    where q_{j} = Y[j]
    and w_kj = X[k,j]
    z1px or py is not a proper probability distributionNz&pxpy is not a proper joint distribtionr#   )r   r'   r   outerr   r(   r)   r"   )r*   pypxpyr+   condentr   r   r   condentropy   s    r3   c             C   s`   t | rt |std|dkr0t |s0td|dkrDt|| }t| |dt| |||d S )aC  
    Returns the mutual information between X and Y.

    Parameters
    ----------
    px : array-like
        Discrete probability distribution of random variable X
    py : array-like
        Discrete probability distribution of random variable Y
    pxpy : 2d array-like
        The joint probability distribution of random variables X and Y.
        Note that if X and Y are independent then the mutual information
        is zero.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    shannonentropy(px) - condentropy(px,py,pxpy)
    z1px or py is not a proper probability distributionNz&pxpy is not a proper joint distribtion)r+   )r   r'   r   r/   r-   r3   )r*   r0   r1   r+   r   r   r   
mutualinfo   s    r4   c             C   s`   t | rt |std|dkr0t |s0td|dkrDt|| }t| |||dt||d S )aa  
    An information theoretic correlation measure.

    Reflects linear and nonlinear correlation between two random variables
    X and Y, characterized by the discrete probability distributions px and py
    respectively.

    Parameters
    ----------
    px : array-like
        Discrete probability distribution of random variable X
    py : array-like
        Discrete probability distribution of random variable Y
    pxpy : 2d array-like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    mutualinfo(px,py,pxpy,logbase=logbase)/shannonentropy(py,logbase=logbase)

    Notes
    -----
    This is also equivalent to

    corrent(px,py,pxpy) = 1 - condent(px,py,pxpy)/shannonentropy(py)
    z1px or py is not a proper probability distributionNz&pxpy is not a proper joint distribtion)r+   )r   r'   r   r/   r4   r-   )r*   r0   r1   r+   r   r   r   corrent  s    r5   c             C   sd   t | rt |std|dkr0t |s0td|dkrDt|| }t| |||dt|| ||d S )ak  
    An information theoretic covariance measure.

    Reflects linear and nonlinear correlation between two random variables
    X and Y, characterized by the discrete probability distributions px and py
    respectively.

    Parameters
    ----------
    px : array-like
        Discrete probability distribution of random variable X
    py : array-like
        Discrete probability distribution of random variable Y
    pxpy : 2d array-like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    logbase : int or np.e, optional
        Default is 2 (bits)

    Returns
    -------
    condent(px,py,pxpy,logbase=logbase) + condent(py,px,pxpy,
            logbase=logbase)

    Notes
    -----
    This is also equivalent to

    covent(px,py,pxpy) = condent(px,py,pxpy) + condent(py,px,pxpy)
    z1px or py is not a proper probability distributionNz&pxpy is not a proper joint distribtion)r+   )r   r'   r   r/   r2   )r*   r0   r1   r+   r   r   r   covent=  s    r6   r   Rc             C   s   t | stdt|}|dkrBt| }|dkr>td|| S |S dt| ks\|tjkrnt	t
|  S | | } t	|  }|dkrdd|  | S dd|  td| | S dS )as  
    Renyi's generalized entropy

    Parameters
    ----------
    px : array-like
        Discrete probability distribution of random variable X.  Note that
        px is assumed to be a proper probability distribution.
    logbase : int or np.e, optional
        Default is 2 (bits)
    alpha : float or inf
        The order of the entropy.  The default is 1, which in the limit
        is just Shannon's entropy.  2 is Renyi (Collision) entropy.  If
        the string "inf" or numpy.inf is specified the min-entropy is returned.
    measure : str, optional
        The type of entropy measure desired.  'R' returns Renyi entropy
        measure.  'T' returns the Tsallis entropy measure.

    Returns
    -------
    1/(1-alpha)*log(sum(px**alpha))

    In the limit as alpha -> 1, Shannon's entropy is returned.

    In the limit as alpha -> inf, min-entropy is returned.
    z+px is not a proper probability distributionr   r#   infN)r   r'   floatr-   r"   stringlowerr   r8   r   r   r   )r*   alphar+   measureZgenentr   r   r   renyientropyi  s    r>   Tc             C   s   dS )a  
    Generalized cross-entropy measures.

    Parameters
    ----------
    px : array-like
        Discrete probability distribution of random variable X
    py : array-like
        Discrete probability distribution of random variable Y
    pxpy : 2d array-like, optional
        Joint probability distribution of X and Y.  If pxpy is None, X and Y
        are assumed to be independent.
    logbase : int or np.e, optional
        Default is 2 (bits)
    measure : str, optional
        The measure is the type of generalized cross-entropy desired. 'T' is
        the cross-entropy version of the Tsallis measure.  'CR' is Cressie-Read
        measure.

    Nr   )r*   r0   r1   r<   r+   r=   r   r   r   gencrossentropy  s    r@   __main__zQFrom Golan (2008) "Information and Entropy Econometrics -- A Review and Synthesisz	Table 3.1g?gS㥛?g;On?g'1Z?gK?gMbp?gh㈵>g-C6?gMbP?g{Gz?g?g333333?g      ?g333333?gffffff?g?g?g      ?o   ZInformationZProbabilityi ZEntropye   gUUUUUU?gqq?gqq?gUUUUUU?z	Table 3.3zdiscretize functionsg3333335@g     @F@g      ?@g     3@gLD@gYC@g333333&@g/@gfffff?@g9@g3333334@gffffff,@g      8@g      5@g&@g      2@gL0@g3333336@g333333@g;@ǧA@g-@g1@g333333<@gffffff0@g     0@g      G@g      #@g2@g@@g:@g0@g333333@gffffff5@g      4@gL=@g @g     6@g)@gfffff:@g     9@gfffff6@gffffff&@g333334@g333333:@g"@g%@g333333/@z0Example in section 3.6 of Golan, using table 3.3z'Bounding errors using Fano's inequalityz"H(P_{e}) + P_{e}log(K-1) >= H(X|Y)zor, a weaker inequalityzP_{e} >= [H(X|Y) - 1]/log(K)z	P(x) = %sz?X = 3 has the highest probability, so this is the estimate Xhatz1The probability of error Pe is 1 - p(X=3) = %0.4gzH(Pe) = %0.4g and K=3z-H(Pe) + Pe*log(K-1) = %0.4g >= H(X|Y) = %0.4gzor using the weaker inequalityz'Pe = %0.4g >= [H(X) - 1]/log(K) = %0.4g   z>Consider now, table 3.5, where there is additional informationz.The conditional probabilities of P(X|Y=y) are g        g      ?z2The probability of error given this information iszPe = [H(X|Y) -1]/log(K) = %0.4gz+such that more information lowers the errorgV-?gV-?gw/?g(\?g+?g%C?gzG?gPn?)N)r   N)r#   )r#   )Nr#   )r#   )r#   )r#   )r   r#   r7   )r   r#   r?   )>__doc__Zstatsmodels.compat.pythonr   r   r   Zscipyr   Znumpyr   Z
matplotlibr   ZpltZ
scipy.miscr   r
   r   r    r"   r%   r&   r-   r.   r3   r4   r5   r6   r>   r@   __name__printr   Yr   pZsubplotZylabelZxlabelZlinspacexZplotZarraywr   r*   r0   ZH_XZH_YZH_XYZ	H_XgivenYZ	H_YgivenXr$   r,   ZD_YXZD_XYZI_XYZdiscXZpeZH_per)   Zw2ZmeanZmarkovchainr   r   r   r   <module>   s   
$

(
#

 

(
,
3









"

"6