ó
áp7]c           @   s®   d  Z  d d l m Z d d l Z d d l m Z d d „ Z d d „ Z d d „ Z	 d d	 „ Z
 d
 d d „ Z d d d „ Z d d d e d „ Z d „  Z d d „ Z d S(   sx   
Statistical tests to be used in conjunction with the models

Notes
-----
These functions haven't been formally tested.
iÿÿÿÿ(   t   statsN(   t   ValueWarningi    c         C   s[   t  j |  ƒ }  t  j |  d d | ƒ} t  j | d d | ƒt  j |  d d | ƒ} | S(   s?  
    Calculates the Durbin-Watson statistic

    Parameters
    ----------
    resids : array-like

    Returns
    -------
    dw : float, array-like
        The Durbin-Watson statistic.

    Notes
    -----
    The null hypothesis of the test is that there is no serial correlation.
    The Durbin-Watson test statistics is defined as:

    .. math::

       \sum_{t=2}^T((e_t - e_{t-1})^2)/\sum_{t=1}^Te_t^2

    The test statistic is approximately equal to 2*(1-r) where ``r`` is the
    sample autocorrelation of the residuals. Thus, for r == 0, indicating no
    serial correlation, the test statistic equals 2. This statistic will
    always be between 0 and 4. The closer to 0 the statistic, the more
    evidence for positive serial correlation. The closer to 4, the more
    evidence for negative serial correlation.
    i   t   axisi   (   t   npt   asarrayt   difft   sum(   t   residsR   t   diff_residst   dw(    (    s:   lib/python2.7/site-packages/statsmodels/stats/stattools.pyt   durbin_watson   s    0c         C   sr   t  j |  ƒ }  |  j | } | d k  r_ d d l m } | d t | ƒ t ƒ t  j t  j f St j	 |  d | ƒS(   sÇ   
    Omnibus test for normality

    Parameters
    ----------
    resid : array-like
    axis : int, optional
        Default is 0

    Returns
    -------
    Chi^2 score, two-tail probability
    i   iÿÿÿÿ(   t   warnsP   omni_normtest is not valid with less than 8 observations; %i samples were given.R   (
   R   R   t   shapet   warningsR   t   intR   t   nanR    t
   normaltest(   R   R   t   nR   (    (    s:   lib/python2.7/site-packages/statsmodels/stats/stattools.pyt   omni_normtest2   s    c         C   s•   t  j |  ƒ }  t j |  d | ƒ} d t j |  d | ƒ} |  j | } | d | d d d | d d } t j j | d ƒ } | | | | f S(   sU  
    Calculates the Jarque-Bera test for normality

    Parameters
    ----------
    data : array-like
        Data to test for normality
    axis : int, optional
        Axis to use if data has more than 1 dimension. Default is 0

    Returns
    -------
    JB : float or array
        The Jarque-Bera test statistic
    JBpv : float or array
        The pvalue of the test statistic
    skew : float or array
        Estimated skewness of the data
    kurtosis : float or array
        Estimated kurtosis of the data

    Notes
    -----
    Each output returned has 1 dimension fewer than data


    The Jarque-Bera test statistic tests the null that the data is normally
    distributed against an alternative that the data follow some other
    distribution. The test statistic is based on two moments of the data,
    the skewness, and the kurtosis, and has an asymptotic :math:`\chi^2_2`
    distribution.

    The test statistic is defined

    .. math:: JB = n(S^2/6+(K-3)^2/24)

    where n is the number of data points, S is the sample skewness, and K is
    the sample kurtosis of the data.
    R   i   g      @i   i   g      @(   R   R   R    t   skewt   kurtosisR   t   chi2t   sf(   R   R   R   R   R   t   jbt   jb_pv(    (    s:   lib/python2.7/site-packages/statsmodels/stats/stattools.pyt   jarque_beraM   s    (&c         C   sd  | d	 k r! |  j ƒ  }  d } n  t j |  | ƒ }  t j |  d d d g d | ƒ\ } } } |  j | ƒ } |  j f } | d	 k	 r² t | j ƒ } | j	 | d ƒ t
 | ƒ } n  t j | | ƒ } t j | | ƒ } t j |  | d | ƒ }	 t j |  d | ƒ}
 | | d | | | } | | t j t |  | ƒ d | ƒ} | | |	 } |
 | | | f S(
   sª  
    Calculates the four skewness measures in Kim & White

    Parameters
    ----------
    y : array-like

    axis : int or None, optional
        Axis along which the skewness measures are computed.  If `None`, the
        entire array is used.

    Returns
    -------
    sk1 : ndarray
          The standard skewness estimator.
    sk2 : ndarray
          Skewness estimator based on quartiles.
    sk3 : ndarray
          Skewness estimator based on mean-median difference, standardized by
          absolute deviation.
    sk4 : ndarray
          Skewness estimator based on mean-median difference, standardized by
          standard deviation.

    Notes
    -----
    The robust skewness measures are defined

    .. math::

        SK_{2}=\frac{\left(q_{.75}-q_{.5}\right)
        -\left(q_{.5}-q_{.25}\right)}{q_{.75}-q_{.25}}

    .. math::

        SK_{3}=\frac{\mu-\hat{q}_{0.5}}
        {\hat{E}\left[\left|y-\hat{\mu}\right|\right]}

    .. math::

        SK_{4}=\frac{\mu-\hat{q}_{0.5}}{\hat{\sigma}}

    .. [*] Tae-Hwan Kim and Halbert White, "On more robust estimation of
       skewness and kurtosis," Finance Research Letters, vol. 1, pp. 56-73,
       March 2004.
    i    g      9@g      I@g     ÀR@R   i   i   g       @N(   t   Nonet   ravelR   t   sortt
   percentilet   meant   sizet   listR   t   insertt   tuplet   reshapeR    R   t   abs(   t   yR   t   q1t   q2t   q3t   muR   t   mu_bt   q2_bt   sigmat   sk1t   sk2t   sk3t   sk4(    (    s:   lib/python2.7/site-packages/statsmodels/stats/stattools.pyt   robust_skewness‚   s&    0	*'g      @g      I@c         C   s¬   | d | | d | f } t  j |  | ƒ \ } } } } t  j |  |  | k  ƒ } t  j |  |  | k ƒ }	 t  j |  |  | k  ƒ }
 t  j |  |  | k ƒ } |	 | | |
 S(   sW  
    KR3 estimator from Kim & White

    Parameters
    ----------
    y : array-like, 1-d
    alpha : float, optional
        Lower cut-off for measuring expectation in tail.
    beta :  float, optional
        Lower cut-off for measuring expectation in center.

    Returns
    -------
    kr3 : float
        Robust kurtosis estimator based on standardized lower- and upper-tail
        expected values

    Notes
    -----
    .. [*] Tae-Hwan Kim and Halbert White, "On more robust estimation of
       skewness and kurtosis," Finance Research Letters, vol. 1, pp. 56-73,
       March 2004.
    g      Y@(   R   R   R   (   R%   t   alphat   betat   perct   lower_alphat   upper_alphat
   lower_betat
   upper_betat   l_alphat   u_alphat   l_betat   u_beta(    (    s:   lib/python2.7/site-packages/statsmodels/stats/stattools.pyt   _kr3Î   s    g      @g      9@c         C   s(  |  \ } } | \ } } t  j d ƒ } t j j } t j j } | t  j d ƒ d ƒ \ }	 }
 } } } } d	 | d
 <| | | |	 | |
 | d <| t  j | d | d f ƒ ƒ \ } } d | | ƒ | d | | ƒ | | d <| t  j | d | d f ƒ ƒ \ } } d | d | | d	 <| S(   si  
    Calculates the expected value of the robust kurtosis measures in Kim and
    White assuming the data are normally distributed.

    Parameters
    ----------
    ab: iterable, optional
        Contains 100*(alpha, beta) in the kr3 measure where alpha is the tail
        quantile cut-off for measuring the extreme tail and beta is the central
        quantile cutoff for the standardization of the measure
    db: iterable, optional
        Contains 100*(delta, gamma) in the kr4 measure where delta is the tail
        quantile for measuring extreme values and gamma is the central quantile
        used in the the standardization of the measure

    Returns
    -------
    ekr: array, 4-element
        Contains the expected values of the 4 robust kurtosis measures

    Notes
    -----
    See `robust_kurtosis` for definitions of the robust kurtosis measures
    i   g      ð?g       @g      @g      @g      @g      @i   i   i    i   g      Y@i   g       À(   g      ð?g       @g      @g      @g      @g      @(   R   t   zerosR    t   normt   ppft   pdft   array(   t   abt   dgR2   R3   t   deltat   gammat   expected_valueR@   RA   R&   R'   R(   t   q5t   q6t   q7t   q_alphat   q_betat   q_deltat   q_gamma(    (    s:   lib/python2.7/site-packages/statsmodels/stats/stattools.pyt   expected_robust_kurtosisñ   s    +
)*)c         C   s™  | d k s0 |  j ƒ  j d k rE |  j d k rE |  j ƒ  }  d } n  | \ } } | \ } } d d d d d d | d	 | | d	 | f
 }	 t j |  |	 d
 | ƒ\
 }
 } } } } } } } } } | rÔ t | | ƒ n t j d ƒ } t j	 |  | t
 ƒ | d } | | | |
 | | | d } |  j d k rFt |  | | ƒ } n t j t | |  | | ƒ } | | d 8} | | | | | d } | | | | f S(   sê  
    Calculates the four kurtosis measures in Kim & White

    Parameters
    ----------
    y : array-like
    axis : int or None, optional
        Axis along which the kurtoses are computed.  If `None`, the
        entire array is used.
    ab: iterable, optional
        Contains 100*(alpha, beta) in the kr3 measure where alpha is the tail
        quantile cut-off for measuring the extreme tail and beta is the central
        quantile cutoff for the standardization of the measure
    db: iterable, optional
        Contains 100*(delta, gamma) in the kr4 measure where delta is the tail
        quantile for measuring extreme values and gamma is the central quantile
        used in the the standardization of the measure
    excess : bool, optional
        If true (default), computed values are excess of those for a standard
        normal distribution.

    Returns
    -------
    kr1 : ndarray
          The standard kurtosis estimator.
    kr2 : ndarray
          Kurtosis estimator based on octiles.
    kr3 : ndarray
          Kurtosis estimators based on exceedence expectations.
    kr4 : ndarray
          Kurtosis measure based on the spread between high and low quantiles.

    Notes
    -----
    The robust kurtosis measures are defined

    .. math::

        KR_{2}=\frac{\left(\hat{q}_{.875}-\hat{q}_{.625}\right)
        +\left(\hat{q}_{.375}-\hat{q}_{.125}\right)}
        {\hat{q}_{.75}-\hat{q}_{.25}}

    .. math::

        KR_{3}=\frac{\hat{E}\left(y|y>\hat{q}_{1-\alpha}\right)
        -\hat{E}\left(y|y<\hat{q}_{\alpha}\right)}
        {\hat{E}\left(y|y>\hat{q}_{1-\beta}\right)
        -\hat{E}\left(y|y<\hat{q}_{\beta}\right)}

    .. math::

        KR_{4}=\frac{\hat{q}_{1-\delta}-\hat{q}_{\delta}}
        {\hat{q}_{1-\gamma}-\hat{q}_{\gamma}}

    where :math:`\hat{q}_{p}` is the estimated quantile at :math:`p`.

    .. [*] Tae-Hwan Kim and Halbert White, "On more robust estimation of
       skewness and kurtosis," Finance Research Letters, vol. 1, pp. 56-73,
       March 2004.
    i   i    g      )@g      9@g     ÀB@g     @O@g     ÀR@g     àU@g      Y@R   i   i   i   N(   R   t   squeezet   ndimR   R   R   RO   R>   R    R   t   FalseR=   t   apply_along_axis(   R%   R   RC   RD   t   excessR2   R3   RE   RF   R4   t   e1t   e2t   e3t   e5t   e6t   e7t   fdt   f1mdt   fgt   f1mgRG   t   kr1t   kr2t   kr3t   kr4(    (    s:   lib/python2.7/site-packages/statsmodels/stats/stattools.pyt   robust_kurtosis  s&    =$	'$"c         C   s²  t  j t  j |  ƒ ƒ }  |  j d k r6 t d ƒ ‚ n  t  j |  ƒ }  |  j d } | d d k r‡ |  | d d |  | d d } n |  | d d } |  | } | | d k } | | d k } | d d … d f } | | } t  j | d k | d k ƒ } t  j	 | | <| | } | | }	 t  j
 | d k ƒ }
 |
 r¥t  j |
 |
 f ƒ t  j |
 ƒ } | d t  j | ƒ 8} t  j | ƒ } | |	 d |
 … |
 d … f <n  t  j |	 ƒ S(   s  
    Calculates the medcouple robust measure of skew.

    Parameters
    ----------
    y : array-like, 1-d

    Returns
    -------
    mc : float
        The medcouple statistic

    Notes
    -----
    The current algorithm requires a O(N**2) memory allocations, and so may
    not work for very large arrays (N>10000).

    .. [*] M. Huberta and E. Vandervierenb, "An adjusted boxplot for skewed
       distributions" Computational Statistics & Data Analysis, vol. 52, pp.
       5186-5201, August 2008.
    i   s#   y must be squeezable to a 1-d arrayi    i   g        N(   R   RP   R   RQ   t
   ValueErrorR   R   R   t   logical_andt   infR   t   onest   eyet   triut   fliplrt   median(   R%   R   t   mft   zt   lowert   uppert   standardizationt   is_zerot   spreadt   ht   num_tiest   replacements(    (    s:   lib/python2.7/site-packages/statsmodels/stats/stattools.pyt   _medcouple_1dv  s0    %



" c         C   sG   t  j |  d t  j ƒ}  | d k r4 t |  j ƒ  ƒ St  j t | |  ƒ S(   sÐ  
    Calculates the medcouple robust measure of skew.

    Parameters
    ----------
    y : array-like
    axis : int or None, optional
        Axis along which the medcouple statistic is computed.  If `None`, the
        entire array is used.

    Returns
    -------
    mc : ndarray
        The medcouple statistic with the same shape as `y`, with the specified
        axis removed.

    Notes
    -----
    The current algorithm requires a O(N**2) memory allocations, and so may
    not work for very large arrays (N>10000).

    .. [*] M. Huberta and E. Vandervierenb, "An adjusted boxplot for skewed
       distributions" Computational Statistics & Data Analysis, vol. 52, pp.
       5186-5201, August 2008.
    t   dtypeN(   R   R   t   doubleR   Rv   R   RS   (   R%   R   (    (    s:   lib/python2.7/site-packages/statsmodels/stats/stattools.pyt	   medcouple³  s    (   g      @g      I@(   g      @g      9@(   g      @g      I@(   g      @g      9@(   t   __doc__t   scipyR    t   numpyR   t   statsmodels.tools.sm_exceptionsR   R
   R   R   R1   R=   RO   t   TrueRc   Rv   Ry   (    (    (    s:   lib/python2.7/site-packages/statsmodels/stats/stattools.pyt   <module>   s   #5L#-X	=