
p7]c           @   s   d  Z  d d l m Z d d l m Z d d l Z d d l m Z d d l	 m
 Z
 d d d	  Z d
 d  Z e d d
  Z e d d  Z d   Z d
 d d  Z e Z e Z e e d d Z d S(   s  
Implements Lilliefors corrected Kolmogorov-Smirnov tests for normal and
exponential distributions.

`kstest_fit` is provided as a top-level function to access both tests.
`kstest_normal` and `kstest_exponential` are provided as convenience functions
with the appropriate test as the default.
`lilliefors` is provided as an alias for `kstest_fit`.

Created on Sat Oct 01 13:16:49 2011

Author: Josef Perktold
License: BSD-3

pvalues for Lilliefors test are based on formula and table in

An Analytic Approximation to the Distribution of Lilliefors's Test Statistic for Normality
Author(s): Gerard E. Dallal and Leland WilkinsonSource: The American Statistician, Vol. 40, No. 4 (Nov., 1986), pp. 294-296Published by: American Statistical AssociationStable URL: http://www.jstor.org/stable/2684607 .

On the Kolmogorov-Smirnov Test for Normality with Mean and Variance
Unknown
Hubert W. Lilliefors
Journal of the American Statistical Association, Vol. 62, No. 318. (Jun., 1967), pp. 399-402.


---

Updated 2017-07-23
Jacob C. Kimmel

Ref:
Lilliefors, H.W.
On the Kolmogorov-Smirnov test for the exponential distribution with mean unknown.
Journal of the American Statistical Association, Vol 64, No. 325. (1969), pp. 387–389.
i(   t   partial(   t   string_typesN(   t   statsi   (   t	   TableDistt	   two_sidedc   	      C   s  t  t |    } t | t  r9 t t j |  j } n! t | d  rZ t | d  } n  t	 j
 |   }  | |  |  } | d k r t	 j d | d  | | j   } | d k r | Sn  | d	 k r | t	 j d |  | j   } | d k r | Sn  t	 j | | g  } | S(
   s  
    Calculate statistic for the Kolmogorov-Smirnov test for goodness of fit

    This calculates the test statistic for a test of the distribution G(x) of an observed
    variable against a given distribution F(x). Under the null
    hypothesis the two distributions are identical, G(x)=F(x). The
    alternative hypothesis can be either 'two_sided' (default), 'less'
    or 'greater'. The KS test is only valid for continuous distributions.

    Parameters
    ----------
    x : array_like, 1d
        array of observations
    cdf : string or callable
        string: name of a distribution in scipy.stats
        callable: function to evaluate cdf
    alternative : 'two_sided' (default), 'less' or 'greater'
        defines the alternative hypothesis (see explanation)
    args : tuple, sequence
        distribution parameters for call to cdf


    Returns
    -------
    D : float
        KS test statistic, either D, D+ or D-

    See Also
    --------
    scipy.stats.kstest

    Notes
    -----

    In the one-sided test, the alternative is that the empirical
    cumulative distribution function of the random variable is "less"
    or "greater" than the cumulative distribution function F(x) of the
    hypothesis, G(x)<=F(x), resp. G(x)>=F(x).

    In contrast to scipy.stats.kstest, this function only calculates the
    statistic which can be used either as distance measure or to implement
    case specific p-values.

    t   cdfR   t   greaterg      ?i   t   lessg        (   R   R   (   R   R   (   t   floatt   lent
   isinstanceR   t   getattrR   t   distributionsR   t   hasattrt   npt   sortt   aranget   max(	   t   xR   t   alternativet   argst   nobst   cdfvalst   Dplust   Dmint   D(    (    s<   lib/python2.7/site-packages/statsmodels/stats/_lilliefors.pyt   ksstat-   s"    -$ t   normc         C   s  |  d k rt  j d d d d d d g  d d d	  } t  j d
 d d d d d d d d d d d d d d d d d d d d d d  g t  } t  j d! d" d# d$ d% d& g d' d! d( d) d* d+ g d, d- d. d/ d0 d1 g d2 d3 d4 d5 d6 d7 g d8 d9 d: d; d< d= g d> d? d2 d@ dA dB g dC dD dE dF d5 dG g dH dI dJ dK dL dM g dN dO dP dQ d- dR g dS dT dU dV dW dX g dY dZ dH d[ dF d\ g d] d^ d_ d` da db g dc dd de df dg dh g di dj d^ dk dl dm g dn do dp dT dV dq g dr ds d] dt dD du g dv dw dc dx dy dz g d{ d| dv d} d_ d~ g d d d dv dp dC g d d d d d d g d d d d d d g d d d d d d g d d d d d d g g  d d  d d d	  f d } d   } t  j d d d d d d d d dN d d d d d d d g t  } t  j | j d | j d d g  } x: t t |   D]& } | | |  | | d d  f <q%W| d	  } t  j | | g  }	 t  j | d d  d d	  f | g  }
 t	 | | |  } n|  d k rt  j d d d d d g  d d d	  } t  j d d
 d d d d d d d d d d d d d d d d d d g t  } t  j d d d d d g d d d d d g d d d d d g d d6 d d d g d d d d d g dL d d d d g d dL d d d g d d d dR d g dK d3 d d dB g dE da dW d d6 g d d d dh dM g d d d2 d d g dC d d d, d g d dP d~ d d g d dU d d d. g d dO dy d d' g dS d dI d8 d g d d d dV d g ds d d dO dg g d d d dx d[ g g  d d  d d d	  f d } d   } t  j d d d d d d d d dN d d d d d d d g t  } t  j | j d | j d g  } x: t t |   D]& } | | |  | | d d  f <qHWt  j | | g  } t  j | | g  } t	 | | |  } n t
 d   | S(   s  
    Generates tables for significance levels of Lilliefors test statistics

    Tables for available normal and exponential distribution testing,
    as specified in Lilliefors references above

    Parameters
    ----------
    dist : string.
        distribution being tested in set {'norm', 'exp'}.

    Returns
    -------
    lf : TableDist object.
        table of critical values
    R   g?g333333?g?g?g{Gz?gMbP?Nii   i   i   i   i   i	   i
   i   i   i   i   i   i   i   i   i   i   i   i   i(   id   i  i  i/  iA  iZ  ix  i  i  i!  i?  iW  i  i  i  i  i)  iC  is  i  i   i  i  i0  i_  i  i   i   i	  i   iM  i  i   i   i  i=  im  i   i   i   i  i`  i   i   i   i   i#  iR  i   i   i   i   iE  i   i   i   i   i  i:  i   i   i   i1  i   i   i   i   i   i(  i   i   i   i   i   i  i   i   i   i   i  i   i   i   i  i   i   i   i
  i   i   i   i   i  i   i   i   i   i   i   i   is   ix   i   i   i   i   iJ   iM   iR   iY   ih   iz   i%   i'   i)   i-   i4   i=   i   i   i#   i*   g     @@c         S   s)   t  j d d d d d g  t  j |   S(   NgZd;O?g~jt?g(\?g'1Z?g"~?(   R   t   arrayt   sqrt(   t   n(    (    s<   lib/python2.7/site-packages/statsmodels/stats/_lilliefors.pyt   f   s    i2   i<   iF   iP   i  i  i  i  i  i'  i i    i   it   expi   i  i  i  i'  iX  i  i  i  i  i$  ig  i~  i  i  i  iK  iw  i  i  i5  iG  i^  i4  iI  ih  i  i  i7  iU  i  i  i'  i|  i  i*  i   i   i  i   i   iF  i   i   i;  i   i2  i   i   i   i   i   i   i  i   i   i   i   i   c         S   s)   t  j d d d d d g  t  j |   S(   NgQ?gQ?gQ?g(\?g      ?(   R   R   R   (   R   (    (    s<   lib/python2.7/site-packages/statsmodels/stats/_lilliefors.pyR      s    s4   Invalid dist parameter. dist must be 'norm' or 'exp'(   R   R   R   t   zerost   shapet   rangeR	   t   concatenatet   vstackR   t
   ValueError(   t   distt   alphat   sizet   crit_lfR   t   higher_sizest   higher_crit_lft   it   alpha_larget
   size_larget   crit_lf_larget   lf(    (    s<   lib/python2.7/site-packages/statsmodels/stats/_lilliefors.pyt   get_lilliefors_tablev   s    .*-;	'$
++E	8	#$R'   R    c         C   s   | d k r' |  | d d 9}  d } n  t  j d |  d | d d |  t  j | d  d d	 t  j |  d
 |  } | S(   sZ  approximate pvalues for Lilliefors test

    This is only valid for pvalues smaller than 0.1 which is not checked in
    this function.

    Parameters
    ----------
    Dmax : array_like
        two-sided Kolmogorov-Smirnov test statistic
    n : int or float
        sample size

    Returns
    -------
    p-value : float or ndarray
        pvalue according to approximation formula of Dallal and Wilkinson.

    Notes
    -----
    This is mainly a helper function where the calling code should dispatch
    on bound violations. Therefore it doesn't check whether the pvalue is in
    the valid range.

    Precision for the pvalues is around 2 to 3 decimals. This approximation is
    also used by other statistical packages (e.g. R:fBasics) but might not be
    the most precise available.

    References
    ----------
    DallalWilkinson1986

    id   g      Y@g\(\?gwTi   gvT5A=@gHȰ@g`80C?g}%/?g-9(?(   R   R    R   (   t   DmaxR   t   pval(    (    s<   lib/python2.7/site-packages/statsmodels/stats/_lilliefors.pyt   pval_lf   s    #	Ot   approxc   	      C   s  t  j |   }  t |   } | d k r\ |  |  j   |  j d d  } t j j } t } nC | d k r |  |  j   } t j	 j } t
 } d } n t d   t | | d d } | d	 k r t | |  } | d
 k r| j | |  } qn! | d k r| j | |  } n  | | f S(   s  
    Lilliefors test for normality or an exponential distribution.

    Kolmogorov Smirnov test with estimated mean and variance

    Parameters
    ----------
    x : array_like, 1d
        data series, sample
    dist : {'norm', 'exp'}, optional
        Distribution to test in set.
    pvalmethod : {'approx', 'table'}, optional
        'approx' is only valid for normality. if `dist = 'exp'`,
        `table` is returned.
        'approx' uses the approximation formula of Dalal and Wilkinson,
        valid for pvalues < 0.1. If the pvalue is larger than 0.1, then the
        result of `table` is returned

        For normality:
        'table' uses the table from Dalal and Wilkinson, which is available
        for pvalues between 0.001 and 0.2, and the formula of Lilliefors for
        large n (n>900). Values in the table are linearly interpolated.
        Values outside the range will be returned as bounds, 0.2 for large and
        0.001 for small pvalues.
        For exponential:
        'table' uses the table from Lilliefors 1967, available for pvalues
        between 0.01 and 0.2.
        Values outside the range will be returned as bounds, 0.2 for large and
        0.01 for small pvalues.

    Returns
    -------
    ksstat : float
        Kolmogorov-Smirnov test statistic with estimated mean and variance.
    pvalue : float
        If the pvalue is lower than some threshold, e.g. 0.05, then we can
        reject the Null hypothesis that the sample comes from a normal
        distribution

    Notes
    -----
    Reported power to distinguish normal from some other distributions is lower
    than with the Anderson-Darling test.

    could be vectorized
    R   t   ddofi   R    t   tables4   Invalid dist parameter. dist must be 'norm' or 'exp'R   R   R6   g?(   R   t   asarrayR	   t   meant   stdR   R   R   t   lilliefors_table_normt   expont   lilliefors_table_exponR&   R   R5   t   prob(	   R   R'   t
   pvalmethodR   t   zt   test_dt   lilliefors_tablet   d_ksR4   (    (    s<   lib/python2.7/site-packages/statsmodels/stats/_lilliefors.pyt
   kstest_fit  s(    / 		(    (   t   __doc__t	   functoolsR    t   statsmodels.compat.pythonR   t   numpyR   t   scipyR   t	   tabledistR   R   R2   R<   R>   R5   RE   t
   lillieforst   kstest_normalt   kstest_exponential(    (    (    s<   lib/python2.7/site-packages/statsmodels/stats/_lilliefors.pyt   <module>$   s   In	,K