ó
áp7]c           @   s4  d  d l  m Z d  d l Z d  d l m Z d  d l m Z d  d l	 m
 Z
 d  d l m Z m Z d  d l m Z d e f d	 „  ƒ  YZ d
 e f d „  ƒ  YZ d d „ Z d e f d „  ƒ  YZ d „  Z d „  Z d d d d d „ Z d e f d „  ƒ  YZ d d „ Z d d d d „ Z d d d e d „ Z d S(   iÿÿÿÿ(   t   rangeN(   t   f(   t   t(   t   stats(   t   clean0t   fullrank(   t   multipletestst   ContrastResultsc           B   sk   e  Z d  Z d d d d d d d d „ Z d d „ Z d „  Z d „  Z d d d d „ Z d d d „ Z	 RS(	   sY  
    Class for results of tests of linear restrictions on coefficients in a model.

    This class functions mainly as a container for `t_test`, `f_test` and
    `wald_test` for the parameters of a model.

    The attributes depend on the statistical test and are either based on the
    normal, the t, the F or the chisquare distribution.

    gš™™™™™©?c         K   sü  | |  _  | d  k	 rx d |  _ | |  _ |  j |  _ | |  _ | |  _ t |  _ | | f |  _	 t j
 | | | ƒ |  _ nk| d  k	 rî d |  _ | |  _ | |  _ | |  _ | |  _ t |  _ | f |  _	 |  j j
 t j | ƒ | ƒ d |  _ nõ d | k rã| d |  _ | d |  _ | d |  _ }	 | |  _ t t |  j ƒ |  _ | j d d ƒ |  _	 |  j d k rŽ|  j j
 |  j | ƒ |  _ | |  _ qãt j |	 t j ƒ |  _ t j |	 ƒ }
 |  j j
 t j |	 |
 ƒ ƒ d |  j |
 <n  t j |  j ƒ |  _ d  S(	   Nt   FR   i   t	   statistict   distributiont	   dist_argst   chi2(    (   t   effectt   NoneR
   t   fvalueR	   t   df_denomt   df_numt   fdistt   distR   t   sft   pvaluet   tvaluet   sdt	   student_tt   npt   abst   getattrR   t   gett	   full_liket   nant   isnant   squeeze(   t   selfR   R   R   R   R   R   t   alphat   kwdst   valuet   not_nan(    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   __init__   sB    												(	-c         C   sz   |  j  d k	 rj |  j j d | d |  j Œ } |  j  | |  j } |  j  | |  j } t j | | f ƒ St d ƒ ‚ d S(   s'  
        Returns the confidence interval of the value, `effect` of the constraint.

        This is currently only available for t and z tests.

        Parameters
        ----------
        alpha : float, optional
            The significance level for the confidence interval.
            ie., The default `alpha` = .05 returns a 95% confidence interval.

        Returns
        -------
        ci : ndarray, (k_constraints, 2)
            The array has the lower and the upper limit of the confidence
            interval in the columns.

        i   g       @s!   Confidence Interval not availableN(	   R   R   R   t   ppfR   R   R   t   column_stackt   NotImplementedError(   R!   R"   t   qt   lowert   upper(    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   conf_intD   s     c         C   s   |  j  ƒ  j ƒ  S(   N(   t   summaryt   __str__(   R!   (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyR/   `   s    c         C   s   t  |  j ƒ d |  j ƒ  S(   Ns   
(   t   strt	   __class__R/   (   R!   (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   __repr__c   s    c   
      C   sy  |  j  d k	 rý | d k r$ d } n | d k r9 d } n  |  j d k } d } | d k r‰ g  t t |  j  ƒ ƒ D] } d | ^ qp } n  d d l m } t j |  j	 ƒ } | |  |  j  |  j
 |  j | |  j | ƒ f d | d	 | d
 | d | d | ƒ}	 |	 St |  d ƒ r2d t |  j ƒ |  j	 |  j |  j f S|  j d k rad |  j |  j |  j	 |  j f Sd |  j |  j	 f Sd S(   s*  Summarize the Results of the hypothesis test

        Parameters
        ----------

        xname : list of strings, optional
            Default is `c_##` for ## in p the number of regressors
        alpha : float
            significance level for the confidence intervals. Default is
            alpha = 0.05 which implies a confidence level of 95%.
        title : string, optional
            Title for the params table. If not None, then this replaces the
            default title

        Returns
        -------
        smry : string or Summary instance
            This contains a parameter results table in the case of t or z test
            in the same form as the parameter results table in the model
            results summary.
            For F or Wald test, the return is a string.

        s   Test for Constraintst    R   t   constraintss   c%diÿÿÿÿ(   t   summary_paramst   ynamet   xnamet   use_tt   titleR"   R   s0   <F test: F=%s, p=%s, df_denom=%.3g, df_num=%.3g>R   s9   <Wald test (%s): statistic=%s, p-value=%s, df_denom=%.3g>s%   <Wald test: statistic=%s, p-value=%s>N(   R   R   R
   R    t   lent   statsmodels.iolib.summaryR5   R   t
   atleast_1dR   R   R	   R-   t   hasattrt   reprR   R   R   (
   R!   R7   R"   R9   R8   R6   t   iiR5   t   pvaluest   summ(    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyR.   f   s4    		/c      
   C   sÎ   |  j  d k	 r¾ |  j d k } d } | d k r_ g  t t |  j  ƒ ƒ D] } d | ^ qF } n  d d l m } | |  |  j  |  j |  j |  j	 |  j
 | ƒ f d | d | d | d	 | ƒ} | St d
 ƒ ‚ d S(   sp   Return the parameter table as a pandas DataFrame

        This is only available for t and normal tests
        R   R4   s   c%diÿÿÿÿ(   t   summary_params_frameR6   R7   R8   R"   s   only available for t and zN(   R   R   R
   R    R:   R;   RB   R   R	   R   R-   R)   (   R!   R7   R"   R8   R6   R?   RB   RA   (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   summary_frame¢   s    /	N(
   t   __name__t
   __module__t   __doc__R   R&   R-   R/   R2   R.   RC   (    (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyR      s   
,		<t   Contrastc           B   s5   e  Z d  Z d „  Z e e ƒ Z d „  Z d „  Z RS(   s[  
    This class is used to construct contrast matrices in regression models.

    They are specified by a (term, design) pair.  The term, T, is a linear
    combination of columns of the design matrix. The matrix attribute of
    Contrast is a contrast matrix C so that

    colspan(dot(D, C)) = colspan(dot(D, dot(pinv(D), T)))

    where pinv(D) is the generalized inverse of D. Further, the matrix

    Tnew = dot(C, D)

    is full rank. The rank attribute is the rank of

    dot(D, dot(pinv(D), T))

    In a regression model, the contrast tests that E(dot(Tnew, Y)) = 0
    for each column of Tnew.

    Parameters
    ----------
    term : array-like
    design : array-like

    Attributes
    ----------
    contrast_matrix

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> from statsmodels.stats.contrast import Contrast
    >>> import numpy as np
    >>> np.random.seed(54321)
    >>> X = np.random.standard_normal((40,10))
    # Get a contrast
    >>> new_term = np.column_stack((X[:,0], X[:,2]))
    >>> c = Contrast(new_term, X)
    >>> test = [[1] + [0]*9, [0]*2 + [1] + [0]*7]
    >>> np.allclose(c.contrast_matrix, test)
    True

    Get another contrast

    >>> P = np.dot(X, np.linalg.pinv(X))
    >>> resid = np.identity(40) - P
    >>> noise = np.dot(resid,np.random.standard_normal((40,5)))
    >>> new_term2 = np.column_stack((noise,X[:,2]))
    >>> c2 = Contrast(new_term2, X)
    >>> print(c2.contrast_matrix)
    [ -1.26424750e-16   8.59467391e-17   1.56384718e-01  -2.60875560e-17
    -7.77260726e-17  -8.41929574e-18  -7.36359622e-17  -1.39760860e-16
    1.82976904e-16  -3.75277947e-18]

    Get another contrast

    >>> zero = np.zeros((40,))
    >>> new_term3 = np.column_stack((zero,X[:,2]))
    >>> c3 = Contrast(new_term3, X)
    >>> test2 = [0]*2 + [1] + [0]*7
    >>> np.allclose(c3.contrast_matrix, test2)
    True

    c         C   s#   t  |  d ƒ s |  j ƒ  n  |  j S(   s3   
        Gets the contrast_matrix property
        t   _contrast_matrix(   R=   t   compute_matrixRH   (   R!   (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   _get_matrixü   s    c         C   s(   t  j | ƒ |  _ t  j | ƒ |  _ d  S(   N(   R   t   asarrayt   termt   design(   R!   RL   RM   (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyR&     s    c         C   s’   |  j  } | j d k r1 | d d … d f } n  t | ƒ |  _ |  j |  _ t |  j |  j ƒ |  _ y |  j	 j
 d |  _ Wn d |  _ n Xd S(   sµ   
        Construct a contrast matrix C so that

        colspan(dot(D, C)) = colspan(dot(D, dot(pinv(D), T)))

        where pinv(D) is the generalized inverse of D=design.
        i   N(   RL   t   ndimR   R   t   TRM   t   Dt   contrastfromcolsRH   t   matrixt   shapet   rank(   R!   RO   (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyRI   
  s    		(   RD   RE   RF   RJ   t   propertyt   contrast_matrixR&   RI   (    (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyRG   º   s
   A		c         C   sd  t  j |  ƒ }  t  j | ƒ } | j \ } } |  j d | k rb |  j d | k rb t d ƒ ‚ n  | d k rƒ t  j j | ƒ } n  |  j d | k r® t  j | |  ƒ j } n* |  } t  j | t  j | | j ƒ ƒ j } t  j | | j ƒ } t	 | j ƒ d k r| d f | _ n  t  j j
 | ƒ | j d k rWt | ƒ } t  j | | ƒ j } n  t  j | ƒ S(   sš  
    From an n x p design matrix D and a matrix L, tries
    to determine a p x q contrast matrix C which
    determines a contrast of full rank, i.e. the
    n x q matrix

    dot(transpose(C), pinv(D))

    is full rank.

    L must satisfy either L.shape[0] == n or L.shape[1] == p.

    If L.shape[0] == n, then L is thought of as representing
    columns in the column space of D.

    If L.shape[1] == p, then L is thought of as what is known
    as a contrast matrix. In this case, this function returns an estimable
    contrast corresponding to the dot(D, L.T)

    Note that this always produces a meaningful contrast, not always
    with the intended properties because q is always non-zero unless
    L is identically 0. That is, it produces a contrast that spans
    the column space of L (after projection onto the column space of D).

    Parameters
    ----------
    L : array-like
    D : array-like
    i    i   s   shape of L and D mismatchedN(   R   RK   RS   t
   ValueErrorR   t   linalgt   pinvt   dotRO   R:   t   matrix_rankR   R    (   t   LRP   t   pseudot   nt   pt   Ct   Lp(    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyRQ      s$    &$t   WaldTestResultsc           B   sA   e  Z d d d  „ Z e d „  ƒ Z d „  Z d „  Z d „  Z RS(   c         C   s/  | |  _  | |  _ | |  _ | |  _ | d  k	 r… | d j |  _ | d j |  _ | d j |  _ |  j d k r+| d j |  _ q+n¦ |  j d k r³ t	 j
 |  _ |  j d |  _ n? |  j d k ræ t	 j |  _ |  j \ |  _ |  _ n t d ƒ ‚ | d  k r"|  j j t j | ƒ | Œ |  _ n	 | |  _ d  S(	   NR	   R   t   df_constraintR   R   R   i    s)   only F and chi2 are possible distribution(   t   tableR
   R	   R   R   t   valuesR@   t   df_constraintsR   R   R   R   R   RW   R   R   R   (   R!   R	   R
   R   Rd   R@   (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyR&   _  s(    				$c         C   sB   d |  j  } |  j  | d g } |  j  d k r> | j d ƒ n  | S(   s'   column names for summary table
        s   P>%ss   df constraintR   s   df denom(   R
   t   append(   R!   t   pr_testt	   col_names(    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyRi     s
    c         C   sS   t  |  d ƒ r |  j St t |  j j |  j ƒ ƒ } |  j j d | ƒ |  _ |  j S(   Nt   _dframet   columns(	   R=   Rj   t   dictt   zipRd   Rk   Ri   t   renamet   dframe(   R!   t   renaming(    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyRC   Œ  s
    c         C   s   |  j  ƒ  j ƒ  S(   N(   RC   t	   to_string(   R!   (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyR/   –  s    c         C   s   t  |  j ƒ d |  j ƒ  S(   Ns   
(   R0   R1   R/   (   R!   (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyR2   š  s    N(	   RD   RE   R   R&   RU   Ri   RC   R/   R2   (    (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyRb   \  s   !	
	c         C   sO   t  j |  d ƒ } g  t | Œ  D]& } d | | d | | d f ^ q } | S(   s8   helper function for labels for pairwise comparisons
    i   s   %s-%si    (   R   t   triu_indicesRm   (   t   k_levelt   level_namest   idx_pairs_allt   namet   labels(    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   _get_pairs_labels¡  s    6c         C   së   | d } t  j | d ƒ } t | d ƒ } t  j | | f ƒ } d | t  j | ƒ | d f <d | t  j | ƒ | d f <t  j | ƒ } t  j | | f d d ƒ} | j d }	 t  j |	 |  f ƒ }
 | |
 d d … | | | … f <|
 S(   så  create pairwise contrast for reference coding

    currently not used,
    using encoding contrast matrix is more general, but requires requires
    factor information from patsy design_info.


    Parameters
    ----------
    k_params : int
        number of parameters
    k_level : int
        number of levels or categories (including reference case)
    idx_start : int
        Index of the first parameter of this factor. The restrictions on the
        factor are inserted as a block in the full restriction matrix starting
        at column with index `idx_start`.

    Returns
    -------
    contrasts : ndarray
        restriction matrix with k_params columns and number of rows equal to
        the number of restrictions.

    i   i    iÿÿÿÿt   axisN(   R   Rr   R:   t   zerost   aranget   eyet   concatenateRS   (   t   k_paramsRs   t	   idx_startt
   k_level_m1t	   idx_pairst   kt   c_pairst   c_referencet   ct   k_allt	   contrasts(    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   _contrast_pairs©  s    
 t   hsgš™™™™™©?c   
      C   s“   |  j  | ƒ } | j d | ƒ } t | ƒ t k	 r? | g } n  xM | D]E } t | j d | d | ƒ}	 |	 d | d | <|	 d | d | <qF W| S(   sÔ  perform t_test and add multiplicity correction to results dataframe

    Parameters
    ----------
    result results instance
        results of an estimated model
    contrasts : ndarray
        restriction matrix for t_test
    method : string or list of strings
        method for multiple testing p-value correction, default is'hs'.
    alpha : float
        significance level for multiple testing reject decision.
    ci_method : None
        not used yet, will be for multiplicity corrected confidence intervals
    contrast_names : list of strings or None
        If contrast_names are provided, then they are used in the index of the
        returned dataframe, otherwise some generic default names are created.

    Returns
    -------
    res_df : pandas DataFrame
        The dataframe contains the results of the t_test and additional columns
        for multiplicity corrected p-values and boolean indicator for whether
        the Null hypothesis is rejected.
    R7   t   methodR"   i   s	   pvalue-%si    s	   reject-%s(   t   t_testRC   t   typet   listR   R   (
   t   resultR‡   RŠ   R"   t	   ci_methodt   contrast_namest   ttt   res_dft   metht   mt(    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   t_test_multiÔ  s    t   MultiCompResultc           B   s   e  Z d  Z d „  Z RS(   sd   class to hold return of t_test_pairwise

    currently just a minimal class to hold attributes.
    c         K   s   |  j  j | ƒ d  S(   N(   t   __dict__t   update(   R!   t   kwargs(    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyR&      s    (   RD   RE   RF   R&   (    (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyR–   û  s   c         C   sm   |  j  \ } } t j | | f ƒ } | d k rS |  | d d … | | | … f <n |  | d d … | f <| S(   sq  helper function to expand constraints to a full restriction matrix

    Parameters
    ----------
    contrasts : ndarray
        restriction matrix for t_test
    k_params : int
        number of parameters
    idx_start : int
        Index of the first parameter of this factor. The restrictions on the
        factor are inserted as a block in the full restriction matrix starting
        at column with index `idx_start`.
    index : slice or ndarray
        Column index if constraints do not form a block in the full restriction
        matrix, i.e. if parameters that are subject to restrictions are not
        consecutive in the list of parameters.
        If index is not None, then idx_start is ignored.

    Returns
    -------
    contrasts : ndarray
        restriction matrix with k_params columns and number of rows equal to
        the number of restrictions.
    N(   RS   R   Rz   R   (   R‡   R~   R   t   indext   k_ct   k_pR…   (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   _embed_constraints  s    #t   pairwisec   
      C   s¤   |  } | j  \ } } d d l j j j } | d k rI | j | ƒ } n t d ƒ ‚ | j | ƒ }	 | d k	 r  | d k r‹ t	 d ƒ ‚ n  t
 |	 | | ƒ }	 n  |	 S(	   s  helper function to create constraints based on encoding matrix

    Parameters
    ----------
    encoding_matrix : ndarray
        contrast matrix for the encoding of a factor as defined by patsy.
        The number of rows should be equal to the number of levels or categories
        of the factor, the number of columns should be equal to the number
        of parameters for this factor.
    comparison : str
        Currently only 'pairwise' is implemented. The restriction matrix
        can be used for testing the hypothesis that all pairwise differences
        are zero.
    k_params : int
        number of parameters
    idx_start : int
        Index of the first parameter of this factor. The restrictions on the
        factor are inserted as a block in the full restriction matrix starting
        at column with index `idx_start`.

    Returns
    -------
    contrast : ndarray
        Contrast or restriction matrix that can be used in hypothesis test
        of model results. The number of columns is k_params.
    iÿÿÿÿNRž   t   pwt   pairss!   currentlyonly pairwise comparisons3   if k_params is not None, then idx_start is required(   Rž   RŸ   R    (   RS   t#   statsmodels.sandbox.stats.multicompt   sandboxR   t	   multicompt   contrast_allpairsR)   RZ   R   RW   R   (
   t   encoding_matrixt
   comparisonR~   R   t   cmRs   Rœ   t   mct   c_allR‡   (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   _constraints_factor'  s    c         C   s©  |  j  j j } | j j | ƒ } | j | } | j | j }	 | ri t | j	 ƒ d k ri t
 d ƒ ‚ n  | j	 d }
 | j |
 j } | d k	 rÌ t | ƒ t | ƒ k r³ | } qÌ t
 d t | ƒ ƒ ‚ n  t | ƒ } | j | d j |
 j } t |  j ƒ } t | | ƒ } d d l j j j } | j | ƒ } | j | ƒ } t | | |	 ƒ } t |  | d | d d d	 | d
 | ƒ} t d | d | d | d | d | ƒ } | S(   s²  perform pairwise t_test with multiple testing corrected p-values

    This uses the formula design_info encoding contrast matrix and should
    work for all encodings of a main effect.

    Parameters
    ----------
    result : result instance
        The results of an estimated model with a categorical main effect.
    term_name : str
        name of the term for which pairwise comparisons are computed.
        Term names for categorical effects are created by patsy and
        correspond to the main part of the exog names.
    method : str or list of strings
        multiple testing p-value correction, default is 'hs',
        see stats.multipletesting
    alpha : float
        significance level for multiple testing reject decision.
    factor_labels : None, list of str
        Labels for the factor levels used for pairwise labels. If not
        provided, then the labels from the formula design_info are used.
    ignore : boolean
        Turn off some of the exceptions raised by input checks.

    Returns
    -------
    results : instance of a simple Results class
        The results are stored as attributes, the main attributes are the
        following two. Other attributes are added for debugging purposes
        or as background information.

        - result_frame : pandas DataFrame with t_test results and multiple
          testing corrected p-values.
        - contrasts : matrix of constraints of the null hypothesis in the
          t_test.

    Notes
    -----

    Status: experimental. Currently only checked for treatment coding with
    and without specified reference level.

    Currently there are no multiple testing corrected confidence intervals
    available.

    i   s%   interaction effects not yet supportedi    s0   factor_labels has the wrong length, should be %diÿÿÿÿNRŠ   R   R"   R   t   result_frameR‡   RL   t   contrast_labelst   term_encoding_matrix(   t   modelt   datat   design_infot
   term_namesRš   t   termst   term_slicest   startR:   t   factorsRW   t   factor_infost
   categoriesR   t   term_codingst   contrast_matricesRR   t   paramsRx   R¡   R¢   R   R£   R¤   RZ   R   R•   R–   (   RŽ   t	   term_nameRŠ   R"   t   factor_labelst   ignoret   desinfot   term_idxRL   R   t   factort   catRs   R§   R~   Rw   R¨   t   c_all_pairst   contrasts_subR‡   R’   t   res(    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   t_test_pairwiseV  s8    1		(   t   statsmodels.compat.pythonR    t   numpyR   t   scipy.statsR   R   R   R   t   scipyR   t   statsmodels.tools.toolsR   R   t   statsmodels.stats.multitestR   t   objectR   RG   R   RQ   Rb   Rx   Rˆ   R•   R–   R   Rª   t   FalseRÅ   (    (    (    s9   lib/python2.7/site-packages/statsmodels/stats/contrast.pyt   <module>   s(   ¯f<E		+	&	#.