ó
áp7]c           @   sK  d  Z  d d l Z d d l j j Z d d l j Z	 d d l
 Z d d l m Z m Z m Z m Z m Z m Z m Z d d l m Z m Z d d „ Z d d d „ Z d e e d	 „ Z e d
 d „ Z d „  Z d d „ Z d „  Z  d „  Z! d „  Z" d d „ Z# d „  Z$ d „  Z% d „  Z& d „  Z' d e( f d „  ƒ  YZ) e d „ Z* d S(   s   
Utility functions models code
iÿÿÿÿN(   t   reducet   lzipt   lmapt   asstr2t   ranget   longt   string_types(   t   _is_using_pandast   _is_recarrayi    c         C   s5   i  } x( t  |  ƒ D] \ } } | | | | <q W| S(   sd   
    Helper function to create a dictionary mapping a column number
    to the name in tmp_arr.
    (   t	   enumerate(   t   tmp_arrt   offsett   col_mapt   it   col_name(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   _make_dictnames   s    i   c         C   sé   t  j |  ƒ }  |  j d k r7 |  d d … d f }  n  | d k	 rÄ t  j | ƒ } | j d k rz | d d … d f } n  t  j t  j |  ƒ j | ƒ t  j | ƒ j | ƒ ƒ } |  | | | f St  j |  ƒ j | ƒ } |  | Sd S(   s™  
    Returns views on the arrays Y and X where missing observations are dropped.

    Y : array-like
    X : array-like, optional
    axis : int
        Axis along which to look for missing observations.  Default is 1, ie.,
        observations in rows.

    Returns
    -------
    Y : array
        All Y where the
    X : array

    Notes
    -----
    If either Y or X is 1d, it is reshaped to be 2d.
    i   N(   t   npt   asarrayt   ndimt   Nonet   arrayt   logical_andt   isnant   any(   t   Yt   Xt   axist   keepidx(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   drop_missing   s    c      
      sï  t  | t t f ƒ rC t | ƒ d k r4 | d } qC t d ƒ ‚ n  t  |  t j t j f ƒ r t  | t t	 f ƒ r | d
 k	 r t d ƒ ‚ n  t  |  t j ƒ rô | d
 k r½ t d ƒ ‚ n$ | |  k rá t d j | ƒ ƒ ‚ n  |  | }  d
 } n  t  |  t j ƒ r½| d
 k	 r9|  j | k r9t d j | ƒ ƒ ‚ n  t j |  ƒ } t j | ƒ ‰  ‡  f d †  t | j ƒ Dƒ } | s©t ˆ  j ƒ ˆ  _ t j ˆ  |  g d ƒ ‰  n  | r¹ˆ  | f Sˆ  S|  j j sÛ|  j t j k r×| r	t j |  ƒ j d k r	t d	 ƒ ‚ n  t  | t	 t f ƒ r1|  j j | } n  | d
 k rt|  j j rtt |  j j ƒ d k rt|  j j d } n  t j |  | ƒ } t } |  | j d k r¿| d
 d
 … d
 f } t } n  | |  | k j  t! ƒ } | rüt j | ƒ j" d d ƒ } n  | j j s3g  t j | ƒ D] }	 t# |	 ƒ ^ q} n= | j j rpg  t j | j$ ƒ  ƒ D] }	 t# |	 ƒ ^ qU} n  | d
 k r£y |  j j d } Wq£d } q£Xn  g  | D] }	 | d |	 ^ qª} | t k r t |  j ƒ d k rp| j% d | j% d k  rt j | ƒ j" d d ƒ } n  t& | | j j' g t | ƒ ƒ }
 t j( t) t | j$ ƒ  ƒ d |
 ƒj* t+ |  ƒ ƒ St, j- |  | d t d t+ |  ƒ t j k ƒ}  n  t, j. |  | d | d t d t+ |  ƒ t j k ƒ}  |  St  |  t j/ ƒ søt0 d ƒ ‚ nót  | t	 t f ƒ rû|  j% d } t j |  d
 d
 … | f ƒ } | d
 d
 … t j1 f |  d
 d
 … | f k j  t! ƒ } | j" d d ƒ } | t k r½| d 8} t j2 |  | d d ƒj  t! ƒ }  n  t j3 |  | f ƒ }  | t k r÷t4 | | ƒ } |  | f S|  S| d
 k rÛt j |  ƒ j d k rÛt j |  ƒ } | d
 d
 … d
 f |  k j  t! ƒ } | j" d d ƒ } | t k r—| t k r“t4 | ƒ } | | f S| St j3 |  | f ƒ }  | t k rÔt4 | d d ƒ} |  | f S|  Sn t d | ƒ ‚ d
 S(   sÃ  
    Returns a dummy matrix given an array of categorical variables.

    Parameters
    ----------
    data : array
        A structured array, recarray, array, Series or DataFrame.  This can be
        either a 1d vector of the categorical variable or a 2d array with
        the column specifying the categorical variable specified by the col
        argument.
    col : {str, int, None}
        If data is a DataFrame col must in a column of data. If data is a
        Series, col must be either the name of the Series or None. If data is a
        structured array or a recarray, `col` can be a string that is the name
        of the column that contains the variable.  For all other
        arrays `col` can be an int that is the (zero-based) column index
        number.  `col` can only be None for a 1d array.  The default is None.
    dictnames : bool, optional
        If True, a dictionary mapping the column number to the categorical
        name is returned.  Used to have information about plain arrays.
    drop : bool
        Whether or not keep the categorical variable in the returned matrix.

    Returns
    -------
    dummy_matrix, [dictnames, optional]
        A matrix of dummy (indicator/binary) float variables for the
        categorical data.  If dictnames is True, then the dictionary
        is returned as well.

    Notes
    -----
    This returns a dummy variable for EVERY distinct variable.  If a
    a structured or recarray is provided, the names for the new variable is the
    old variable name - underscore - category name.  So if the a variable
    'vote' had answers as 'yes' or 'no' then the returned array would have to
    new variables-- 'vote_yes' and 'vote_no'.  There is currently
    no name checking.

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm

    Univariate examples

    >>> import string
    >>> string_var = [string.ascii_lowercase[0:5],                       string.ascii_lowercase[5:10],                       string.ascii_lowercase[10:15],                       string.ascii_lowercase[15:20],                         string.ascii_lowercase[20:25]]
    >>> string_var *= 5
    >>> string_var = np.asarray(sorted(string_var))
    >>> design = sm.tools.categorical(string_var, drop=True)

    Or for a numerical categorical variable

    >>> instr = np.floor(np.arange(10,60, step=2)/10)
    >>> design = sm.tools.categorical(instr, drop=True)

    With a structured array

    >>> num = np.random.randn(25,2)
    >>> struct_ar = np.zeros((25,1), dtype=[('var1', 'f4'),('var2', 'f4'),                      ('instrument','f4'),('str_instr','a5')])
    >>> struct_ar['var1'] = num[:,0][:,None]
    >>> struct_ar['var2'] = num[:,1][:,None]
    >>> struct_ar['instrument'] = instr[:,None]
    >>> struct_ar['str_instr'] = string_var[:,None]
    >>> design = sm.tools.categorical(struct_ar, col='instrument', drop=True)

    Or

    >>> design2 = sm.tools.categorical(struct_ar, col='str_instr', drop=True)
    i   i    s%   Can only convert one column at a times   col must be a str, int or Nones/   col must be a str or int when using a DataFrames   Column '{0}' not found in datas"   data.name does not match col '{0}'c            s+   i  |  ]! \ } } | ˆ  k r | | “ q S(    (    (   t   .0R   t   cat(   t   dummies(    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pys
   <dictcomp>¨   s   	 	s)   col is None and the input array is not 1dNt   vart   _t   dtypet   usemaskt
   asrecarrayt   datas$   Array-like objects are not supportedR   R   s   The index %s is not understood(5   t
   isinstancet   listt   tuplet   lent
   ValueErrort   pdt	   DataFramet   SeriesR   t   intR   t	   TypeErrort   formatt   namet   Categoricalt   get_dummiesR	   t
   categoriest   columnst   concatR"   t   namest	   __class__R   t   recarrayt   squeezeR   t
   IndexErrorR   t   uniquet   Truet   Falset   astypet   floatt   swapaxesR   t   tolistt   shapeR   t   strR   R   t   viewt   typet   nprft   drop_fieldst   append_fieldst   ndarrayt   NotImplementedErrort   newaxist   deletet   column_stackR   (   R%   t   colt	   dictnamest   dropt   data_catR   R
   t   _swapt	   tmp_dummyt   itemt   dtR   (    (   R   s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   categorical@   sÀ    N
		
0	+1!"8
$
$%

t   skipc         C   sX  t  |  d ƒ s t |  ƒ rG d d l m } | |  d d d | d | ƒSt j |  ƒ } | j d k r~ | d d … d f } n | j d	 k rœ t d
 ƒ ‚ n  t j	 | d d ƒd k } | t j
 | d k d d ƒM} | j ƒ  r| d k rò | S| d k rt d ƒ ‚ qn  t j | j d ƒ | g } | r8| n | d d d … } t j | ƒ S(   s‹  
    Adds a column of ones to an array

    Parameters
    ----------
    data : array-like
        ``data`` is the column-ordered design matrix
    prepend : bool
        If true, the constant is in the first column.  Else the constant is
        appended (last column).
    has_constant : str {'raise', 'add', 'skip'}
        Behavior if ``data`` already has a constant. The default will return
        data without adding another constant. If 'raise', will raise an
        error if a constant is present. Using 'add' will duplicate the
        constant, if one is present.

    Returns
    -------
    data : array, recarray or DataFrame
        The original values with a constant (column of ones) as the first or
        last column. Returned value depends on input type.

    Notes
    -----
    When the input is recarray or a pandas Series or DataFrame, the added
    column's name is 'const'.
    iÿÿÿÿ(   t	   add_trendt   trendt   ct   prependt   has_constanti   Ni   s$   Only implementd 2-dimensional arraysR   i    g        RX   t   raises    data already contains a constant(   R   R   R   t   statsmodels.tsa.tsatoolsRY   R   t
   asanyarrayR   R*   t   ptpt   allR   t   onesRC   RN   (   R%   R\   R]   RY   t   xt   is_nonzero_const(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   add_constant
  s$    c         C   s»   t  j |  ƒ }  t  j | ƒ } |  j d k rF |  d d d … f }  n  |  j d | j d k rz t d | j d ƒ ‚ n  t  j |  | g ƒ } t  j j | ƒ t  j j | ƒ k r· t	 St
 S(   s    True if (Q, P) contrast `C` is estimable for (N, P) design `D`

    From an Q x P contrast matrix `C` and an N x P design matrix `D`, checks if
    the contrast `C` is estimable by looking at the rank of ``vstack([C,D])``
    and verifying it is the same as the rank of `D`.

    Parameters
    ----------
    C : (Q, P) array-like
        contrast matrix. If `C` has is 1 dimensional assume shape (1, P)
    D: (N, P) array-like
        design matrix

    Returns
    -------
    tf : bool
        True if the contrast `C` is estimable on design `D`

    Examples
    --------
    >>> D = np.array([[1, 1, 1, 0, 0, 0],
    ...               [0, 0, 0, 1, 1, 1],
    ...               [1, 1, 1, 1, 1, 1]]).T
    >>> isestimable([1, 0, 0], D)
    False
    >>> isestimable([1, -1, 0], D)
    True
    i   Ns   Contrast should have %d columns(   R   R   R   R   RC   R*   t   vstackt   linalgt   matrix_rankR>   R=   (   t   Ct   Dt   new(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   isestimable>  s    $gVçž¯Ò<c         C   s  t  j |  ƒ }  |  j ƒ  }  t  j j |  d ƒ \ } } } t  j | ƒ } | j d } | j d } | t  j j | ƒ } xL t	 t
 | | ƒ ƒ D]5 }	 | |	 | k r¹ d | |	 | |	 <qŽ d | |	 <qŽ Wt  j t  j | ƒ t  j | d d … t  j j f t  j | ƒ ƒ ƒ }
 |
 | f S(   s}   
    Return the pinv of an array X as well as the singular values
    used in computation.

    Code adapted from numpy.
    i    i   g      ð?g        N(   R   R   t	   conjugateRh   t   svdt   copyRC   t   maximumR    R   t   mint   dott	   transposet   multiplyt   coreRL   (   R   t   rcondt   ut   st   vtt   s_origt   mt   nt   cutoffR   t   res(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   pinv_extendedg  s    1c         C   s‹   t  j |  ƒ }  t  j |  d t  j ƒ} t  j |  j ƒ } | } | | |  j | d k @| | <d |  j | | j | <t  j | j | <| S(   s£   
    Return the reciprocal of an array, setting all entries less than or
    equal to 0 to 0. Therefore, it presumes that X should be positive in
    general.
    R"   i    g      ð?(   R   R   t
   zeros_liket   float64R   t   flatt   nan(   Rd   t   outt   nanst   pos(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   recipr  s    c         C   s‹   t  j |  ƒ }  t  j |  d t  j ƒ} t  j |  j ƒ } | } | | |  j | d k @| | <d |  j | | j | <t  j | j | <| S(   s’   
    Return the reciprocal of an array, setting all entries equal to 0
    as 0. It does not assume that X should be positive in
    general.
    R"   i    g      ð?(   R   R   R   R‚   R   Rƒ   R„   (   Rd   R…   R†   t   non_zero(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   recipr0  s    c         C   sa   t  j j |  d d ƒ } g  t  j | ƒ D] } |  d d … | f ^ q) } t  j t  j | ƒ ƒ S(   sF   
    Erase columns of zeros: can save some time in pseudoinverse.
    i   i    N(   R   t   addR    t   flatnonzeroR   Rt   (   t   matrixt   colsumR   t   val(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   clean0   s    2c         C   sÁ   | d k r! t j j |  ƒ } n  t j |  d d ƒ\ } } } t j | ƒ } | d d d … } g  } x5 t | ƒ D]' } | j | d d … | | f ƒ qt Wt j	 t j
 | ƒ ƒ j t j ƒ S(   s»   
    Return a matrix whose column span is the same as X.

    If the rank of X is known it can be specified as r -- no check
    is made to ensure that this really is the rank of X.

    t   full_matricesi    Niÿÿÿÿ(   R   R   Rh   Ri   t   LRo   t   argsortR   t   appendR   Rt   R?   R‚   (   R   t   rt   VRk   t   Ut   ordert   valueR   (    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   fullrank©  s    	%c         C   s#   t  | ƒ } d | | <|  j | ƒ S(   s  
    Unsqueeze a collapsed array

    >>> from numpy import mean
    >>> from numpy.random import standard_normal
    >>> x = standard_normal((3,4,5))
    >>> m = mean(x, axis=1)
    >>> m.shape
    (3, 5)
    >>> m = unsqueeze(m, 1, x.shape)
    >>> m.shape
    (3, 1, 5)
    >>>
    i   (   R'   t   reshape(   R%   R   t   oldshapet   newshape(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt	   unsqueeze¾  s    
c          G   s   t  d „  |  d d d … ƒ S(   sà  
    Returns the dot product of the given matrices.

    Parameters
    ----------
    arrs: argument list of ndarray

    Returns
    -------
    Dot product of all arguments.

    Examples
    --------
    >>> import numpy as np
    >>> from statsmodels.tools import chain_dot
    >>> A = np.arange(1,13).reshape(3,4)
    >>> B = np.arange(3,15).reshape(4,3)
    >>> C = np.arange(5,8).reshape(3,1)
    >>> chain_dot(A,B,C)
    array([[1820],
       [4300],
       [6780]])
    c         S   s   t  j | |  ƒ S(   N(   R   Rs   (   Rd   t   y(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   <lambda>ê  t    Niÿÿÿÿ(   R    (   t   arrs(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt	   chain_dotÒ  s    c         C   s   t  j t  j |  ƒ | d k ƒ } t  j |  d k t  j | ƒ ƒ } | | } t  j t  j |  ƒ t  j | ƒ ƒ } t  j | | <| S(   s¯   
    Returns np.dot(left_matrix, right_matrix) with the convention that
    nan * 0 = 0 and nan * x = nan if x != 0.

    Parameters
    ----------
    A, B : np.ndarrays
    i    (   R   Rs   R   t
   nan_to_numR„   (   t   At   Bt   should_be_nan_1t   should_be_nan_2t   should_be_nanRj   (    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   nan_dotí  s    
!!
$c         C   s   t  |  d |  ƒ S(   sˆ   
    Gets raw results back from wrapped results.

    Can be used in plotting functions or other post-estimation type
    routines.
    t   _results(   t   getattr(   t   results(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   maybe_unwrap_results  s    t   Bunchc           B   s   e  Z d  Z d „  Z RS(   sO   
    Returns a dict-like object with keys accessible via attribute lookup.
    c         O   s&   t  t |  ƒ j | | Ž  |  |  _ d  S(   N(   t   superR¯   t   __init__t   __dict__(   t   selft   argst   kwargs(    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyR±     s    (   t   __name__t
   __module__t   __doc__R±   (    (    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyR¯     s   c         C   sÁ   |  d k r |  St |  d ƒ } |  j d k rN | rA |  |  j f S|  d f Sn |  j d k rl t d ƒ ‚ n  | r{ |  j n d } | rª t j |  ƒ d d … d f | f St j	 |  ƒ | f Sd S(   sá  

    Parameters
    ----------
    x : array, Series, DataFrame or None
        Input to verify dimensions, and to transform as necesary
    ndarray : bool
        Flag indicating whether to always return a NumPy array. Setting False
        will return an pandas DataFrame when the input is a Series or a
        DataFrame.

    Returns
    -------
    out : array, DataFrame or None
        array or DataFrame with 2 dimensiona.  One dimensional arrays are
        returned as nobs by 1. None is returned if x is None.
    names : list of str or None
        list containing variables names when the input is a pandas datatype.
        Returns None if the input is an ndarray.

    Notes
    -----
    Accepts None for simplicity
    i   s   x mst be 1 or 2-dimensional.N(
   R   R   R   R5   R*   R1   R   R   R+   R,   (   Rd   RJ   t	   is_pandasR1   (    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt
   _ensure_2d  s    #(+   R¸   t   numpyR   t   numpy.lib.recfunctionst   libt   recfunctionsRG   t   numpy.linalgRh   R’   t   pandasR+   t   statsmodels.compat.pythonR    R   R   R   R   R   R   t   statsmodels.tools.dataR   R   R   R   R   R>   RW   R=   Rf   Rm   R€   Rˆ   RŠ   R   Rš   Rž   R£   Rª   R®   t   dictR¯   Rº   (    (    (    s6   lib/python2.7/site-packages/statsmodels/tools/tools.pyt   <module>   s,   4'Ê4	)										