ó
áp7]c           @   sÕ   d  Z  d d l m Z m Z d d l Z d d l m Z d d l m	 Z	 d d l
 m Z m Z d d l m Z m Z m Z m Z d d	 d
 g Z d
 e f d „  ƒ  YZ d e f d „  ƒ  YZ d	 e f d „  ƒ  YZ d S(   sY  
Multivariate Conditional and Unconditional Kernel Density Estimation
with Mixed Data Types

References
----------
[1] Racine, J., Li, Q. Nonparametric econometrics: theory and practice.
    Princeton University Press. (2007)
[2] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
    and Trends in Econometrics: Vol 3: No 1, pp1-88. (2008)
    http://dx.doi.org/10.1561/0800000009
[3] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
    with Categorical and Continuous Data." Working Paper. (2000)
[4] Racine, J. Li, Q. "Kernel Estimation of Multivariate Conditional
    Distributions Annals of Economics and Finance 5, 211-235 (2004)
[5] Liu, R., Yang, L. "Kernel estimation of multivariate
    cumulative distribution function."
    Journal of Nonparametric Statistics (2008)
[6] Li, R., Ju, G. "Nonparametric Estimation of Multivariate CDF
    with Categorical and Continuous Data." Working Paper
[7] Li, Q., Racine, J. "Cross-validated local linear nonparametric
    regression" Statistica Sinica 14(2004), pp. 485-512
[8] Racine, J.: "Consistent Significance Testing for Nonparametric
        Regression" Journal of Business & Economics Statistics
[9] Racine, J., Hart, J., Li, Q., "Testing the Significance of
        Categorical Predictor Variables in Nonparametric Regression
        Models", 2006, Econometric Reviews 25, 523-544

iÿÿÿÿ(   t   ranget   nextN(   t   optimize(   t
   mquantiles(   t   KDEMultivariatet	   KernelReg(   t   gpket   LeaveOneOutt   _get_type_post   _adjust_shapet   SingleIndexModelt
   SemiLineart	   TestFFormc           B   s,   e  Z d  Z d d „ Z d „  Z d „  Z RS(   sW  
    Nonparametric test for functional form.

    Parameters
    ----------
    endog: list
        Dependent variable (training set)
    exog: list of array_like objects
        The independent (right-hand-side) variables
    bw: array_like, str
        Bandwidths for exog or specify method for bandwidth selection
    fform: function
        The functional form ``y = g(b, x)`` to be tested. Takes as inputs
        the RHS variables `exog` and the coefficients ``b`` (betas)
        and returns a fitted ``y_hat``.
    var_type: str
        The type of the independent `exog` variables:

            - c: continuous
            - o: ordered
            - u: unordered

    estimator: function
        Must return the estimated coefficients b (betas). Takes as inputs
        ``(endog, exog)``.  E.g. least square estimator::

            lambda (x,y): np.dot(np.pinv(np.dot(x.T, x)), np.dot(x.T, y))

    References
    ----------
    See Racine, J.: "Consistent Significance Testing for Nonparametric
    Regression" Journal of Business & Economics Statistics.

    See chapter 12 in [1]  pp. 355-357.
    id   c         C   sg   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ t | d | d | ƒj |  _ |  j ƒ  |  _	 d  S(   Nt   bwt   var_type(
   t   endogt   exogR   t   fformt	   estimatort   nbootR   R   t   _compute_sigt   sig(   t   selfR   R   R   R   R   R   R   (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyt   __init__R   s    						c         C   sø  |  j  } |  j } |  j | | ƒ } |  j | | ƒ } t j | ƒ d } | | } | t j | ƒ } |  j | ƒ |  _ t j	 d ƒ } d | d } d | d }	 | | }
 |	 | } |	 | } t j
 |  j d f ƒ } x¦ t |  j ƒ D]• } | j ƒ  } t j j d d d | f ƒ} | | k  } |
 | | | <| | } |  j | | ƒ } |  j | | ƒ } | | } |  j | ƒ | | <qé W| |  _ d } |  j t | d ƒ k r²d } n  |  j t | d	 ƒ k rÓd
 } n  |  j t | d ƒ k rôd } n  | S(   Ni    g      @i   g       @t   sizes   Not SignificantgÍÌÌÌÌÌì?t   *gffffffî?s   **g®Gáz®ï?s   ***(   R   R   R   R   t   npt   shapet   meant   _compute_test_statt	   test_statt   sqrtt   emptyR   R    t   copyt   randomt   uniformt   boots_resultsR   (   R   t   Yt   Xt   bt   mt   nt   residt   sqrt5t   fct1t   fct2t   u1t   u2t   rt   I_distt   jt   u_boott   probt   indt   Y_boott   b_hatt   m_hatt
   u_boot_hatR   (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyR   \   sD    		





				c         C   sÀ  t  j | ƒ d } t |  j ƒ } t | d  d  … d  f ƒ j ƒ  } d } d } xø t | ƒ D]ê \ } } t | ƒ }	 t  j |	 ƒ }	 t	 |  j
 d | d |  j | d  d  … f d |  j d t ƒ}
 | | |	 |
 } |	 j |
 j k sí t ‚ | | j ƒ  7} | | d j ƒ  7} t  j | ƒ d k s,t ‚ t  j | ƒ d k s] t ‚ q] W| d | | d 9} t |  j ƒ d } |  j
 | j ƒ  } | d | | | d 9} | | t  j | | ƒ } | S(	   Ni    t   datat   data_predictR   t   tosumi   i   g      ð?(   R   R   R   R   t   Nonet   __iter__t	   enumerateR   t   squeezeR   R   R   t   Falset   AssertionErrort   sumR   R   t   prodR   (   R   t   uR)   t   XLOOt   uLOOt   ivalt   S2t   it   X_not_it   u_jt   Kt   f_it   ix_contt   hpt   T(    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyR   ‚   s,    "-(   t   __name__t
   __module__t   __doc__R   R   R   (    (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyR   .   s   #
	&c           B   s>   e  Z d  Z d „  Z d „  Z d „  Z d d „ Z d „  Z RS(   s¿  
    Single index semiparametric model ``y = g(X * b) + e``.

    Parameters
    ----------
    endog: array_like
        The dependent variable
    exog: array_like
        The independent variable(s)
    var_type: str
        The type of variables in X:

            - c: continuous
            - o: ordered
            - u: unordered

    Attributes
    ----------
    b: array_like
        The linear coefficients b (betas)
    bw: array_like
        Bandwidths

    Methods
    -------
    fit(): Computes the fitted values ``E[Y|X] = g(X * b)``
           and the marginal effects ``dY/dX``.

    References
    ----------
    See chapter on semiparametric models in [1]

    Notes
    -----
    This model resembles the binary choice models. The user knows
    that X and b interact linearly, but ``g(X * b)`` is unknown.
    In the parametric binary choice models the user usually assumes
    some distribution of g() such as normal or logistic.

    c         C   sœ   | |  _  t | ƒ |  _ |  j  d |  _  t | d ƒ |  _ t | |  j ƒ |  _ t j |  j ƒ d |  _ |  j  |  _	 |  j
 |  _ |  j ƒ  \ |  _ |  _ d  S(   Ni    i   (   R   t   lenRM   R	   R   R   R   R   t   nobst	   data_typet   _est_loc_lineart   funct	   _est_b_bwR'   R   (   R   R   R   R   (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyR   Æ   s    	c         C   sp   t  j j d |  j d f ƒ } t j |  j | d d ƒ} | d |  j !} | |  j } |  j | ƒ } | | f S(   NR   i   t   dispi    (   R   R"   R#   RM   R   t   fmint   cv_loot   _set_bw_bounds(   R   t   params0t   b_bwR'   R   (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyRZ   Ò   s    c         C   s  t  j | ƒ } | d |  j !} | |  j } t |  j ƒ } t |  j ƒ j ƒ  } d } x« t | ƒ D] \ } } t | ƒ }	 |  j	 | d |	 d t  j
 | | ƒ d  d  … d  f d t  j
 |  j | | d … d  d  … f | ƒ ƒd }
 | |  j | |
 d 7} qc W| |  j S(   Ni    R   R   R;   i   i   (   R   t   asarrayRM   R   R   R   R>   R?   R   RY   t   dotR=   RV   (   R   t   paramsR'   R   t   LOO_Xt   LOO_Yt   LRJ   RK   R%   t   G(    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyR]   Ú   s    57c         C   s+  | d  k r |  j } n t | |  j ƒ } t j | ƒ d } t j | f ƒ } t j | |  j f ƒ } x· t | ƒ D]© } |  j |  j	 |  j
 t j |  j |  j ƒ d  d  … d  f d t j | | | d … d  d  … f |  j ƒ ƒ} | d | | <t j | d ƒ } | | | d  d  … f <qt W| | f S(   Ni    R;   i   (   R=   R   R	   RM   R   R   R    R    RY   R   R   Rb   R'   R@   (   R   R;   t   N_data_predictR   t   mfxRJ   t   mean_mfxt   mfx_c(    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyt   fitî   s    (2c         C   sq   d } | d t  |  j ƒ d 7} | d t  |  j ƒ d 7} | d |  j d 7} | d d 7} | d d 7} | S(   s    Provide something sane to print.s   Single Index Model 
s   Number of variables: K = s   
s   Number of samples:   nobs = s   Variable types:      s   BW selection method: cv_lss   Estimator type: local constant(   t   strRM   RV   R   (   R   t   repr(    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyt   __repr__  s    N(	   RR   RS   RT   R   RZ   R]   R=   Rl   Ro   (    (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyR
      s   (			c           B   sA   e  Z d  Z d „  Z d „  Z d „  Z d d d „ Z d „  Z RS(   sr  
    Semiparametric partially linear model, ``Y = Xb + g(Z) + e``.

    Parameters
    ----------
    endog: array_like
        The dependent variable
    exog: array_like
        The linear component in the regression
    exog_nonparametric: array_like
        The nonparametric component in the regression
    var_type: str
        The type of the variables in the nonparametric component;

            - c: continuous
            - o: ordered
            - u: unordered

    k_linear : int
        The number of variables that comprise the linear component.

    Attributes
    ----------
    bw: array_like
        Bandwidths for the nonparametric component exog_nonparametric
    b: array_like
        Coefficients in the linear component
    nobs : int
        The number of observations.
    k_linear : int
        The number of variables that comprise the linear component.

    Methods
    -------
    fit(): Returns the fitted mean and marginal effects dy/dz

    Notes
    -----
    This model uses only the local constant regression estimator

    References
    ----------
    See chapter on Semiparametric Models in [1]
    c         C   s§   t  | d ƒ |  _ t  | | ƒ |  _ t | ƒ |  _ t  | |  j ƒ |  _ | |  _ t j |  j ƒ d |  _	 | |  _
 |  j
 |  _ |  j |  _ |  j ƒ  \ |  _ |  _ d  S(   Ni   i    (   R	   R   R   RU   RM   t   exog_nonparametrict   k_linearR   R   RV   R   RW   RX   RY   RZ   R'   R   (   R   R   R   Rp   R   Rq   (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyR   :  s    		c         C   sd   t  j j d |  j |  j f ƒ } t j |  j | d d ƒ} | d |  j !} | |  j } | | f S(   s†   
        Computes the (beta) coefficients and the bandwidths.

        Minimizes ``cv_loo`` with respect to ``b`` and ``bw``.
        R   R[   i    (   R   R"   R#   Rq   RM   R   R\   R]   (   R   R_   R`   R'   R   (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyRZ   G  s
    "c         C   sb  t  j | ƒ } | d |  j !} | |  j } t |  j ƒ } t |  j ƒ j ƒ  } t |  j ƒ j ƒ  } t  j |  j | ƒ d d … d f } d } xË t
 | ƒ D]½ \ }	 }
 t | ƒ } t | ƒ } t  j |
 | ƒ d d … d f } | | } |  j | d | d | d |  j |	 d d … f ƒd } | |	 d d … f } | |  j |	 | | d 7} q W| S(   só  
        Similar to the cross validation leave-one-out estimator.

        Modified to reflect the linear components.

        Parameters
        ----------
        params: array_like
            Vector consisting of the coefficients (b) and the bandwidths (bw).
            The first ``k_linear`` elements are the coefficients.

        Returns
        -------
        L: float
            The value of the objective function

        References
        ----------
        See p.254 in [1]
        i    NR   R   R;   i   (   R   Ra   Rq   R   R   R   R>   Rp   Rb   R=   R?   R   RY   (   R   Rc   R'   R   Rd   Re   t   LOO_Zt   XbRf   t   iiRK   R%   t   Zt   Xb_jt   YxRg   t   lt(    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyR]   T  s$    %"
!!c   
   
   C   sF  | d k r |  j } n t | |  j ƒ } | d k rB |  j } n t | |  j ƒ } t j | ƒ d } t j | f ƒ } t j | |  j f ƒ } |  j	 t j
 | |  j ƒ d d … d f } x| t | ƒ D]n } |  j |  j | |  j d | | d d … f ƒ} | d | | <t j | d ƒ }	 |	 | | d d … f <qÊ W| | f S(   s+   Computes fitted values and marginal effectsi    NR;   i   (   R=   R   R	   Rq   Rp   RM   R   R   R    R   Rb   R'   R    RY   R   R@   (
   R   t   exog_predictt   exog_nonparametric_predictRh   R   Ri   R%   RJ   Rj   Rk   (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyRl   }  s"    ,c         C   sq   d } | d t  |  j ƒ d 7} | d t  |  j ƒ d 7} | d |  j d 7} | d d 7} | d d 7} | S(   s    Provide something sane to print.s'   Semiparamatric Partially Linear Model 
s   Number of variables: K = s   
s   Number of samples:   N = s   Variable types:      s   BW selection method: cv_lss   Estimator type: local constant(   Rm   RM   RV   R   (   R   Rn   (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyRo   —  s    N(	   RR   RS   RT   R   RZ   R]   R=   Rl   Ro   (    (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyR     s   ,			)(   RT   t   statsmodels.compat.pythonR    R   t   numpyR   t   scipyR   t   scipy.stats.mstatsR   t   statsmodels.nonparametric.apiR   R   t&   statsmodels.nonparametric._kernel_baseR   R   R   R	   t   __all__t   objectR   R
   R   (    (    (    sN   lib/python2.7/site-packages/statsmodels/sandbox/nonparametric/kernel_extras.pyt   <module>   s   "oo