ó
áp7]c           @   så   d  Z  d d l m Z d d l Z d d l m Z d d l m Z d d l	 m
 Z
 d d l m Z m Z d d l Z e Z d d „ Z d	 e f d
 „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ d e
 e f d „  ƒ  YZ d S(   s^  
Generalized additive models



Requirements for smoothers
--------------------------

smooth(y, weights=xxx) : ? no return ? alias for fit
predict(x=None) : smoothed values, fittedvalues or for new exog
df_fit() : degress of freedom of fit ?


Notes
-----
- using PolySmoother works for AdditiveModel, and GAM with Poisson and Binomial
- testfailure with Gamma, no other families tested
- there is still an indeterminacy in the split up of the constant across
  components (smoothers) and alpha, sum, i.e. constant, looks good.
  - role of offset, that I haven't tried to figure out yet

Refactoring
-----------
currently result is attached to model instead of other way around
split up Result in class for AdditiveModel and for GAM,
subclass GLMResults, needs verification that result statistics are appropriate
how much inheritance, double inheritance?
renamings and cleanup
interface to other smoothers, scipy splines

basic unittests as support for refactoring exist, but we should have a test
case for gamma and the others. Advantage of PolySmoother is that we can
benchmark against the parametric GLM results.

iÿÿÿÿ(   t   rangeN(   t   families(   t   PolySmoother(   t   GLM(   t   IterationLimitWarningt   iteration_limit_docc         C   sš  t  j |  ƒ } |  j d } | d k  r1 | } n	t  j d ƒ t  j d ƒ } t  j d ƒ t  j d ƒ } t  j d ƒ t  j d ƒ } t  j d ƒ t  j d ƒ } | d k  rÎ d | | | | d d } nl | d	 k  rû d | | | | d d
 } n? | d k  r(d | | | | d	 d } n d | d d } | t  j d | d | ƒ j t  j ƒ }	 | d k rxd }
 n | }
 t |
 d |  j	 ƒ  ƒ} | S(   s   

    i    iô  i2   i   id   iŒ   iÈ   g     Àb@i   g     À‚@i€  g     À¢@g      ©@gš™™™™™É?i   i   t   xN(
   t   npt   sortt   shapet   logt   linspacet   astypet   int32t   NoneR   t   copy(   R   t   s_argt   _xt   nt   nknotst   a1t   a2t   a3t   a4t   knotst   ordert   s(    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyt   default_smoother=   s*    	!!!)	t   Offsetc           B   s   e  Z d  „  Z d „  Z RS(   c         C   s   | |  _  | |  _ d  S(   N(   t   fnt   offset(   t   selfR   R   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyt   __init__f   s    	c         O   s   |  j  | | Ž  |  j S(   N(   R   R   (   R   t   argst   kw(    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyt   __call__j   s    (   t   __name__t
   __module__R    R#   (    (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyR   d   s   	t   Resultsc           B   s>   e  Z d  „  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   c         C   sj   | j  \ |  _ |  _ | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ |  j	 | ƒ |  _
 d  S(   N(   R	   t   nobst   k_varst   Yt   alphat	   smoothersR   t   familyt   exogt   linkinversepredictt   mu(   R   R)   R*   R-   R+   R,   R   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyR    o   s    							c         C   s   |  j  | ƒ S(   s\   expected value ? check new GLM, same as mu for given exog
        maybe remove this
        (   R.   (   R   R-   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyR#   ~   s    c         C   s   |  j  j j |  j | ƒ ƒ S(   sB   expected value ? check new GLM, same as mu for given exog
        (   R,   t   linkt   inverset   predict(   R   R-   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyR.   „   s    c         C   s¤   |  j  | ƒ } | j d |  j k rd d d l } | j d t ƒ t j |  j  | ƒ d d ƒ|  j S| j d |  j k r” t j | d d ƒ|  j St	 d ƒ ‚ d S(   s{   predict response, sum of smoothed components
        TODO: What's this in the case of GLM, corresponds to X*beta ?
        i    iÿÿÿÿNs&   old orientation, colvars, will go awayt   axisi   s   shape mismatch in predict(
   t   smoothedR	   R(   t   warningst   warnt   FutureWarningR   t   sumR*   t
   ValueError(   R   R-   t   exog_smoothedR5   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyR2   ‰   s    	#c         C   s[   t  j g  t | j d ƒ D]7 } |  j | j | d d … | f ƒ |  j | ^ q ƒ j S(   s4   get smoothed prediction for each component

        i   N(   R   t   arrayR    R	   R+   R2   R   t   T(   R   R-   t   i(    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyR4       s    	c         C   sE   |  j  | ƒ } | j d ƒ } | j ƒ  |  j } | | } | | f S(   Ni    (   R4   t   meanR8   R*   (   R   R-   t
   componentst   meanst   constantt   components_demeaned(    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyt   smoothed_demeaned­   s
    
(   R$   R%   R    R#   R.   R2   R4   RC   (    (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyR&   m   s   					t   AdditiveModelc           B   s\   e  Z d  Z d
 d
 d
 d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d d d	 „ Z
 RS(   s…  additive model with non-parametric, smoothed components

    Parameters
    ----------
    exog : ndarray
    smoothers : None or list of smoother instances
        smoother instances not yet checked
    weights : None or ndarray
    family : None or family instance
        I think only used because of shared results with GAM and subclassing.
        If None, then Gaussian is used.
    c         C   sÛ   | |  _  | d  k	 r! | |  _ n t j |  j  j d ƒ |  _ | p| g  t | j d ƒ D]" } t | d  d  … | f ƒ ^ qW |  _ x+ t | j d ƒ D] } d |  j | _	 q– W| d  k rÎ t
 j ƒ  |  _ n	 | |  _ d  S(   Ni    i   i
   (   R-   R   t   weightsR   t   onesR	   R    R   R+   t   dfR   t   GaussianR,   (   R   R-   R+   RE   R,   R=   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyR    Â   s    	Ec         C   s   d |  _  t j |  _ |  S(   s3   initialize iteration ?, should be removed

        i    (   t   iterR   t   inft   dev(   R   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyt   _iter__Õ   s    	c   
      C   s˜  |  j  } |  j  j } | j |  j ƒ } t j |  j j d t j ƒ } | |  j j	 ƒ  |  j j	 ƒ  } xt
 |  j j d ƒ D]é } |  j | j ƒ  } t j | | | | ƒ j ƒ  } | rÝ | | | | f GHt d ƒ ‚ n  |  j | j | | | d |  j ƒ|  j | j ƒ  }	 |	 |  j j	 ƒ  |  j j	 ƒ  |  j  j | <t rX|  j | j GHn  | |	 | 7} q} W|  j  j } t | | |  j |  j |  j | ƒ S(   s   internal calculation for one fit iteration

        BUG: I think this does not improve, what is supposed to improve
            offset doesn't seem to be used, neither an old alpha
            The smoothers keep coef/params from previous iteration
        i   s   nan encounteredRE   (   t   resultsR)   R2   R-   R   t   zerosR	   t   float64RE   R8   R    R+   t   isnant   anyR9   t   smoothR   t   DEBUGt   paramsR&   R,   (
   R   t   _resultsR)   R/   R   R*   R=   t   tmpt   badt   tmp2(    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyt   nextÝ   s(    	 !
+c         C   sÕ   |  j  d 7_  t rR |  j  |  j j j f GH|  j j |  j ƒ j |  j j f GHn  |  j j |  j j |  j ƒ d |  j j ƒ  } |  j  |  j	 k r˜ t
 St j |  j | | ƒ |  j k  rÈ | |  _ t
 S| |  _ t S(   sÏ   condition to continue iteration loop

        Parameters
        ----------
        tol

        Returns
        -------
        cont : bool
            If true, then iteration should be continued.

        i   i   (   RI   RS   RM   R)   R	   R2   R-   RE   R8   t   maxitert   FalseR   t   fabsRK   t   rtolt   True(   R   t   curdev(    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyt   cont  s    &0#		c         C   sT   |  j  j j d t j g  t |  j j d ƒ D] } |  j | j ƒ  ^ q- ƒ j	 ƒ  S(   sI   degrees of freedom of residuals, ddof is sum of all smoothers df
        i    i   (
   RM   R)   R	   R   R;   R    R-   R+   t   df_fitR8   (   R   R=   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyt   df_resid   s    c         C   s.   |  j  j |  j  |  j ƒ d j ƒ  |  j ƒ  S(   s1   estimate standard deviation of residuals
        i   (   RM   R)   R-   R8   Rb   (   R   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyt   estimate_scale%  s    gíµ ÷Æ°>i   c   	      C   sm  | |  _  | |  _ |  j ƒ  d } | |  j j ƒ  |  j j ƒ  } t j |  j j d t j	 ƒ } x” t
 |  j j d ƒ D]| } |  j | j | | | d |  j ƒ|  j | j ƒ  } | |  j j ƒ  |  j j ƒ  | | <| | j ƒ  8} | | 7} qx Wt | | |  j |  j |  j | ƒ |  _ x |  j ƒ  r@|  j ƒ  |  _ q"W|  j |  j k rft j t t ƒ n  |  j S(   sw   fit the model to a given endogenous variable Y

        This needs to change for consistency with statsmodels

        i    i   RE   (   R]   RZ   RL   RE   R8   R   RN   R-   R	   RO   R    R+   RR   R2   R&   R,   RM   R`   RY   RI   R5   R6   R   R   (	   R   R)   R]   RZ   R/   R*   R   R=   RV   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyt   fit+  s&    		
 
$'N(   R$   R%   t   __doc__R   R    RL   RY   R`   Rb   Rc   Rd   (    (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyRD   ´   s   		&			t   Modelc           B   sA   e  Z d e j ƒ  d  „ Z d „  Z d d „ Z d d d „ Z RS(   c         C   sN   t  j |  | d | d | ƒt j |  | | d | ƒ|  j | k sJ t ‚ d  S(   NR+   R,   (   RD   R    R   R,   t   AssertionError(   R   t   endogR-   R+   R,   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyR    \  s    c         C   s¦  |  j  } | j } t j |  j ƒ j ƒ  r2 d GHn  |  j j j | j	 |  j
 ƒ ƒ | _ |  j j | j ƒ } t j | ƒ j ƒ  r‘ | |  _ d GHn  | |  _ t rÏ d t j |  j j j | j ƒ ƒ j ƒ  f GHn  | j	 |  j
 ƒ |  j j j | j ƒ | | j } t |  j
 d |  j d |  j d |  j ƒ} | j | ƒ } |  j j | | j	 |  j
 ƒ g ƒ | | _ |  j j j | j	 |  j
 ƒ ƒ | _ |  j d 7_ | |  _  | S(   Nt   nanweights1t   nanweights2s   deriv isnanR+   RE   R,   i   (   RM   R)   R   RP   RE   t   allR,   R0   R1   R2   R-   R/   RS   t   derivRQ   RD   R+   Rd   t   historyt   appendRI   (   R   RU   R)   RE   t   Zt   m(    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyRY   c  s.    		$		/$"	$	c         C   s[   | d k r |  j } n  | |  j j } t j | d ƒ |  j j |  j j ƒ j ƒ  |  j	 S(   s9   
        Return Pearson's X^2 estimate of scale.
        i   N(
   R   R)   RM   R/   R   t   powerR,   t   varianceR8   Rb   (   R   R)   t   resid(    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyRc   ‡  s
    +gíµ ÷Æ°>i   c   	      C   sg  | |  _  | |  _ t j | t j ƒ |  _ g  |  _ |  j ƒ  |  j j ƒ  } |  j	 j
 | ƒ } |  j	 j | ƒ |  j	 j j | ƒ | | } t |  j d |  j d |  j	 ƒ} | j | ƒ |  _ |  j	 j j |  j j |  j ƒ ƒ |  j _ | |  j _ x8 |  j ƒ  r.|  j ƒ  |  _ |  j ƒ  |  _ |  j _ q÷ W|  j |  j k r`d d  l } | j t t ƒ n  |  j S(   NR+   R,   iÿÿÿÿ(   R]   RZ   R   t   asarrayRO   R)   Rm   RL   R>   R,   t   starting_muR0   Rl   RD   R-   R+   Rd   RM   R1   R2   R/   R`   RY   Rc   t   scaleRI   R5   R6   R   R   (	   R   R)   R]   RZ   R*   t   mu0Ro   Rp   R5   (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyRd   ”  s&    			
-!*N(	   R$   R%   R   R   RH   R    RY   Rc   Rd   (    (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyRf   L  s   	$(   Re   t   statsmodels.compat.pythonR    t   numpyR   t   statsmodels.genmodR   t+   statsmodels.sandbox.nonparametric.smoothersR   t+   statsmodels.genmod.generalized_linear_modelR   t   statsmodels.tools.sm_exceptionsR   R   R5   R[   RS   R   R   t   objectR   R&   RD   Rf   (    (    (    s6   lib/python2.7/site-packages/statsmodels/sandbox/gam.pyt   <module>#   s   '	G˜