
p7]c           @   s$  d  Z  d d l m Z m Z m Z m Z d d l Z d d l Z	 d d l
 m Z d d l m Z m Z m Z d d l m Z d d l m Z d d l m Z d d	 l m Z d d
 l m Z d d l m Z d d l m Z m Z m Z m  Z  m! Z! d d d d d d d d d d d d d d d d d g Z" d   Z# d d  d!  Z$ d d d"  Z& d d#  Z' d$   Z( d i  e) i  d e* d%  Z+ d d d d&  Z, d d'  Z- d d d d(  Z. d d d d d d d)  Z/ e) d* d+ d, d- d d.  Z0 e  j1 i d/ d0 6 e0 _  e) d* d+ d, d- d d1  Z2 e  j1 i d2 d0 6 e2 _  d* d d3  Z3 e! j1 i d/ d0 6 e3 _  d* d d4  Z4 e! j1 i d2 d0 6 e4 _  d e) d d d5  Z5 e i d6 d0 6e5 _  d d7  Z6 e i d6 d0 6e6 _  d8 d d d9  Z7 e i d6 d0 6e7 _  d8 d d:  Z8 d;   Z9 d e) d d<  Z: d S(=   s   Partial Regression plot and residual plots to find misspecification


Author: Josef Perktold
License: BSD-3
Created: 2011-01-23

update
2011-06-05 : start to convert example to usable functions
2011-10-27 : docstrings

i(   t   lranget   string_typest   lzipt   rangeN(   t   dmatrix(   t   OLSt   GLSt   WLS(   t   GLM(   t   GEE(   t   wls_prediction_std(   t   utils(   t   lowess(   t   maybe_unwrap_resultsi   (   t   _plot_added_variable_doct   _plot_partial_residuals_doct   _plot_ceres_residuals_doct   _plot_influence_doct   _plot_leverage_resid2_doct   plot_fitt   plot_regress_exogt   plot_partregresst	   plot_ccprt   plot_partregress_gridt   plot_ccpr_gridt
   add_lowesst   abline_plott   influence_plott   plot_leverage_resid2t   added_variable_residst   partial_residst   ceres_residst   plot_added_variablet   plot_partial_residualst   plot_ceres_residualsc         C   s   d |  j  d |  j S(   Ng       @i   (   t   df_modelt   nobs(   t   results(    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyt   _high_leverage)   s    i    g?c         K   s~   |  j    | j } |  j    | j } t | | d | | } |  j | d d  d f | d d  d f d d d |  j S(   s  
    Add Lowess line to a plot.

    Parameters
    ----------
    ax : matplotlib Axes instance
        The Axes to which to add the plot
    lines_idx : int
        This is the line on the existing plot to which you want to add
        a smoothed lowess line.
    frac : float
        The fraction of the points to use when doing the lowess fit.
    lowess_kwargs
        Additional keyword arguments are passes to lowess.

    Returns
    -------
    fig : matplotlib Figure instance
        The figure that holds the instance.
    t   fracNi    i   t   rt   lwg      ?(   t	   get_linest   _yt   _xR   t   plott   figure(   t   axt	   lines_idxR'   t   lowess_kwargst   y0t   x0t   lres(    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR   .   s
    9c      
   K   s  t  j |  \ } } t  j | |  j  \ } } t |   }  |  j j } |  j j d d  | f } t j |  }	 | |	 } | |	 } | j	 | | d d |  j j
 | d k	 r | j	 | | |	 d d d n  d | }
 t |   \ } } } | j	 | |  j |	 d d d	 d d
 | | j | | |	 | |	 d d d d d d | j |
  | j |  | j |  j j
  | j d d d d  | S(   s  Plot fit against one regressor.

    This creates one graph with the scatterplot of observed values compared to
    fitted values.

    Parameters
    ----------
    results : result instance
        result instance with resid, model.endog and model.exog as attributes
    x_var : int or str
        Name or index of regressor in exog matrix.
    y_true : array_like
        (optional) If this is not None, then the array is added to the plot
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    kwargs
        The keyword arguments are passed to the plot command for the fitted
        values points.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Examples
    --------
    Load the Statewide Crime data set and perform linear regression with
    `poverty` and `hs_grad` as variables and `murder` as the response

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt

    >>> data = sm.datasets.statecrime.load_pandas().data
    >>> murder = data['murder']
    >>> X = data[['poverty', 'hs_grad']]

    >>> X["constant"] = 1
    >>> y = murder
    >>> model = sm.OLS(y, X)
    >>> results = model.fit()

    Create a plot just for the variable 'Poverty':

    >>> fig, ax = plt.subplots()
    >>> fig = sm.graphics.plot_fit(results, 0, ax=ax)
    >>> ax.set_ylabel("Murder Rate")
    >>> ax.set_xlabel("Poverty Level")
    >>> ax.set_title("Linear Regression")

    >>> plt.show()

    .. plot:: plots/graphics_plot_fit_ex.py

    Nt   bot   labels   b-s   True valuess   Fitted values versus %st   Dt   colorR(   t   fittedt	   linewidthi   t   kt   alphagffffff?t   loct   bestt	   numpoints(   R   t   create_mpl_axt   maybe_name_or_idxt   modelR   t   endogt   exogt   npt   argsortR-   t   endog_namest   NoneR
   t   fittedvaluest   vlinest	   set_titlet
   set_xlabelt
   set_ylabelt   legend(   R%   t   exog_idxt   y_trueR/   t   kwargst   figt	   exog_namet   yt   x1t
   x1_argsortt   titlet   prstdt   iv_lt   iv_u(    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR   J   s,    :

 

&c      
   C   s  t  j |  } t  j | |  j  \ } } t |   }  |  j j } |  j j d d  | f } t |   \ } } } | j d d d  }	 |	 j	 | |  j j
 d d d d d d	 | |	 j	 | |  j d
 d d d	 d d d |	 j | | | d d d d d d |	 j d d d |	 j |  |	 j |  |	 j d d  | j d d d  }	 |	 j	 | |  j d  |	 j d d d d  |	 j d | d d |	 j |  |	 j d  | j d d d  }	 t j |  j j j d t  }
 t |
 | <|  j j d d  |
 f } d d l m } t |  j j j | | d | d |  j j j | d  t d! |	 } |	 j d" d d | j d d d#  }	 t |  | d! |	 } |	 j d$ d d | j d% | d d | j    | j! d& d  | S('   ss  Plot regression results against one regressor.

    This plots four graphs in a 2 by 2 figure: 'endog versus exog',
    'residuals versus exog', 'fitted versus exog' and
    'fitted plus residual versus exog'

    Parameters
    ----------
    results : result instance
        result instance with resid, model.endog and model.exog as attributes
    exog_idx : int or str
        Name or index of regressor in exog matrix
    fig : Matplotlib figure instance, optional
        If given, this figure is simply returned.  Otherwise a new figure is
        created.

    Returns
    -------
    fig : matplotlib figure instance

    Examples
    --------
    Load the Statewide Crime data set and build a model with regressors
    including the rate of high school graduation (hs_grad), population in urban
    areas (urban), households below poverty line (poverty), and single person
    households (single).  Outcome variable is the muder rate (murder).

    Build a 2 by 2 figure based on poverty showing fitted versus actual murder
    rate, residuals versus the poverty rate, partial regression plot of poverty,
    and CCPR plot for poverty rate.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plot
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 6))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_regress_exog(results, 'poverty', fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_regress_exog.py

    Ni   i   t   oR8   t   bR<   g?R6   R7   R(   R9   g      ?R:   R;   gffffff?s   Y and Fitted vs. Xt   fontsizet   largeR=   R>   RT   i    t   blacks   Residuals versus %st   residi   i(   t   Seriest   namet   indext
   obs_labelsR/   s   Partial regression ploti   s	   CCPR Plots   Regression Plots for %st   top("   R   t   create_mpl_figRA   RB   R   RG   RD   R
   t   add_subplotR-   RC   RI   RJ   RK   RL   RM   RN   R`   t   axhlineRE   t   onest   shapet   boolt   Falset   pandasRa   R   t   datat
   orig_endogt
   row_labelsR   t   suptitlet   tight_layoutt   subplots_adjust(   R%   RO   RR   RS   t   y_nameRU   RX   RY   RZ   R/   t	   exog_notit   exog_othersRa   (    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR      sL    /+!%

c         C   sU   t  |  |  j   } t  | |  j   } t  | j | j  j   } | | | f f S(   s  Partial regression.

    regress endog on exog_i conditional on exog_others

    uses OLS

    Parameters
    ----------
    endog : array_like
    exog : array_like
    exog_others : array_like

    Returns
    -------
    res1c : OLS results instance

    (res1a, res1b) : tuple of OLS results instances
         results from regression of endog on exog_others and of exog_i on
         exog_others

    (   R   t   fitR`   (   RC   t   exog_iRv   t   res1at   res1bt   res1c(    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyt   _partial_regression	  s    c	      	   K   s  t  j |  \ }
 } t |  t  r: t |  d |  }  n  t | t  r[ t | |  } n6 t | t  r d j |  } t | |  } n | } t } t | t j	  r | j
 d k r t } n$ t | t j  r | j r t } n  t | t  r
t | d |  } n  | r| j |  | d |	  t |  |  j   } t | t j	  rSd n | j } t |  t j	  rtd n |  j j d } n t |  |  j   } t | |  j   } | j } | j } | j j } | j j } | j | | d |	  t | |  j   } t d | j d d d d	 | }
 | d k r=d } n  | j d
 |  | j d
 |  | j d |  | t k r| d k	 r| j } n* t | d  r| j } n | j j  j! } | d k rt" t# |   } qn  | t k	 rt# |  t# |  k rt$ d   n  | j% t& d d d d   t  j' t" t# |   | t( | j | j  d g t# |  d d	 | | } n  | r|
 | j | j f f S|
 Sd S(   s  Plot partial regression for a single regressor.

    Parameters
    ----------
    endog : ndarray or string
       endogenous or response variable. If string is given, you can use a
       arbitrary translations as with a formula.
    exog_i : ndarray or string
        exogenous, explanatory variable. If string is given, you can use a
        arbitrary translations as with a formula.
    exog_others : ndarray or list of strings
        other exogenous, explanatory variables. If a list of strings is given,
        each item is a term in formula. You can use a arbitrary translations
        as with a formula. The effect of these variables will be removed by
        OLS regression.
    data : DataFrame, dict, or recarray
        Some kind of data structure with names if the other variables are
        given as strings.
    title_kwargs : dict
        Keyword arguments to pass on for the title. The key to control the
        fonts is fontdict.
    obs_labels : bool or array-like
        Whether or not to annotate the plot points with their observation
        labels. If obs_labels is a boolean, the point labels will try to do
        the right thing. First it will try to use the index of data, then
        fall back to the index of exog_i. Alternatively, you may give an
        array-like object corresponding to the obseveration numbers.
    labels_kwargs : dict
        Keyword arguments that control annotate for the observation labels.
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    ret_coords : bool
        If True will return the coordinates of the points in the plot. You
        can use this to add your own annotations.
    kwargs
        The keyword arguments passed to plot for the points.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.
    coords : list, optional
        If ret_coords is True, return a tuple of arrays (x_coords, y_coords).

    Notes
    -----
    The slope of the fitted line is the that of `exog_i` in the full
    multiple regression. The individual points can be used to assess the
    influence of points on the estimated coefficient.

    See Also
    --------
    plot_partregress_grid : Plot partial regression for a set of regressors.

    Examples
    --------
    Load the Statewide Crime data set and plot partial regression of the rate
    of high school graduation (hs_grad) on the murder rate(murder).

    The effects of the percent of the population living in urban areas (urban),
    below the poverty line (poverty) , and in a single person household (single)
    are removed by OLS regression.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt

    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> sm.graphics.plot_partregress(endog='murder', exog_i='hs_grad',
    ...                              exog_others=['urban', 'poverty', 'single'],
    ...                              data=crime_data.data, obs_labels=False)
    >>> plt.show()

    .. plot:: plots/graphics_regression_partregress.py

    More detailed examples can be found in the Regression Plots notebook
    on the examples page.

    s   -1t   +i    R[   t   xRT   R8   R;   R/   s	   e(%s | X)s   Partial Regression PlotRc   s*   obs_labels does not match length of exog_it   hat   centert   vat   bottomi   s   x-largeN(   i    i   ()   R   R@   t
   isinstanceR   R   t   listt   joinRl   RE   t   ndarrayt   sizet   Truet   pdt	   DataFramet   emptyR-   R   Rw   Rb   t   design_infot   column_namesR`   RB   RG   R   t   paramsRL   RM   RK   RH   Rc   t   hasattrRn   Rp   R    t   lent
   ValueErrort   updatet   dictt   annotate_axesR   (   RC   Rx   Rv   Rn   t   title_kwargsRd   t   label_kwargsR/   t
   ret_coordsRQ   RR   t   RHSt   RHS_isemtpyt   fitted_linet   x_axis_endog_namet   y_axis_endog_namet	   res_yaxist	   res_xaxist   xaxis_residt   yaxis_resid(    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR   '  sl    U!		!+		"	c         C   s  d d l  } t j |  } t j | |  j  \ } } | j |  j j d |  j j } |  j j } | j	 d } t
 |  d d }	 |	 t
 |  k r d n d }
 | d k	 r | \ }	 }
 n  |
 d k r i i d d 6d 6} n  t j |  j j  } x t |  D] \ } } t |  } | j |  | j | d d  | f d	 | | } | j |	 |
 | d  } t | | j | d d  | f d | | | d
 | d | d t | j d  qW| j d d d | j   | j d d  | S(   s  Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : results instance
        A regression model results instance
    exog_idx : None, list of ints, list of strings
        (column) indices of the exog used in the plot, default is all.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, this figure is simply returned.  Otherwise a new figure is
        created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `fig` is None, the created figure.  Otherwise `fig` itself.

    Notes
    -----
    A subplot is created for each explanatory variable given by exog_idx.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    See Also
    --------
    plot_partregress : Plot partial regression for a single regressor.
    plot_ccpr : Plot CCPR against one regressor

    Examples
    --------
    Using the state crime dataset seperately plot the effect of the each
    variable on the on the outcome, murder rate while accounting for the effect
    of all other variables in the model visualized with a grid of partial
    regression plots.

    >>> from statsmodels.graphics.regressionplots import plot_partregress_grid
    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 6))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> plot_partregress_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_partregress_grid.py

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm

    iNRb   i   i   t   smallR]   t   fontdictt   columnsR/   R   Rd   t    s   Partial Regression PlotR^   Re   gffffff?(   Rm   R   Rf   RA   RB   Ra   RC   RG   RD   Rj   R   RH   RE   t   arrayt
   exog_namest	   enumerateR    t   popR   Rg   R   Rl   RK   Rq   Rr   Rs   (   R%   RO   t   gridRR   Rm   RS   RT   RD   t   k_varst   nrowst   ncolsR   t   other_namest   it   idxt   othersRv   R/   (    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR     s8    <!"

c   
      C   s  t  j |  \ } } t  j | |  j  \ } } t |   }  |  j j d d  | f } | |  j | } | j | | |  j d  d d l	 m
 } t | | |   j   } | j }	 t |	 t d |    } | j d  | j d | | f  | j d |  | S(	   s  Plot CCPR against one regressor.

    Generates a CCPR (component and component-plus-residual) plot.

    Parameters
    ----------
    results : result instance
        A regression results instance.
    exog_idx : int or string
        Exogenous, explanatory variable. If string is given, it should
        be the variable name that you want to use, and you can use arbitrary
        translations as with a formula.
    ax : Matplotlib AxesSubplot instance, optional
        If given, it is used to plot in instead of a new figure being
        created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    plot_ccpr_grid : Creates CCPR plot for multiple regressors in a plot grid.

    Notes
    -----
    The CCPR plot provides a way to judge the effect of one regressor on the
    response variable by taking into account the effects of the other
    independent variables. The partial residuals plot is defined as
    Residuals + B_i*X_i versus X_i. The component adds the B_i*X_i versus
    X_i to show where the fitted line would lie. Care should be taken if X_i
    is highly correlated with any of the other independent variables. If this
    is the case, the variance evident in the plot will be an underestimate of
    the true variance.

    Examples
    --------
    Using the state crime dataset plot the effect of the rate of single
    households ('single') on the murder rate while accounting for high school
    graduation rate ('hs_grad'), percentage of people in an urban area, and rate
    of poverty ('poverty').


    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plot
    >>> import statsmodels.formula.api as smf

    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_ccpr(results, 'single')
    >>> plt.show()

    .. plot:: plots/graphics_regression_ccpr.py

    References
    ----------
    http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm

    NR[   i(   t   add_constantR/   s*   Component and component plus residual plots   Residual + %s*beta_%ds   %s(   R   R@   RA   RB   R   RD   R   R-   R`   t   statsmodels.tools.toolsR   R   Rw   R   R   RK   RM   RL   (
   R%   RO   R/   RR   RS   RU   t   x1betaR   t   modR   (    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR   .  s    ?	c         C   s[  t  j |  } t  j | |  j  \ } } | d k	 rE | \ } } nL t |  d k r t t j t |  d   } d } n t |  } d } d } x t	 |  D] \ } }	 |  j j
 d d  |	 f j   d k r d } q n  | j | | | d |  }
 t |  d |	 d |
 } |
 j d  q W| j d	 d
 d | j   | j d d  | S(   s  Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of CCPR (component and component-plus-residual) plots.

    Parameters
    ----------
    results : result instance
        uses exog and params of the result instance
    exog_idx : None or list of int
        (column) indices of the exog used in the plot
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Matplotlib figure instance, optional
        If given, this figure is simply returned.  Otherwise a new figure is
        created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    See Also
    --------
    plot_ccpr : Creates CCPR plot for a single regressor.

    Examples
    --------
    Using the state crime dataset seperately plot the effect of the each
    variable on the on the outcome, murder rate while accounting for the effect
    of all other variables in the model.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 8))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_ccpr_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_ccpr_grid.py

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
    i   g       @i   i    NRO   R/   R   s&   Component-Component Plus Residual PlotR]   R^   Re   gffffff?(   R   Rf   RA   RB   RH   R   t   intRE   t   ceilR   RD   t   varRg   R   RK   Rq   Rr   Rs   (   R%   RO   R   RR   RS   R   R   t   seen_constantR   R   R/   (    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR     s*    >	(
c            s  | d k	 r | j   } n d } t j |  \ } } | r | j \   | d k r | j j d d  d f j   | j j d d  d f j   g } q nB  d k	 o  d k	 s t	 d   n  | d k r | j   } n  | d   | d   g }	 | j
 |  d d l m }
 d |
 f     f d     Y    | |	 |  } | j |  | j j d	 | j  | _ | j j d
 | j  | _ | r| j |  n  | r| j |  n  | S(   s  
    Plots a line given an intercept and slope.

    intercept : float
        The intercept of the line
    slope : float
        The slope of the line
    horiz : float or array-like
        Data for horizontal lines on the y-axis
    vert : array-like
        Data for verterical lines on the x-axis
    model_results : statsmodels results instance
        Any object that has a two-value `params` attribute. Assumed that it
        is (intercept, slope)
    ax : axes, optional
        Matplotlib axes instance
    kwargs
        Options passed to matplotlib.pyplot.plt

    Returns
    -------
    fig : Figure
        The figure given by `ax.figure` or a new instance.

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm

    >>> np.random.seed(12345)
    >>> X = sm.add_constant(np.random.normal(0, 20, size=30))
    >>> y = np.dot(X, [25, 3.5]) + np.random.normal(0, 30, size=30)
    >>> mod = sm.OLS(y,X).fit()
    >>> fig = sm.graphics.abline_plot(model_results=mod)
    >>> ax = fig.axes[0]
    >>> ax.scatter(X[:,1], y)
    >>> ax.margins(.1)
    >>> import matplotlib.pyplot as plt
    >>> plt.show()

    .. plot:: plots/graphics_regression_abline.py

    Ni   s-   specify slope and intercepty or model_resultsi    i(   t   Line2Dt   ABLine2Dc              s8   e  Z   f d    Z   f d   Z   f d   Z RS(   c            s/   t    |   j | |   d  |  _ d  |  _ d  S(   N(   t   supert   __init__RH   t   id_xlim_callbackt   id_ylim_callback(   t   selft   argsRQ   (   R   (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR   &  s    	c            s^   |  j  } |  j r( | j j |  j  n  |  j rG | j j |  j  n  t   |   j   d  S(   N(   t   axesR   t	   callbackst
   disconnectR   R   t   remove(   R   R/   (   R   (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR   +  s    			c            s   | j  t  | j   } g  | D] } | |  k r  | ^ q  } | d } | j   } | d    | d    g } | j | |  | j j j   d  S(   Ni    i   (   t   set_autoscale_onRl   t   get_childrent   get_xlimt   set_dataR.   t   canvast   draw(   R   R/   t   childrent   childt   ablinest   ablineR~   RT   (   t	   interceptt   slope(    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyt   update_datalim3  s    %
$(   t   __name__t
   __module__R   R   R   (    (   R   R   R   (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR   %  s   t   xlim_changedt   ylim_changed(   RH   R   R   R@   R   RB   RD   t   mint   maxR   t   set_xlimt   matplotlib.linesR   t   add_lineR   t   connectR   R   R   t   hlinet   vline(   R   R   t   horizt   vertt   model_resultsR/   RQ   R~   RR   t   data_yR   t   line(    (   R   R   R   sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR     s4    -+$"g?t   cooksi0   g      ?c         K   s&  | }	 t  j |  \ }
 } | j   j d  r@ |	 j d } n> | j   j d  rn t j |	 j d  } n t d |   t j	 |  } | d d } | | j
   | | d } |	 j } | r |	 j } n	 |	 j } d d l m } | j j d	 | d |  j  } t j |  | k } | t |   k } t j | |  } | j | | d
 | d | |  j j j } | d  k rt t |   } n  t  j t j |  d | t | |  t | d d | d d  d |  } i d d 6d d 6} | j d |  | j  d |  | j! d |  |
 S(   Nt   cooi    t   dffs   Criterion %s not understoodi   i   i(   t   statsg      ?t   sR<   g      ?s   x-largei   R]   R_   R8   s   Studentized Residualss
   H Leverages   Influence Ploti@   i@   ("   R   R@   t   lowert
   startswitht   cooks_distanceRE   t   abst   dffitsR   t   ptpR   t   hat_matrix_diagt   resid_studentized_externalt   resid_studentizedt   scipyR   t   tt   ppft   df_residR&   t
   logical_ort   scatterRB   Rn   Rp   RH   R    R   R   t   whereR   RM   RL   RK   (   R%   t	   influencet   externalR<   t	   criterionR   t
   plot_alphaR/   RQ   t   inflRR   t   psizet	   old_ranget	   new_ranget   leveraget   residsR   t   cutofft   large_residt   large_leveraget   large_pointst   labelst   font(    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyt   _influence_plotJ  s@    		  	sn   results: object
	Results for a fitted regression model
influence: instance
    instance of Influence for modelt   extra_params_docc   
      K   sF   |  j    } t |  | d | d | d | d | d | d | | }	 |	 S(   NR   R<   R   R   R   R/   (   t   get_influenceR  (
   R%   R   R<   R   R   R   R/   RQ   R   t   res(    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR     s
    s9   results: object
    Results for a fitted regression modelc         K   sx  d d l  m } m } t j |  \ } } | } | j }	 | | j  }
 | j |
 d |	 d |  | j d  | j	 d  | j
 d  |	 t |   k } | j d | d  } t j |
  | k } |  j j j } | d  k r t t |  j   } n  t j t j | |   d	 } t j | | t |
 d |	  d g t |  j  d d | d d d d } | j d d  | S(   Ni(   t   zscoret   normi   R[   s   Normalized residuals**2t   Leverages)   Leverage vs. Normalized residuals squaredg      ?i    i   R^   R/   R   R   R   R   g333333?(   i    i   (   t   scipy.statsR  R  R   R@   R   R`   R-   RL   RM   RK   R&   R   RE   R   RB   Rn   Rp   RH   R    R   R$   R   R   R   R   t   margins(   R%   R   R<   R/   RQ   R  R  RR   R   R   R`   R  R  R  R  Rc   (    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyt   _plot_leverage_resid2  s*    	c         K   s.   |  j    } t |  | d d d d  | } | S(   NR<   g?R/   (   R
  R  RH   (   R%   R<   R/   RQ   R   R  (    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR     s    	c      	   C   s   |  j  } t j |  \ } } t |  | d | d | d | \ } }	 | j |	 | d d d | j d d d	 t |  t k r | }
 n | j | }
 | j	 |
 d
 d | j
 | j d d
 d | S(   Nt
   resid_typet   use_glm_weightst
   fit_kwargsR[   R<   g333333?s   Added variable plotR]   R^   R   i   s
    residuals(   RB   R   R@   R   R-   RK   t   typet   strR   RL   RM   RG   (   R%   t
   focus_exogR  R  R  R/   RB   RR   t   endog_residt   focus_exog_residt   xname(    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR      s    		s6   results: object
	Results for a fitted regression modelc   	      C   s   |  j  } t j | |  \ } } t |  |  } |  j  j d  d   | f } t j |  \ } } | j | | d d d | j d d d t |  t	 k r | } n | j
 | } | j | d d | j d	 d d | S(
   NR[   R<   g333333?s   Partial residuals plotR]   R^   R   i   s   Component plus residual(   RB   R   RA   R   RD   R@   R-   RK   R  R  R   RL   RM   (	   R%   R  R/   RB   t	   focus_colt   prt   focus_exog_valsRR   R  (    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR!     s    		gQ?c   
      C   s   |  j  } t j | |  \ } } t |  | d | d | } | j d  d   | f } t j |  \ }	 } | j | | d d d | j d d d | j | d	 d
 | j	 d d	 d
 |	 S(   NR'   t
   cond_meansR[   R<   g333333?s   CERES residuals plotR]   R^   R   i   s   Component plus residual(
   RB   R   RA   R   RD   R@   R-   RK   RL   RM   (
   R%   R  R'   R  R/   RB   R  t   presidR  RR   (    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR"     s    		c         C   s  |  j  } t | t t t f  s: t d | j j   n  t j	 | |  \ } } t
 t |  j   } t |  } | j |  t |  } | d k r| j d d  | f } | | j d  8} t j j | d  \ }	 }
 } t j |
 d k  } |	 d d  | f } | j d d  | f } t j t |  | j d f  } xe t
 | j d  D]M } | d d  | f } t | | d | d t } | | d d  | f <q\Wn  t j | j d d  | f | f d d } | j } | j   } | | j | |  } | j   } | j | j } t | t t f  rX| | j j  j! | j  9} n  | j d | k r| t j" | d d  | d  f | j |  7} n  | S(	   s  
    Calculate the CERES residuals (Conditional Expectation Partial
    Residuals) for a fitted model.

    Parameters
    ----------
    results : model results instance
        The fitted model for which the CERES residuals are calculated.
    focus_exog : int
        The column of results.model.exog used as the 'focus variable'.
    frac : float, optional
        Lowess smoothing parameter for estimating the conditional
        means.  Not used if `cond_means` is provided.
    cond_means : array-like, optional
        If provided, the columns of this array are the conditional
        means E[exog | focus exog], where exog ranges over some
        or all of the columns of exog other than focus exog.  If
        this is an empty nx0 array, the conditional means are
        treated as being zero.  If None, the conditional means are
        estimated.

    Returns
    -------
    An array containing the CERES residuals.

    Notes
    -----
    If `cond_means` is not provided, it is obtained by smoothing each
    column of exog (except the focus column) against the focus column.

    Currently only supports GLM, GEE, and OLS models.
    s$   ceres residuals not available for %sNi    gư>i   R'   t   return_sortedt   axis(#   RB   R   R   R	   R   R   t	   __class__R   R   RA   R   R   R   R   R   RH   RD   t   meanRE   t   linalgt   svdt   flatnonzeroR   Rj   R   Rl   t   concatenatet   _get_init_kwdsRC   Rw   RI   t   familyt   linkt   derivt   dot(   R%   R  R'   R  RB   R  t   ix_nft   nnft   pexogt   uR   t   vtt   iit   fcolt   jR2   t   cft   new_exogt   klasst   init_kwargst	   new_modelt
   new_resultR  (    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR     s@    "	".	6c         C   s   |  j  } | j |  j   } t | t t f  rP | | j j j |  j	  9} n1 t | t
 t t f  rk n t d t |    t |  t k r | j j |  } n | } |  j | | j d d  | f } | | S(   s:  
    Returns partial residuals for a fitted model with respect to a
    'focus predictor'.

    Parameters
    ----------
    results : results instance
        A fitted regression model.
    focus col : int
        The column index of model.exog with respect to which the
        partial residuals are calculated.

    Returns
    -------
    An array of partial residuals.

    References
    ----------
    RD Cook and R Croos-Dabrera (1998).  Partial residual plots in
    generalized linear models.  Journal of the American Statistical
    Association, 93:442.
    s+   Partial residuals for '%s' not implemented.N(   RB   RC   t   predictR   R   R	   R)  R*  R+  RI   R   R   R   R   R  R  R   Rc   R   RD   (   R%   R  RB   R`   R  t	   focus_val(    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR   k  s    	$c         C   sT  |  j  } t | t t t f  s: t d | j j   n  | j } | j	 } t
 j | |  \ } } | d d  | f }	 | d k r t | t t f  r d } q d } n  t | j d  }
 t |
  }
 |
 j |  | d d  |
 f } |  j |
 } | j } | j   } | | | |  } i | d 6} | d k	 rL| j |  n  | j |   } | j sst d   n  y t | |  } Wn! t k
 rt d |   n Xd	 d l j j } t | t t f  r)| r)| j j |  j  } t | d
  r| | j } n  | j  |	 | |  j   } n | j |	 |  j   } | j! } | | f S(   sY  
    Residualize the endog variable and a 'focus' exog variable in a
    regression model with respect to the other exog variables.

    Parameters
    ----------
    results : regression results instance
        A fitted model including the focus exog and all other
        predictors of interest.
    focus_exog : integer or string
        The column of results.model.exog or a variable name that is
        to be residualized against the other predictors.
    resid_type : string
        The type of residuals to use for the dependent variable.  If
        None, uses `resid_deviance` for GLM/GEE and `resid` otherwise.
    use_glm_weights : bool
        Only used if the model is a GLM or GEE.  If True, the
        residuals for the focus predictor are computed using WLS, with
        the weights obtained from the IRLS calculations for fitting
        the GLM.  If False, unweighted regression is used.
    fit_kwargs : dict, optional
        Keyword arguments to be passed to fit when refitting the
        model.

    Returns
    -------
    endog_resid : array-like
        The residuals for the original exog
    focus_exog_resid : array-like
        The residuals for the focus predictor

    Notes
    -----
    The 'focus variable' residuals are always obtained using linear
    regression.

    Currently only GLM, GEE, and OLS models are supported.
    s8   model type %s not supported for added variable residualsNt   resid_devianceR`   i   t   start_paramss>   fit did not converge when calculating added variable residualss    '%s' residual type not availableit   data_weights("   RB   R   R	   R   R   R   R"  R   RD   RC   R   RA   RH   R   Rj   R   R   R   R(  R   Rw   t	   convergedt   getattrt   AttributeErrort#   statsmodels.regression.linear_modelt
   regressiont   linear_modelR)  t   weightsRI   R   R?  R   R`   (   R%   R  R  R  R  RB   RD   RC   R  R  R2  t   reduced_exogR>  R7  RQ   R9  R   R:  R  t   lmRF  t
   lm_resultsR  (    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyR     sN    )								(;   t   __doc__t   statsmodels.compat.pythonR    R   R   R   t   numpyRE   Rm   R   t   patsyR   RC  R   R   R   t+   statsmodels.genmod.generalized_linear_modelR   t3   statsmodels.genmod.generalized_estimating_equationsR	   t&   statsmodels.sandbox.regression.predstdR
   t   statsmodels.graphicsR   t*   statsmodels.nonparametric.smoothers_lowessR   R   R   t   _regressionplots_docR   R   R   R   R   t   __all__R&   R   RH   R   R   R|   R   Rl   R   R   R   R   R   R  t   formatR   R  R   R    R!   R"   R   R   R   (    (    (    sC   lib/python2.7/site-packages/statsmodels/graphics/regressionplots.pyt   <module>   sx   "(				Ze		dT^i4					Y	0