ó
áp7]c           @   s   d  Z  d d l m Z m Z m Z m Z d d l Z d d l m	 Z	 d d l
 j j Z e j d d d d d d d d d d „
 Z d „  Z d S(	   s@   
Authors:    Josef Perktold, Skipper Seabold, Denis A. Engemann
iÿÿÿÿ(   t   iterkeyst   lranget   zipt	   iteritemsN(   t   rainbowt   bt   bestc         K   sX  d d l  m } t j | ƒ \ } } | p: t | d d ƒ } d | j | f } | pe t |  d d ƒ } | p} t | d d ƒ } | j | ƒ | j | ƒ d } } t	 |  d t
 ƒ rg  t j |  ƒ D] } | ^ qÇ } t t | ƒ ƒ } t |  t t | | ƒ ƒ ƒ }  n  | t d	 |  d
 | d | ƒ ƒ } | j d
 d	 g ƒ j | ƒ j ƒ  } t | d
 j ƒ  ƒ } |
 d k r€d g | n |
 }
 |	 d k rŸd g | n |	 }	 | d k r½t | ƒ n | } t |
 ƒ | k rät d ƒ ‚ n  t |	 ƒ | k rt d ƒ ‚ n  t | ƒ | k r&t d ƒ ‚ n  | d k s>| d k rÆxÁt | j d
 g ƒ ƒ D]h \ } \ } } t
 | d
 j d ƒ } | j | d	 | d d | | d |	 | d | d |
 | | qWWn<| d k sÞ| d k r\x!t | j d
 g ƒ ƒ D]^ \ } \ } } t
 | d
 j d ƒ } | j | d	 | d d | | d | d |
 | | q÷Wn¦ | d k st| d k ròx‹ t | j d
 g ƒ ƒ D]^ \ } \ } } t
 | d
 j d ƒ } | j | d	 | d d | | d | d |	 | | qWn t d | ƒ ‚ | j d | d | ƒ | j d ƒ t | | g ƒ rT| j | ƒ | j | ƒ n  | S(   s  
    Interaction plot for factor level statistics.

    Note. If categorial factors are supplied levels will be internally
    recoded to integers. This ensures matplotlib compatiblity.

    uses pandas.DataFrame to calculate an `aggregate` statistic for each
    level of the factor or group given by `trace`.

    Parameters
    ----------
    x : array-like
        The `x` factor levels constitute the x-axis. If a `pandas.Series` is
        given its name will be used in `xlabel` if `xlabel` is None.
    trace : array-like
        The `trace` factor levels will be drawn as lines in the plot.
        If `trace` is a `pandas.Series` its name will be used as the
        `legendtitle` if `legendtitle` is None.
    response : array-like
        The reponse or dependent variable. If a `pandas.Series` is given
        its name will be used in `ylabel` if `ylabel` is None.
    func : function
        Anything accepted by `pandas.DataFrame.aggregate`. This is applied to
        the response variable grouped by the trace levels.
    plottype : str {'line', 'scatter', 'both'}, optional
        The type of plot to return. Can be 'l', 's', or 'b'
    ax : axes, optional
        Matplotlib axes instance
    xlabel : str, optional
        Label to use for `x`. Default is 'X'. If `x` is a `pandas.Series` it
        will use the series names.
    ylabel : str, optional
        Label to use for `response`. Default is 'func of response'. If
        `response` is a `pandas.Series` it will use the series names.
    colors : list, optional
        If given, must have length == number of levels in trace.
    linestyles : list, optional
        If given, must have length == number of levels in trace.
    markers : list, optional
        If given, must have length == number of lovels in trace
    kwargs
        These will be passed to the plot command used either plot or scatter.
        If you want to control the overall plotting options, use kwargs.

    Returns
    -------
    fig : Figure
        The figure given by `ax.figure` or a new instance.

    Examples
    --------
    >>> import numpy as np
    >>> np.random.seed(12345)
    >>> weight = np.random.randint(1,4,size=60)
    >>> duration = np.random.randint(1,3,size=60)
    >>> days = np.log(np.random.randint(1,30, size=60))
    >>> fig = interaction_plot(weight, duration, days,
    ...             colors=['red','blue'], markers=['D','^'], ms=10)
    >>> import matplotlib.pyplot as plt
    >>> plt.show()

    .. plot::

       import numpy as np
       from statsmodels.graphics.factorplots import interaction_plot
       np.random.seed(12345)
       weight = np.random.randint(1,4,size=60)
       duration = np.random.randint(1,3,size=60)
       days = np.log(np.random.randint(1,30, size=60))
       fig = interaction_plot(weight, duration, days,
                   colors=['red','blue'], markers=['D','^'], ms=10)
       import matplotlib.pyplot as plt
       #plt.show()
    iÿÿÿÿ(   t	   DataFramet   namet   responses   %s of %st   Xt   Tracei    t   xt   tracet   -t   .s(   Must be a linestyle for each trace levels%   Must be a marker for each trace levels$   Must be a color for each trace levelt   bothR   t   colort   markert   labelt	   linestylet   linet   lt   scattert   ss   Plot type %s not understoodt   loct   titlegš™™™™™¹?N(    t   pandasR   t   utilst   create_mpl_axt   getattrt   __name__t
   set_ylabelt
   set_xlabelt   Nonet
   isinstancet   strt   npt   uniqueR   t   lent   _recodet   dictR   t   groupbyt	   aggregatet   reset_indexR   t
   ValueErrort	   enumeratet   valuest   plotR   t   legendt   marginst   allt
   set_xtickst   set_xticklabels(   R   R   R	   t   funct   axt   plottypet   xlabelt   ylabelt   colorst   markerst
   linestylest	   legendloct   legendtitlet   kwargsR   t   figt   response_namet   x_valuest   x_levelsR   t   datat	   plot_datat   n_tracet   iR/   t   groupR   (    (    s?   lib/python2.7/site-packages/statsmodels/graphics/factorplots.pyt   interaction_plot   s`    O
"!!$+!+!+!c         C   sH  d d l  m } d
 } d
 } t |  | ƒ rI |  j } |  j } |  j }  n  |  j j t	 j
 t	 j g k rv t d ƒ ‚ nÎ t | t ƒ s” t d ƒ ‚ n° t	 j |  ƒ t	 j t t | ƒ ƒ ƒ k j ƒ  sÓ t d ƒ ‚ nq t	 j |  j d d t	 j ƒ} x* t | ƒ D] \ } } | | |  | k <qÿ W| r@| | d | d	 | ƒ} n  | Sd
 S(   s9   Recode categorial data to int factor.

    Parameters
    ----------
    x : array-like
        array like object supporting with numpy array methods of categorially
        coded data.
    levels : dict
        mapping of labels to integer-codings

    Returns
    -------
    out : instance numpy.ndarray

    iÿÿÿÿ(   t   Seriess<   This is not a categorial factor. Array of str type required.s4   This is not a valid value for levels. Dict required.s)   The levels do not match the array values.i    t   dtypeR   t   indexN(   R   RK   R"   R#   R   RM   R/   RL   t   typeR%   t   str_t   object_R-   R)   R&   t   listR    R3   t   emptyt   shapet   intR   (   R   t   levelsRK   R   RM   t   outt   levelt   coding(    (    s?   lib/python2.7/site-packages/statsmodels/graphics/factorplots.pyR(      s&    		0(   t   __doc__t   statsmodels.compat.pythonR    R   R   R   t   numpyR%   t   statsmodels.graphics.plottoolsR   t   statsmodels.graphics.utilst   graphicsR   t   meanR"   RJ   R(   (    (    (    s?   lib/python2.7/site-packages/statsmodels/graphics/factorplots.pyt   <module>   s   "