ó
—W[c           @   sè  d  Z  d d l m Z d d l Z d d l m Z d d l Z d d l	 Z
 d d l j Z d d l j Z d d l m Z d d l Z d d l m Z d d l m Z y d d l j j Z e Z Wn e k
 rÙ e Z n Xd d	 l  m! Z! m" Z" d d
 l# m$ Z$ m% Z% m& Z& m' Z' d d d g Z( d „  Z) d e e e d d d d d d e e d d d d „ Z+ e d „ Z, e d „ Z- d „  Z. d „  Z/ d „  Z0 d „  Z1 d e e d d d d d e e e e d d d d „ Z2 d d d d „ Z3 d S(   s1   Plotting functions for visualizing distributions.iÿÿÿÿ(   t   divisionN(   t   stats(   t   LineCollection(   t   LooseVersion(   t   string_typesi   (   t   iqrt   _kde_support(   t   color_palettet   light_palettet   dark_palettet   blend_palettet   distplott   kdeplott   rugplotc         C   s   t  j |  ƒ }  t |  ƒ d k  r% d Sd t |  ƒ t |  ƒ d } | d k re t t  j |  j ƒ ƒ St t  j |  j ƒ  |  j	 ƒ  | ƒ ƒ Sd S(   s;   Calculate number of hist bins using Freedman-Diaconis rule.i   i   i   i    NgUUUUUUÕ?(
   t   npt   asarrayt   lenR   t   intt   sqrtt   sizet   ceilt   maxt   min(   t   at   h(    (    s4   lib/python2.7/site-packages/seaborn/distributions.pyt   _freedman_diaconis_bins   s    c      	      sœ  | d k r t j ƒ  } n  t | ƒ } | d k rc t |  d ƒ rc |  j } | d k	 rc t } qc n  t j |  ƒ }  |  j	 d k r |  j
 ƒ  }  n  | p¥ | p¥ ˆ  d k	 } | d k rÀ t ƒ  } n  | d k rØ t ƒ  } n  | d k rð t ƒ  } n  |	 d k rt ƒ  }	 n  |
 d k rl| r8| j d |  j ƒ  ƒ \ } n | j |  j ƒ  d ƒ \ } | j ƒ  }
 | j ƒ  n  | d k	 rÇ| r‹| | d <qÇ| rž| | d <qÇ| r±| | d <qÇˆ  rÇ| |	 d <qÇn  | rž| d k rñt t |  ƒ d ƒ } n  | j d d ƒ t t j ƒ t d ƒ k  r/| j d	 | ƒ n | j d
 | ƒ | rKd n d } | j d |
 ƒ } | j |  | d | d | | | |
 k rž| | d <qžn  | rñ| j d |
 ƒ } t |  d | d | d | | | |
 k rñ| | d <qñn  | rV| j d |
 ƒ } | rd n d } t |  d | d | d | | | |
 k rV| | d <qVn  ˆ  d k	 rl‡  ‡ f d †  } |	 j d d ƒ } |	 j d d ƒ } |	 j d d ƒ } |	 j d t j t j f ƒ } t j |  ƒ j ƒ  |  j d d ƒ } t |  | | | | ƒ } ˆ  j  |  ƒ ‰ | | ƒ } | r7| | } } n  | j | | d | |	 | d k rl| |	 d <qln  | r˜| rˆ| j! | ƒ q˜| j" | ƒ n  | S(   sŒ  Flexibly plot a univariate distribution of observations.

    This function combines the matplotlib ``hist`` function (with automatic
    calculation of a good default bin size) with the seaborn :func:`kdeplot`
    and :func:`rugplot` functions. It can also fit ``scipy.stats``
    distributions and plot the estimated PDF over the data.

    Parameters
    ----------

    a : Series, 1d-array, or list.
        Observed data. If this is a Series object with a ``name`` attribute,
        the name will be used to label the data axis.
    bins : argument for matplotlib hist(), or None, optional
        Specification of hist bins, or None to use Freedman-Diaconis rule.
    hist : bool, optional
        Whether to plot a (normed) histogram.
    kde : bool, optional
        Whether to plot a gaussian kernel density estimate.
    rug : bool, optional
        Whether to draw a rugplot on the support axis.
    fit : random variable object, optional
        An object with `fit` method, returning a tuple that can be passed to a
        `pdf` method a positional arguments following an grid of values to
        evaluate the pdf on.
    {hist, kde, rug, fit}_kws : dictionaries, optional
        Keyword arguments for underlying plotting functions.
    color : matplotlib color, optional
        Color to plot everything but the fitted curve in.
    vertical : bool, optional
        If True, observed values are on y-axis.
    norm_hist : bool, optional
        If True, the histogram height shows a density rather than a count.
        This is implied if a KDE or fitted density is plotted.
    axlabel : string, False, or None, optional
        Name for the support axis label. If None, will try to get it
        from a.namel if False, do not set a label.
    label : string, optional
        Legend label for the relevent component of the plot
    ax : matplotlib axis, optional
        if provided, plot on this axis

    Returns
    -------
    ax : matplotlib Axes
        Returns the Axes object with the plot for further tweaking.

    See Also
    --------
    kdeplot : Show a univariate or bivariate distribution with a kernel
              density estimate.
    rugplot : Draw small vertical lines to show each observation in a
              distribution.

    Examples
    --------

    Show a default plot with a kernel density estimate and histogram with bin
    size determined automatically with a reference rule:

    .. plot::
        :context: close-figs

        >>> import seaborn as sns, numpy as np
        >>> sns.set(); np.random.seed(0)
        >>> x = np.random.randn(100)
        >>> ax = sns.distplot(x)

    Use Pandas objects to get an informative axis label:

    .. plot::
        :context: close-figs

        >>> import pandas as pd
        >>> x = pd.Series(x, name="x variable")
        >>> ax = sns.distplot(x)

    Plot the distribution with a kernel density estimate and rug plot:

    .. plot::
        :context: close-figs

        >>> ax = sns.distplot(x, rug=True, hist=False)

    Plot the distribution with a histogram and maximum likelihood gaussian
    distribution fit:

    .. plot::
        :context: close-figs

        >>> from scipy.stats import norm
        >>> ax = sns.distplot(x, fit=norm, kde=False)

    Plot the distribution on the vertical axis:

    .. plot::
        :context: close-figs

        >>> ax = sns.distplot(x, vertical=True)

    Change the color of all the plot elements:

    .. plot::
        :context: close-figs

        >>> sns.set_color_codes()
        >>> ax = sns.distplot(x, color="y")

    Pass specific parameters to the underlying plot functions:

    .. plot::
        :context: close-figs

        >>> ax = sns.distplot(x, rug=True, rug_kws={"color": "g"},
        ...                   kde_kws={"color": "k", "lw": 3, "label": "KDE"},
        ...                   hist_kws={"histtype": "step", "linewidth": 3,
        ...                             "alpha": 1, "color": "g"})

    t   namei   i    t   labeli2   t   alphagš™™™™™Ù?s   2.2t   normedt   densityt
   horizontalt   verticalt   colort   orientationt   axt   yt   xt   axisc            s   ˆ  j  |  ˆ Œ S(   N(   t   pdf(   R%   (   t   fitt   params(    s4   lib/python2.7/site-packages/seaborn/distributions.pyR'   ô   s    s   #282828t   gridsizeiÈ   t   cuti   t   clipt   ddofN(#   t   Nonet   pltt   gcat   boolt   hasattrR   t   TrueR   R   t   ndimt   squeezet   dictt   plott   meant	   get_colort   removeR   R   t
   setdefaultR   t   mplt   __version__t   popt   histR   R   t   infR   t   gaussian_kdet   scotts_factort   stdR   R(   t
   set_ylabelt
   set_xlabel(   R   t   binsR?   t   kdet   rugR(   t   hist_kwst   kde_kwst   rug_kwst   fit_kwsR!   R    t	   norm_histt   axlabelR   R#   t   label_axt   lineR"   t
   hist_colort	   kde_colort	   rug_colorR&   R'   t	   fit_colorR*   R+   R,   t   bwR%   R$   (    (   R(   R)   s4   lib/python2.7/site-packages/seaborn/distributions.pyR   *   sœ    {	
%c         K   sÂ  | d k r" t j t j f } n  t rR t |  | | | | | d |
 ƒ\ } } n^ | d k r} d } d } t j | t ƒ n  |
 r’ t d ƒ ‚ n  t	 |  | | | | ƒ \ } } t j
 t j t j | ƒ | f d d ƒ} | rñ | | } } n  | j d d ƒ } | d k r*t |  d ƒ r*|  j } n  | d k	 o9| } | d k rNd	 n | } | j d
 d ƒ } |	 j | | |  \ } | j ƒ  } | j ƒ  | j d d ƒ | d k r¶| n | } |	 j | | d | d | | t d
 | d | j d d ƒ d | j d t ƒ d | j d d ƒ ƒ } | r^| rE|	 j | d | |  q^|	 j | d | |  n  | rz|	 j d d d ƒn |	 j d d d ƒ|	 j ƒ  \ } } | r¾| r¾|	 j d d ƒ n  |	 S(   s=   Plot a univariate kernel density estimate on one of the axes.t
   cumulativet   gaus-   Kernel other than `gau` requires statsmodels.sa   Cumulative distributions are currentlyonly implemented in statsmodels.Please install statsmodels.R&   i   R   R   t
   _nolegend_t	   facecolorR!   R   g      Ð?t   clip_ont   zorderi    t   autot   loct   bestN(   R.   R   R@   t   _has_statsmodelst   _statsmodels_univariate_kdet   warningst   warnt   UserWarningt   ImportErrort   _scipy_univariate_kdet   amaxt   c_t
   zeros_likeR>   R2   R   R7   R9   R:   R6   t   getR3   t   fill_betweenxt   fill_betweent   set_xlimt   set_ylimt   get_legend_handles_labelst   legend(   t   datat   shadeR    t   kernelRU   R*   R+   R,   Ro   R#   RV   t   kwargsR%   R$   t   msgR   RY   RP   R!   t	   shade_kwst   handlest   labels(    (    s4   lib/python2.7/site-packages/seaborn/distributions.pyt   _univariate_kdeplot  sX    +
c      
   C   sy   | d k } t  j |  ƒ } | j | | | d | d | d | ƒ| r\ | j | j }	 }
 n | j | j }	 }
 |	 |
 f S(   s?   Compute a univariate kernel density estimate using statsmodels.RW   R*   R+   R,   (   t   smnpt   KDEUnivariateR(   t   supportt   cdfR   (   Rp   Rr   RU   R*   R+   R,   RV   t   fftRG   t   gridR$   (    (    s4   lib/python2.7/site-packages/seaborn/distributions.pyR`   ^  s    %c   	      C   sÜ   y t  j |  d | ƒ} WnE t k
 r` t  j |  ƒ } | d k ra d } t j | t ƒ qa n Xt | t ƒ r® | d k r‚ d n | } t | d | ƒ ƒ  t	 j
 |  ƒ } n  t |  | | | | ƒ } | | ƒ } | | f S(   s9   Compute a univariate kernel density estimate using scipy.t	   bw_methodt   scottsM   Ignoring bandwidth choice, please upgrade scipy to use a different bandwidth.t   scottss	   %s_factor(   R   RA   t	   TypeErrorRa   Rb   Rc   t
   isinstanceR   t   getattrR   RC   R   (	   Rp   RU   R*   R+   R,   RG   Rt   R~   R$   (    (    s4   lib/python2.7/site-packages/seaborn/distributions.pyRe   k  s    &c         K   s0  | d k r5 t j t j f t j t j f g } n$ t j | ƒ d k rY | | g } n  t r† t |  | | | | | ƒ \ } } } n$ t |  | | | | | ƒ \ } } } | j d d ƒ } | j g  g  ƒ \ } | j	 ƒ  } | j
 ƒ  | j d | ƒ } | j d d ƒ } | d k rG| r2t | d t ƒ} qGt | d t ƒ} n  t | t ƒ rº| j d ƒ r¥d g } | j t | j d d	 ƒ d
 ƒ ƒ t | d t ƒ} qºt j j | ƒ } n  | j d d ƒ } | | d <| rå| j n | j } | | | | | |  } | r*| r*| j d j d ƒ n  | | d <|
 rn| d k rLi  n | } | j j | | | |  n  t |  d ƒ r–|	 r–| j |  j  ƒ n  t | d ƒ r¾|	 r¾| j! | j  ƒ n  | d k	 r,| d k râ| d ƒ n | } | r| j" g  g  d | d | ƒq,| j g  g  d | d | ƒn  | S(   s6   Plot a joint KDE estimate as a bivariate contour plot.i   t   n_levelsi
   R!   t   cmapt   as_cmapt   _ds   #333333t   _ri   R   i    R   gffffffî?N(#   R.   R   R@   R4   R_   t   _statsmodels_bivariate_kdet   _scipy_bivariate_kdeR>   R7   R9   R:   R   R3   R	   Rƒ   R   t   endswitht   extendR   t   replaceR
   R<   t   cmt   get_cmapt   contourft   contourt   collectionst	   set_alphat   figuret   colorbarR2   RE   R   RD   Rk   (   R%   R$   t   filledt   fill_lowestRr   RU   R*   R+   R,   RN   t   cbart   cbar_axt   cbar_kwsR#   Rs   t   xxt   yyt   zR…   t   scoutt   default_colorR!   R†   t   palR   t   contour_funct   csett   legend_color(    (    s4   lib/python2.7/site-packages/seaborn/distributions.pyt   _bivariate_kdeplot}  sV    )'$
	"

c         C   sY  t  | t ƒ rL t t j d | ƒ } | |  ƒ } | | ƒ } | | g } n t j | ƒ rj | | g } n  t  |  t j ƒ rˆ |  j	 }  n  t  | t j ƒ r¦ | j	 } n  t j
 |  | g d | ƒ }	 t |  |	 j d | | | d ƒ }
 t | |	 j d | | | d ƒ } t j |
 | ƒ \ } } |	 j | j ƒ  | j ƒ  g ƒ j | j ƒ } | | | f S(   s*   Compute a bivariate kde using statsmodels.t   bw_t   cci    i   (   Rƒ   R   R„   Ry   t
   bandwidthsR   t   isscalart   pdt   Seriest   valuest   KDEMultivariateR   RU   t   meshgridR'   t   ravelt   reshapet   shape(   R%   R$   RU   R*   R+   R,   t   bw_funct   x_bwt   y_bwRG   t	   x_supportt	   y_supportRœ   R   Rž   (    (    s4   lib/python2.7/site-packages/seaborn/distributions.pyRŠ   À  s"    ##-c         C   s  t  j |  | f } t j | j d | ƒ} | j d d d d ƒ } t | t ƒ r© | d k rd d n | } t | d | ƒ ƒ  | d }	 t | d | ƒ ƒ  | d }
 n1 t  j	 | ƒ rÈ | | }	 }
 n d	 } t
 | ƒ ‚ t | d
 d
 … d f |	 | | | d ƒ } t | d
 d
 … d f |
 | | | d ƒ } t  j | | ƒ \ } } | | j ƒ  | j ƒ  g ƒ j | j ƒ } | | | f S(   s$   Compute a bivariate kde using scipy.R   R&   i    R-   i   R€   R   s	   %s_factorso   Cannot specify a different bandwidth for each dimension with the scipy backend. You should install statsmodels.N(   R   Rg   R   RA   t   TRC   Rƒ   R   R„   R©   t
   ValueErrorR   R®   R¯   R°   R±   (   R%   R$   RU   R*   R+   R,   Rp   RG   t   data_stdt   bw_xt   bw_yRt   Rµ   R¶   Rœ   R   Rž   (    (    s4   lib/python2.7/site-packages/seaborn/distributions.pyR‹   ×  s     !,,*RW   R€   id   i   c         K   s3  | d k r t j ƒ  } n  t |  t ƒ r< t j |  ƒ }  n  t |  ƒ d k rR | S|  j t j	 ƒ }  | d k	 r¦ t | t ƒ r‘ t j | ƒ } n  | j t j	 ƒ } n  t
 } t
 } t |  t j ƒ r÷ t j |  ƒ d k r÷ t } t } |  j \ } } n t |  t j ƒ ret j |  ƒ d k ret } t } |  j d d … d f j } |  j d d … d f j } n! | d k	 r†t } |  } | } n  | r¥d } t j | t ƒ n  | rÀ|
 rÀt d ƒ ‚ n  | rÿt | | | | | | | | | |	 | | | | |  } n0 t |  | | | | | | | |	 | d |
 | 
} | S(   sÎ  Fit and plot a univariate or bivariate kernel density estimate.

    Parameters
    ----------
    data : 1d array-like
        Input data.
    data2: 1d array-like, optional
        Second input data. If present, a bivariate KDE will be estimated.
    shade : bool, optional
        If True, shade in the area under the KDE curve (or draw with filled
        contours when data is bivariate).
    vertical : bool, optional
        If True, density is on x-axis.
    kernel : {'gau' | 'cos' | 'biw' | 'epa' | 'tri' | 'triw' }, optional
        Code for shape of kernel to fit with. Bivariate KDE can only use
        gaussian kernel.
    bw : {'scott' | 'silverman' | scalar | pair of scalars }, optional
        Name of reference method to determine kernel size, scalar factor,
        or scalar for each dimension of the bivariate plot. Note that the
        underlying computational libraries have different interperetations
        for this parameter: ``statsmodels`` uses it directly, but ``scipy``
        treats it as a scaling factor for the standard deviation of the
        data.
    gridsize : int, optional
        Number of discrete points in the evaluation grid.
    cut : scalar, optional
        Draw the estimate to cut * bw from the extreme data points.
    clip : pair of scalars, or pair of pair of scalars, optional
        Lower and upper bounds for datapoints used to fit KDE. Can provide
        a pair of (low, high) bounds for bivariate plots.
    legend : bool, optional
        If True, add a legend or label the axes when possible.
    cumulative : bool, optional
        If True, draw the cumulative distribution estimated by the kde.
    shade_lowest : bool, optional
        If True, shade the lowest contour of a bivariate KDE plot. Not
        relevant when drawing a univariate plot or when ``shade=False``.
        Setting this to ``False`` can be useful when you want multiple
        densities on the same Axes.
    cbar : bool, optional
        If True and drawing a bivariate KDE plot, add a colorbar.
    cbar_ax : matplotlib axes, optional
        Existing axes to draw the colorbar onto, otherwise space is taken
        from the main axes.
    cbar_kws : dict, optional
        Keyword arguments for ``fig.colorbar()``.
    ax : matplotlib axes, optional
        Axes to plot on, otherwise uses current axes.
    kwargs : key, value pairings
        Other keyword arguments are passed to ``plt.plot()`` or
        ``plt.contour{f}`` depending on whether a univariate or bivariate
        plot is being drawn.

    Returns
    -------
    ax : matplotlib Axes
        Axes with plot.

    See Also
    --------
    distplot: Flexibly plot a univariate distribution of observations.
    jointplot: Plot a joint dataset with bivariate and marginal distributions.

    Examples
    --------

    Plot a basic univariate density:

    .. plot::
        :context: close-figs

        >>> import numpy as np; np.random.seed(10)
        >>> import seaborn as sns; sns.set(color_codes=True)
        >>> mean, cov = [0, 2], [(1, .5), (.5, 1)]
        >>> x, y = np.random.multivariate_normal(mean, cov, size=50).T
        >>> ax = sns.kdeplot(x)

    Shade under the density curve and use a different color:

    .. plot::
        :context: close-figs

        >>> ax = sns.kdeplot(x, shade=True, color="r")

    Plot a bivariate density:

    .. plot::
        :context: close-figs

        >>> ax = sns.kdeplot(x, y)

    Use filled contours:

    .. plot::
        :context: close-figs

        >>> ax = sns.kdeplot(x, y, shade=True)

    Use more contour levels and a different color palette:

    .. plot::
        :context: close-figs

        >>> ax = sns.kdeplot(x, y, n_levels=30, cmap="Purples_d")

    Use a narrower bandwith:

    .. plot::
        :context: close-figs

        >>> ax = sns.kdeplot(x, bw=.15)

    Plot the density on the vertical axis:

    .. plot::
        :context: close-figs

        >>> ax = sns.kdeplot(y, vertical=True)

    Limit the density curve within the range of the data:

    .. plot::
        :context: close-figs

        >>> ax = sns.kdeplot(x, cut=0)

    Add a colorbar for the contours:

    .. plot::
        :context: close-figs

        >>> ax = sns.kdeplot(x, y, cbar=True)

    Plot two shaded bivariate densities:

    .. plot::
        :context: close-figs

        >>> iris = sns.load_dataset("iris")
        >>> setosa = iris.loc[iris.species == "setosa"]
        >>> virginica = iris.loc[iris.species == "virginica"]
        >>> ax = sns.kdeplot(setosa.sepal_width, setosa.sepal_length,
        ...                  cmap="Reds", shade=True, shade_lowest=False)
        >>> ax = sns.kdeplot(virginica.sepal_width, virginica.sepal_length,
        ...                  cmap="Blues", shade=True, shade_lowest=False)

    i    i   Nsš   Passing a 2D dataset for a bivariate plot is deprecated in favor of kdeplot(x, y), and it will cause an error in future versions. Please update your code.sK   Cumulative distribution plots are notsupported for bivariate distributions.RV   (   R.   R/   R0   Rƒ   t   listR   R   R   t   astypet   float64t   Falset   ndarrayR4   R3   R·   Rª   t	   DataFramet   ilocR¬   Ra   Rb   Rc   R‚   R¥   Rx   (   Rp   t   data2Rq   R    Rr   RU   R*   R+   R,   Ro   RV   t   shade_lowestR™   Rš   R›   R#   Rs   Rb   t	   bivariateR%   R$   t   warn_msg(    (    s4   lib/python2.7/site-packages/seaborn/distributions.pyR   í  sN    —''	gš™™™™™©?R%   c         K   s¬  | d k r t j ƒ  } n  t j |  ƒ }  | j d | d k ƒ } t d d d d d d ƒ } x< | j ƒ  D]. \ } } | | k rj | j | ƒ | | <qj qj W| j d d	 ƒ | rt	 j
 | j | j ƒ }	 t j t j d
 | g t |  ƒ ƒ t j |  d ƒ g ƒ }
 nQ t	 j
 | j | j ƒ }	 t j t j |  d ƒ t j d
 | g t |  ƒ ƒ g ƒ }
 |
 j t |  ƒ d d g ƒ } | j t | d |	 | ƒ | j d | d | ƒ | S(   s=  Plot datapoints in an array as sticks on an axis.

    Parameters
    ----------
    a : vector
        1D array of observations.
    height : scalar, optional
        Height of ticks as proportion of the axis.
    axis : {'x' | 'y'}, optional
        Axis to draw rugplot on.
    ax : matplotlib axes, optional
        Axes to draw plot into; otherwise grabs current axes.
    kwargs : key, value pairings
        Other keyword arguments are passed to ``LineCollection``.

    Returns
    -------
    ax : matplotlib axes
        The Axes object with the plot on it.

    R    R$   t	   linewidtht   lwt	   linestylet   lsR!   t   ci   i    i   t	   transformt   scalext   scaleyN(   R.   R/   R0   R   R   R>   R6   t   itemsR;   t   txt   blended_transform_factoryt	   transAxest	   transDatat   column_stackt   tileR   t   repeatR°   t   add_collectionR   t   autoscale_view(   R   t   heightR&   R#   Rs   R    t	   alias_mapt   attrt   aliast   transt   xy_pairst	   line_segs(    (    s4   lib/python2.7/site-packages/seaborn/distributions.pyR   ¸  s(    !$(4   t   __doc__t
   __future__R    t   numpyR   t   scipyR   t   pandasRª   t
   matplotlibR<   t   matplotlib.pyplott   pyplotR/   t   matplotlib.transformst
   transformsRÐ   t   matplotlib.collectionsR   Ra   t   distutils.versionR   t   sixR   t   statsmodels.nonparametric.apit   nonparametrict   apiRy   R3   R_   Rd   R¿   t   utilsR   R   t   palettesR   R   R	   R
   t   __all__R   R.   R   Rx   R`   Re   R¥   RŠ   R‹   R   R   (    (    (    s4   lib/python2.7/site-packages/seaborn/distributions.pyt   <module>   sF   

"	âP		C		È