ó
‡ˆ\c           @   s/  d  Z  d d l m Z d d l Z d d l Z d d l m Z d d l m	 Z	 d d l
 m Z m Z d d l m Z d d	 l m Z m Z m Z d d
 l m Z d d l m Z d d l m Z d d l m Z d d l m Z d d d „ Z d d d d d „ Z d d d d d d d d d d d „
 Z  d S(   s-   Partial dependence plots for tree ensembles. iÿÿÿÿ(   t   countN(   t
   mquantilesi   (   t	   cartesian(   t   Parallelt   delayed(   t   six(   t   mapt   ranget   zip(   t   check_array(   t   check_is_fitted(   t   DTYPEi   (   t   _partial_dependence_tree(   t   BaseGradientBoostinggš™™™™™©?gffffffî?id   c         C   s  t  | ƒ d k r! t d ƒ ‚ n  t d „  | Dƒ ƒ sF t d ƒ ‚ n  g  } t |  d | d d ƒ} x• t |  j d ƒ D]€ } t j |  d	 d	 … | f ƒ } | j d | k  r¹ | } n2 t j | d | f | d | f d
 | d t	 ƒ} | j
 | ƒ qx Wt | ƒ | f S(   sÿ  Generate a grid of points based on the ``percentiles of ``X``.

    The grid is generated by placing ``grid_resolution`` equally
    spaced points between the ``percentiles`` of each column
    of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme
        values of the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed
        on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    i   s!   percentile must be tuple of len 2c         s   s+   |  ]! } d  | k o  d k n Vq d S(   g        g      ð?N(    (   t   .0t   x(    (    sB   lib/python2.7/site-packages/sklearn/ensemble/partial_dependence.pys	   <genexpr>4   s    s#   percentile values must be in [0, 1]t   probt   axisi    i   Nt   numt   endpoint(   t   lent
   ValueErrort   allR   R   t   shapet   npt   uniquet   linspacet   Truet   appendR   (   t   Xt   percentilest   grid_resolutiont   axest   emp_percentilest   colt   uniquesR   (    (    sB   lib/python2.7/site-packages/sklearn/ensemble/partial_dependence.pyt   _grid_from_X   s    	c      	   C   s‰  t  |  t ƒ s t d ƒ ‚ n  t |  d ƒ | d
 k rC | d
 k s[ | d
 k	 rj | d
 k	 rj t d ƒ ‚ n  t j | d t j d d ƒj ƒ  } t	 g  | D]& } d | k o¸ |  j
 k  n ^ q˜ ƒ rá t d |  j
 d	 ƒ ‚ n  | d
 k	 r0t | d t d d ƒ} t | d
 d
 … | f | | ƒ \ } } nh | d
 k	 sBt ‚ d
 } | j d	 k rs| d
 d
 … t j f } n  | j d k r˜t d | j ƒ ‚ n  t j | d t d d ƒ} | j d	 | j d k sÓt ‚ |  j j d	 } |  j j d }	 t j | | j d f d t j d d ƒ}
 x^ t |	 ƒ D]P } xG t | ƒ D]9 } |  j | | f j } t | | | |  j |
 | ƒ q>Wq+W|
 | f S(   s8  Partial dependence of ``target_variables``.

    Partial dependence plots show the dependence between the joint values
    of the ``target_variables`` and the function represented
    by the ``gbrt``.

    Read more in the :ref:`User Guide <partial_dependence>`.

    Parameters
    ----------
    gbrt : BaseGradientBoosting
        A fitted gradient boosting model.
    target_variables : array-like, dtype=int
        The target features for which the partial dependecy should be
        computed (size should be smaller than 3 for visual renderings).
    grid : array-like, shape=(n_points, len(target_variables))
        The grid of ``target_variables`` values for which the
        partial dependecy should be evaluated (either ``grid`` or ``X``
        must be specified).
    X : array-like, shape=(n_samples, n_features)
        The data on which ``gbrt`` was trained. It is used to generate
        a ``grid`` for the ``target_variables``. The ``grid`` comprises
        ``grid_resolution`` equally spaced points between the two
        ``percentiles``.
    percentiles : (low, high), default=(0.05, 0.95)
        The lower and upper percentile used create the extreme values
        for the ``grid``. Only if ``X`` is not None.
    grid_resolution : int, default=100
        The number of equally spaced points on the ``grid``.

    Returns
    -------
    pdp : array, shape=(n_classes, n_points)
        The partial dependence function evaluated on the ``grid``.
        For regression and binary classification ``n_classes==1``.
    axes : seq of ndarray or None
        The axes with which the grid has been created or None if
        the grid has been given.

    Examples
    --------
    >>> samples = [[0, 0, 2], [1, 0, 0]]
    >>> labels = [0, 1]
    >>> from sklearn.ensemble import GradientBoostingClassifier
    >>> gb = GradientBoostingClassifier(random_state=0).fit(samples, labels)
    >>> kwargs = dict(X=samples, percentiles=(0, 1), grid_resolution=2)
    >>> partial_dependence(gb, [0], **kwargs) # doctest: +SKIP
    (array([[-4.52...,  4.52...]]), [array([ 0.,  1.])])
    s2   gbrt has to be an instance of BaseGradientBoostingt   estimators_s"   Either grid or X must be specifiedt   dtypet   ordert   Ci    s#   target_variables must be in [0, %d]i   Ni   s   grid must be 2d but is %dd(   t
   isinstanceR   R   R
   t   NoneR   t   asarrayt   int32t   ravelt   anyt   n_features_R	   R   R$   t   AssertionErrort   ndimt   newaxisR   R%   t   zerost   float64R   t   tree_R   t   learning_rate(   t   gbrtt   target_variablest   gridR   R   R   t   fxR    t   n_trees_per_staget   n_estimatorst   pdpt   staget   kt   tree(    (    sB   lib/python2.7/site-packages/sklearn/ensemble/partial_dependence.pyt   partial_dependenceH   s@    309 "	i   i    c   1         sø  d d l  j } d d l m } d d l m } d d l m } t ˆ t ƒ s] t	 d ƒ ‚ n  t
 ˆ d ƒ t ˆ d ƒ rð t j ˆ j ƒ d	 k rð | d k r¬ t	 d
 ƒ ‚ n  t j ˆ j | ƒ } ˆ j | | k rö t	 d t | ƒ ƒ ‚ qö n d } t ˆ  d t d d ƒ‰  ˆ j ˆ  j d k r3t	 d ƒ ‚ n  | d k rOi d d 6} n  | d k rdi  } n  ˆ d k r›g  t ˆ j ƒ D] } t | ƒ ^ q€‰ n! t ˆ t j ƒ r¼ˆ j ƒ  ‰ n  ‡ f d †  } g  } xÌ | D]Ä } t | t j f t j ƒ r| f } n  y5 t j g  | D] } | | ƒ ^ qd t j ƒ} Wn t k
 rZt	 d ƒ ‚ n Xd t j | ƒ k o{d	 k n st	 d ƒ ‚ n  | j  | ƒ qØW| } g  } yJ xC | D]; } g  } x | D] } | j  ˆ | ƒ qÉW| j  | ƒ q¶WWn/ t! k
 r't	 d j" t# ˆ ƒ | ƒ ƒ ‚ n Xt$ d | d |	 ƒ ‡  ‡ ‡ ‡ f d †  | Dƒ ƒ } i  } x | D]… \ } } | | j% ƒ  | | j& ƒ  } } t# | ƒ }  | j' |  | | f ƒ \ }! }" t% | |! ƒ } t& | |" ƒ } | | f | |  <qiWd	 | k rt j( d d | d	 Œ }# n  |
 d k r8| j) |   }$ n |
 j* ƒ  }$ |$ j+ ƒ  t% | t# | ƒ ƒ } t, t j- t# | ƒ t. | ƒ ƒ ƒ }% g  }& x/t/ t0 ƒ  | | | ƒ D]\ } } }' \ } } |$ j1 |% | | d ƒ }
 t# | ƒ d k r|
 j2 | d | | j3 ƒ  |  nâ t# | ƒ d	 k s&t4 ‚ t j5 | d | d ƒ \ }( }) | | j6 t7 t8 t j | ƒ ƒ ƒ j9 }* |
 j: |( |) |* d |# d d d  d! ƒ}+ |
 j; |( |) |* d |# d" |# d d# |# d d$ d% | |
 j< |+ d& d' d  d! d( d) d* t= ƒt> ˆ  d d … | d f d+ t j? d, d- d, ƒ ƒ}, | j@ |
 jA |
 jB ƒ }- |
 jC ƒ  }. |
 jD |, d g d. d/ |- d d! ƒ|
 jE |' d ƒ |
 jF |. ƒ |
 jG jH | d0 d1 d2 d3 ƒ ƒ | ƒ  }/ |/ jI dB ƒ |
 jG jJ |/ ƒ t# | ƒ d k r|t> ˆ  d d … | d f d+ t j? d, d- d, ƒ ƒ}, | j@ |
 jB |
 jA ƒ }- |
 jK ƒ  }0 |
 jL |, d g d. d/ |- d d! ƒ|
 jM |' d ƒ |
 jN |0 ƒ n |
 jM d6 ƒ t# | ƒ d k r¯|
 jF | d ƒ n  |& j  |
 ƒ q§W|$ jO d7 d8 d9 d: d; d, d< d= d> d? d@ dA ƒ |$ |& f S(C   s:  Partial dependence plots for ``features``.

    The ``len(features)`` plots are arranged in a grid with ``n_cols``
    columns. Two-way partial dependence plots are plotted as contour
    plots.

    Read more in the :ref:`User Guide <partial_dependence>`.

    Parameters
    ----------
    gbrt : BaseGradientBoosting
        A fitted gradient boosting model.
    X : array-like, shape=(n_samples, n_features)
        The data on which ``gbrt`` was trained.
    features : seq of ints, strings, or tuples of ints or strings
        If seq[i] is an int or a tuple with one int value, a one-way
        PDP is created; if seq[i] is a tuple of two ints, a two-way
        PDP is created.
        If feature_names is specified and seq[i] is an int, seq[i]
        must be < len(feature_names).
        If seq[i] is a string, feature_names must be specified, and
        seq[i] must be in feature_names.
    feature_names : seq of str
        Name of each feature; feature_names[i] holds
        the name of the feature with index i.
    label : object
        The class label for which the PDPs should be computed.
        Only if gbrt is a multi-class model. Must be in ``gbrt.classes_``.
    n_cols : int
        The number of columns in the grid plot (default: 3).
    grid_resolution : int, default=100
        The number of equally spaced points on the axes.
    percentiles : (low, high), default=(0.05, 0.95)
        The lower and upper percentile used to create the extreme values
        for the PDP axes.
    n_jobs : int or None, optional (default=None)
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.
    verbose : int
        Verbose output during PD computations. Defaults to 0.
    ax : Matplotlib axis object, default None
        An axis object onto which the plots will be drawn.
    line_kw : dict
        Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.
        For one-way partial dependence plots.
    contour_kw : dict
        Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.
        For two-way partial dependence plots.
    **fig_kw : dict
        Dict with keywords passed to the figure() call.
        Note that all keywords not recognized above will be automatically
        included here.

    Returns
    -------
    fig : figure
        The Matplotlib Figure object.
    axs : seq of Axis objects
        A seq of Axis objects, one for each subplot.

    Examples
    --------
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.ensemble import GradientBoostingRegressor
    >>> X, y = make_friedman1()
    >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)
    >>> fig, axs = plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP
    ...
    iÿÿÿÿN(   t
   transforms(   t   MaxNLocator(   t   ScalarFormatters2   gbrt has to be an instance of BaseGradientBoostingR%   t   classes_i   s&   label is not given for multi-class PDPs!   label %s not in ``gbrt.classes_``i    R&   R'   R(   i   s*   X.shape[1] does not match gbrt.n_features_t   greent   colorc            sP   t  |  t j ƒ rL y ˆ  j |  ƒ }  WqL t k
 rH t d |  ƒ ‚ qL Xn  |  S(   Ns   Feature %s not in feature_names(   R)   R   t   string_typest   indexR   (   R:   (   t   feature_names(    sB   lib/python2.7/site-packages/sklearn/ensemble/partial_dependence.pyt   convert_feature  s    s5   features must be either int, str, or tuple of int/strs)   target features must be either one or twosL   All entries of features must be less than len(feature_names) = {0}, got {1}.t   n_jobst   verbosec      
   3   s6   |  ], } t  t ƒ ˆ | d  ˆ  d ˆ d ˆ ƒVq d S(   R   R   R   N(   R   RA   (   R   t   fxs(   R   R7   R   R   (    sB   lib/python2.7/site-packages/sklearn/ensemble/partial_dependence.pys	   <genexpr>=  s   R   i   t   levelst
   linewidthsg      à?t   colorsR?   t   vmaxt   vmint   alphag      è?t   fmts   %2.2ft   fontsizei
   t   inlineR   gš™™™™™¹?g      ð?gš™™™™™©?t	   transformt   nbinsi   t   prunet   loweriýÿÿÿi   s   Partial dependencet   bottomg333333Ã?t   topgffffffæ?t   leftt   rightgffffffî?t   wspacegš™™™™™Ù?t   hspaceg333333Ó?(   iýÿÿÿi   (P   t   matplotlib.pyplott   pyplott
   matplotlibRB   t   matplotlib.tickerRC   RD   R)   R   R   R
   t   hasattrR   t   sizeRE   R*   t   searchsortedt   strR	   R   R/   R   R   t   ndarrayt   tolistt   numberst   IntegralR   RH   t   arrayR,   t	   TypeErrorR   t
   IndexErrort   formatR   R   t   mint   maxt   getR   t   figuret
   get_figuret   cleart   intt   ceilt   floatR   R    t   add_subplott   plotR-   R0   t   meshgridt   reshapet   listR   t   Tt   contourt   contourft   clabelR   R   t   aranget   blended_transform_factoryt	   transDatat	   transAxest   get_ylimt   vlinest
   set_xlabelt   set_ylimt   xaxist   set_major_locatort   set_powerlimitst   set_major_formattert   get_xlimt   hlinest
   set_ylabelt   set_xlimt   subplots_adjust(1   R7   R   t   featuresRJ   t   labelt   n_colsR   R   RL   RM   t   axt   line_kwt
   contour_kwt   fig_kwt   pltRB   RC   RD   t	   label_idxt   iRK   t   tmp_featuresRN   R:   t   namest   lt	   pd_resultt   pdp_limR=   R    t   min_pdt   max_pdt   n_fxt
   old_min_pdt
   old_max_pdt   Z_levelt   figt   n_rowst   axst   namet   XXt   YYt   Zt   CSt   decilest   transt   ylimt   tick_formattert   xlim(    (   R   RJ   R7   R   R   sB   lib/python2.7/site-packages/sklearn/ensemble/partial_dependence.pyt   plot_partial_dependence¦   sÖ    K'	+	5%	!
%$ (	"%5"	5"'(   gš™™™™™©?gffffffî?(   gš™™™™™©?gffffffî?(   gš™™™™™©?gffffffî?(!   t   __doc__t	   itertoolsR    Rl   t   numpyR   t   scipy.stats.mstatsR   t   utils.extmathR   t   utils._joblibR   R   t	   externalsR   t   externals.six.movesR   R   R   t   utilsR	   t   utils.validationR
   t
   tree._treeR   t   _gradient_boostingR   t   gradient_boostingR   R$   R*   RA   R·   (    (    (    sB   lib/python2.7/site-packages/sklearn/ensemble/partial_dependence.pyt   <module>   s*   0]		