B
    	\
<              
   @   s   d Z ddlmZ ddlZddlZddlmZ ddlm	Z	 ddl
mZmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ dddZdddZdddZdS )z-Partial dependence plots for tree ensembles.     )countN)
mquantiles   )	cartesian)Paralleldelayed)six)maprangezip)check_array)check_is_fitted)DTYPE   )_partial_dependence_tree)BaseGradientBoostingg?gffffff?d   c             C   s   t |dkrtdtdd |D s.tdg }t| |dd}xlt| jd D ]Z}t| d	d	|f }|jd |k r~|}n"tj|d|f |d|f |d
d}|	| qPW t
||fS )a  Generate a grid of points based on the ``percentiles of ``X``.

    The grid is generated by placing ``grid_resolution`` equally
    spaced points between the ``percentiles`` of each column
    of ``X``.

    Parameters
    ----------
    X : ndarray
        The data
    percentiles : tuple of floats
        The percentiles which are used to construct the extreme
        values of the grid axes.
    grid_resolution : int
        The number of equally spaced points that are placed
        on the grid.

    Returns
    -------
    grid : ndarray
        All data points on the grid; ``grid.shape[1] == X.shape[1]``
        and ``grid.shape[0] == grid_resolution * X.shape[1]``.
    axes : seq of ndarray
        The axes with which the grid has been created.
    r   z!percentile must be tuple of len 2c             s   s&   | ]}d |  kodkn  V  qdS )g        g      ?N ).0xr   r   Blib/python3.7/site-packages/sklearn/ensemble/partial_dependence.py	<genexpr>4   s    z_grid_from_X.<locals>.<genexpr>z#percentile values must be in [0, 1]r   )probaxisr   NT)numZendpoint)len
ValueErrorallr   r
   shapenpuniquelinspaceappendr   )Xpercentilesgrid_resolutionaxesZemp_percentilescolZuniquesr   r   r   r   _grid_from_X   s    

r)   c          	      s  t  tstdt d |dkr,|dks<|dk	rD|dk	rDtdtj|tjdd }t fdd|D rtd	 j	d
  |dk	rt
|tdd}t|dd|f ||\}}nD|dk	std}|jd
kr|ddtjf }|jdkrtd|j tj|tdd}|jd
 |jd ks$t jjd
 } jjd }tj||jd ftjdd}	xLt|D ]@}
x8t|D ],} j|
|f j}t||| j|	|  qpW qbW |	|fS )a8  Partial dependence of ``target_variables``.

    Partial dependence plots show the dependence between the joint values
    of the ``target_variables`` and the function represented
    by the ``gbrt``.

    Read more in the :ref:`User Guide <partial_dependence>`.

    Parameters
    ----------
    gbrt : BaseGradientBoosting
        A fitted gradient boosting model.
    target_variables : array-like, dtype=int
        The target features for which the partial dependecy should be
        computed (size should be smaller than 3 for visual renderings).
    grid : array-like, shape=(n_points, len(target_variables))
        The grid of ``target_variables`` values for which the
        partial dependecy should be evaluated (either ``grid`` or ``X``
        must be specified).
    X : array-like, shape=(n_samples, n_features)
        The data on which ``gbrt`` was trained. It is used to generate
        a ``grid`` for the ``target_variables``. The ``grid`` comprises
        ``grid_resolution`` equally spaced points between the two
        ``percentiles``.
    percentiles : (low, high), default=(0.05, 0.95)
        The lower and upper percentile used create the extreme values
        for the ``grid``. Only if ``X`` is not None.
    grid_resolution : int, default=100
        The number of equally spaced points on the ``grid``.

    Returns
    -------
    pdp : array, shape=(n_classes, n_points)
        The partial dependence function evaluated on the ``grid``.
        For regression and binary classification ``n_classes==1``.
    axes : seq of ndarray or None
        The axes with which the grid has been created or None if
        the grid has been given.

    Examples
    --------
    >>> samples = [[0, 0, 2], [1, 0, 0]]
    >>> labels = [0, 1]
    >>> from sklearn.ensemble import GradientBoostingClassifier
    >>> gb = GradientBoostingClassifier(random_state=0).fit(samples, labels)
    >>> kwargs = dict(X=samples, percentiles=(0, 1), grid_resolution=2)
    >>> partial_dependence(gb, [0], **kwargs) # doctest: +SKIP
    (array([[-4.52...,  4.52...]]), [array([ 0.,  1.])])
    z2gbrt has to be an instance of BaseGradientBoostingestimators_Nz"Either grid or X must be specifiedC)dtypeorderc                s(   g | ] }d |  ko j k n   qS )r   )n_features_)r   fx)gbrtr   r   
<listcomp>   s    z&partial_dependence.<locals>.<listcomp>z#target_variables must be in [0, %d]r   r   zgrid must be 2d but is %ddr   )
isinstancer   r   r   r    Zasarrayint32ravelanyr.   r   r   r)   AssertionErrorndimZnewaxisr   r*   ZzerosZfloat64r
   Ztree_r   Zlearning_rate)r0   Ztarget_variablesZgridr$   r%   r&   r'   Zn_trees_per_stageZn_estimatorspdpZstagekZtreer   )r0   r   partial_dependenceH   s@    3

 


r:      c       0   	      s  ddl m} ddlm} ddlm} ddlm} ttsBt	dt
d tdrtjd	kr|dkrvt	d
tj|}j| |krt	dt| nd}t tdd j jd krt	d|dkrddi}|dkri }dkr
dd tjD nttjr  fddg }x|D ]}t|tjftj rV|f}y"tjfdd|D tjd}W n tk
r   t	dY nX dt|  krd	ksn t	d|| q6W |}g }y@x:|D ]2}g }x|D ]}||  qW || qW W n* t k
rB   t	d!t"|Y nX t#||	d fdd|D }i }xh|D ]`\}}|| $ || %  }}t"|}|&|||f\}} t$||}t%|| }||f||< qrW d	|krtj'|d	 ddi}!|
dkr|j(f |}"n|
) }"|"*  t$|t"|}t+t,t"|t-| }#g }$xFt.t/ |||D ]0\}}%}&\}}|"0|#||d }
t"|dkr|
j1|d || 2 f| nt"|d	kst3t4|d |d \}'}(|| 5t6t7tj|j8})|
j9|'|(|)|!d d!d"}*|
j:|'|(|)f|!|!d# |!d d$d%| |
j;|*d&d!d'd(d) t< dd|%d f t=d*d+d*d,}+|>|
j?|
j@},|
A }-|
jB|+dgd-|,d!d. |
C|&d  |
D|- |
jEF|d/d0d1 | }.|.Gd2 |
jEH|. t"|dkr\t< dd|%d f t=d*d+d*d,}+|>|
j@|
j?},|
I }/|
jJ|+dgd-|,d!d. |
K|&d  |
L|/ n
|
Kd3 t"|dkr|
D|d  |$|
 q\W |"jMd4d5d*d6d7d8d9 |"|$fS ):a:  Partial dependence plots for ``features``.

    The ``len(features)`` plots are arranged in a grid with ``n_cols``
    columns. Two-way partial dependence plots are plotted as contour
    plots.

    Read more in the :ref:`User Guide <partial_dependence>`.

    Parameters
    ----------
    gbrt : BaseGradientBoosting
        A fitted gradient boosting model.
    X : array-like, shape=(n_samples, n_features)
        The data on which ``gbrt`` was trained.
    features : seq of ints, strings, or tuples of ints or strings
        If seq[i] is an int or a tuple with one int value, a one-way
        PDP is created; if seq[i] is a tuple of two ints, a two-way
        PDP is created.
        If feature_names is specified and seq[i] is an int, seq[i]
        must be < len(feature_names).
        If seq[i] is a string, feature_names must be specified, and
        seq[i] must be in feature_names.
    feature_names : seq of str
        Name of each feature; feature_names[i] holds
        the name of the feature with index i.
    label : object
        The class label for which the PDPs should be computed.
        Only if gbrt is a multi-class model. Must be in ``gbrt.classes_``.
    n_cols : int
        The number of columns in the grid plot (default: 3).
    grid_resolution : int, default=100
        The number of equally spaced points on the axes.
    percentiles : (low, high), default=(0.05, 0.95)
        The lower and upper percentile used to create the extreme values
        for the PDP axes.
    n_jobs : int or None, optional (default=None)
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.
    verbose : int
        Verbose output during PD computations. Defaults to 0.
    ax : Matplotlib axis object, default None
        An axis object onto which the plots will be drawn.
    line_kw : dict
        Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.
        For one-way partial dependence plots.
    contour_kw : dict
        Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.
        For two-way partial dependence plots.
    **fig_kw : dict
        Dict with keywords passed to the figure() call.
        Note that all keywords not recognized above will be automatically
        included here.

    Returns
    -------
    fig : figure
        The Matplotlib Figure object.
    axs : seq of Axis objects
        A seq of Axis objects, one for each subplot.

    Examples
    --------
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.ensemble import GradientBoostingRegressor
    >>> X, y = make_friedman1()
    >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)
    >>> fig, axs = plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP
    ...
    r   N)
transforms)MaxNLocator)ScalarFormatterz2gbrt has to be an instance of BaseGradientBoostingr*   classes_r   z&label is not given for multi-class PDPz!label %s not in ``gbrt.classes_``r+   )r,   r-   r   z*X.shape[1] does not match gbrt.n_features_colorZgreenc             S   s   g | ]}t |qS r   )str)r   ir   r   r   r1     s    z+plot_partial_dependence.<locals>.<listcomp>c                s@   t | tjr<y | } W n  tk
r:   td|  Y nX | S )NzFeature %s not in feature_names)r2   r   string_typesindexr   )r/   )feature_namesr   r   convert_feature  s    z0plot_partial_dependence.<locals>.convert_featurec                s   g | ]} |qS r   r   )r   r/   )rF   r   r   r1   #  s    )r,   z5features must be either int, str, or tuple of int/strz)target features must be either one or twozLAll entries of features must be less than len(feature_names) = {0}, got {1}.)n_jobsverbosec             3   s$   | ]}t t| d V  qdS ))r$   r&   r%   N)r   r:   )r   fxs)r$   r0   r&   r%   r   r   r   =  s   z*plot_partial_dependence.<locals>.<genexpr>r      g      ?r9   )levelsZ
linewidthscolorsg      ?)rK   ZvmaxZvminZalphaz%2.2f
   T)ZfmtrL   ZfontsizeZinlineg?g      ?)r   g?)Z	transformr@      lower)ZnbinsZprune)   zPartial dependenceg333333?gffffff?gffffff?g?g333333?)ZbottomtopleftrightZwspaceZhspace)NZmatplotlib.pyplotZpyplotZ
matplotlibr<   Zmatplotlib.tickerr=   r>   r2   r   r   r   hasattrr    sizer?   ZsearchsortedrA   r   r   r.   r   r
   ZndarraytolistnumbersZIntegralr   rC   Zarrayr3   	TypeErrorr#   
IndexErrorformatr   r   minmaxgetr"   ZfigureZ
get_figureclearintZceilfloatr   r   Zadd_subplotZplotr4   r6   ZmeshgridZreshapelistr	   TZcontourZcontourfZclabelr   ZarangeZblended_transform_factoryZ	transDataZ	transAxesZget_ylimZvlinesZ
set_xlabelZset_ylimZxaxisZset_major_locatorZset_powerlimitsZset_major_formatterZget_xlimZhlinesZ
set_ylabelZset_xlimZsubplots_adjust)0r0   r$   ZfeaturesrE   ZlabelZn_colsr&   r%   rG   rH   ZaxZline_kwZ
contour_kwZfig_kwZpltr<   r=   r>   Z	label_idxZtmp_featuresrI   nameslrB   Z	pd_resultZpdp_limr8   r'   Zmin_pdZmax_pdZn_fxZ
old_min_pdZ
old_max_pdZZ_levelZfigZn_rowsZaxsr/   nameZXXZYYZZCSZdecilesZtransZylimZtick_formatterZxlimr   )r$   rF   rE   r0   r&   r%   r   plot_partial_dependence   s    K


	
" 







&

&
ri   )r   r   )NNr   r   )
NNr;   r   r   Nr   NNN) __doc__	itertoolsr   rY   Znumpyr    Zscipy.stats.mstatsr   Zutils.extmathr   Zutils._joblibr   r   Z	externalsr   Zexternals.six.movesr	   r
   r   Zutilsr   Zutils.validationr   Z
tree._treer   Z_gradient_boostingr   Zgradient_boostingr   r)   r:   ri   r   r   r   r   <module>   s*   
0 
]    