U
    Ùf M  ã                   @   sŠ   d Z ddlmZ ddlZddlZddlmZ zddlm	Z
 W n ek
rZ   e d¡ Y nX ddlmZ ddlmZ ddd„Zdd„ ZdS )z6 Summary plots of SHAP values across a whole dataset.
é    )ÚdivisionN)Úgaussian_kdezmatplotlib could not be loaded!é   )Úlabels)Úcolorsú#333333Té   c           V         sÔ  d}t ˆtƒr0d}|dkrd}|dksRtdƒ‚n"|dkr<d}tˆjƒdksRtdƒ‚|dkr€|d	krhd
}n|rz‡fdd„}ntj}tt|ƒƒdkr¦ˆ dkrž|j	‰ |j
}nBt |tƒrÂˆ dkr¼|‰ d}n&|dk	rèt|jƒdkrèˆ dkrè|‰ d}|rúˆd jd nˆjd }ˆ dkr&t dd„ t|ƒD ƒ¡‰ |sºtˆjƒdkrº|dkrLd}nttˆ ƒ|ƒ}t t ˆ d¡¡ d¡ ¡}dˆjd d  }t ˆ|¡}t ˆd| ¡}tt|ƒt|ƒƒ}| }|}tjd| d d| d fd t d|d¡ ˆdd…|d |f }|dd…dd…f  d9  < t||dk	r@|dd…|f ndˆ | dddd|d t ||f¡ t d¡ d}t tˆ |d  |ƒ¡ tdtt|ƒ|ƒƒD ]è}|| }t d||d ¡ ˆdd…||f }|d9 }|dd…|f  d  < t||dk	r|dd…|f ndddd„ ttˆ ƒƒD ƒddd|d t ||f¡ t d¡ |tt|ƒ|ƒd krrt td ¡ t tˆ | |ƒ¡ q tjdddd  tjdd!d" |	r¶t ¡  dS |dkrÈd#}|
r.|rút tjtj t ˆ¡dd$dd$¡}nt tjt ˆ¡dd$¡}|t|t|ƒƒ d… }nt !t "t||ƒ¡d¡}d%}|rlt #¡  $d&t|ƒ| d ¡ tj%dd'd(d) |dkr’t&|ƒD ]ü\}}tj'|d*d+d,d(d- ˆdd…|f }|dkrÊdn|dd…|f }t "t|ƒ¡}tj( )|¡ |dk	r|| }|| }d} ztj|tj*d.}W n   d} Y nX t|ƒ}!d}"t +|"|t |¡  t |¡t |¡ d/  ¡}#t |#tj( ,|!¡d0  ¡}d}$d(}%t -|!¡}&|D ]J}|#| |%krºd}$t .|$d ¡|$d d d  |&|< |$d7 }$|#| }%q¤|&d1|t |&d ¡  9 }&|dk	rZ| rZt |d2¡}'t |d3¡}(|'|(krrt |d¡}'t |d4¡}(|'|(krrt |¡}'t |¡}(|jd t|ƒksŽtd5ƒ‚t /|¡})tj0||) ||&|)  d6|'|(d7|ddt|ƒd8kd9
 |t 1|)¡  2tj*¡}*|* 3¡ }+|'|( d: |+t /|*¡< |(|*|+|(k< |'|*|+|'k < tj0|t 1|)¡ ||&t 1|)¡  tj4|'|(d7|*|ddt|ƒd8kd; n0tj0|||& d7|dd| rx|nd6t|ƒd8kd< qŽn(|d=kržt&|ƒD ]\}}tj'|d*d+d,d(d- q¤|dk	r>t ˆdd…dtˆ ƒ…f  5¡ d¡},t ˆdd…dtˆ ƒ…f  5¡ d4¡}-t&|ƒD ]\}}ˆdd…|f }t |¡t |¡ }.}/|/|. }0t 6t |¡|0d>  t |¡|0d>  d¡}1t 7|¡|-|, d k rÂt8|tj( ,t|ƒ¡|-|,  d  ƒ|1ƒ}2nt8|ƒ|1ƒ}2|2t |2¡d  }2|dd…|f }td?t|ƒd# ƒ}3t -t|1ƒd ¡}4t |¡}d}5d}6d}7d}8tt|1ƒd ƒD ]¸}9|6t|ƒk 	r¨|1|9 |||6  k	r¨|7|||6  7 }7|6d7 }6|6|5 d#k	rB|7|||5  8 }7|5d7 }5	qB|6|5 dk	rì|7|6|5  |4|9< t|8ƒD ]}:|4|9 |4|9|: d < 	qÎn|8d7 }8	q>t |d2¡}'t |d3¡}(|'|(k
rPt |d¡}'t |d4¡}(|'|(k
rPt |¡}'t |¡}(tj0|t 9ˆjd ¡| d@tj4|'|(||dddA
 |4|'8 }4|(|' dk
r¢|4|(|'  }4tt|1ƒd ƒD ]„}|2| dBk
sÖ|2|d  dBk
r²tj:|1| |1|d  g||2|  ||2|d   g||2|  ||2|d   gt 4|4| ¡ddC 
q²qn\tj;ˆdd…|f tt|ƒƒdDddEddddF};|;dG D ]$}<|< <|¡ |< =dH¡ |< >|¡ qtn|d	kr2dD}=t 6d|jd |d ¡ +d¡ 2dI¡}>t ˆ¡t ˆ¡ }.}/t 6|.|/|=¡}?t&|ƒD ]"\}}|dd…|f }@tj?|@ddJ\}A}B|Ajd |kr`t |A¡}Ct @|B|C ¡}Dt A|Ddd¡}Dn|>}D|Djd d }"t |@¡}Ct 9|=¡| }Et -|"|=f¡}&t|"ƒD ]ì}ˆ|C|D| |D|d  … |f }|jd dkrtB CdK|ˆ | f ¡ |dkr |&|d dd…f |&|dd…f< q t8|tj(jDddL|jd dM ƒ|?ƒ|&|dd…f< |D|d  |D|  }F|jd |" }G|F|G }H|&|dd…f  |H9  < q tj@|&dd$}&d}I|& ¡ d |I }Jt|"d d(d(ƒD ]\}|&|dd…f |J }K|tjEjFkrüt G|¡||"d  ƒn|}Ltj:|?||K ||K |LdN qÀqüt |.|/¡ nˆ|s²|dkr²|d|… }Mt "t|Mƒ¡}Nt ˆ¡  d¡}OtjH|N|O|M dEdO|dP tjI|NdQdR t J¡  K‡ fdSd„|MD ƒ¡ n|rº|dkrº|dkrâdTd„ ttˆƒƒD ƒ}|d|… }Mt "t|Mƒ¡}Nt -t|Mƒ¡}Pt ‡fdUd„ttˆƒƒD ƒ¡}Qt&|QƒD ]N\}}t ˆ| ¡  d¡}OtjH|N|O|M dE|PdO||ƒ|| dV |P|O|M 7 }Pq2tjI|NdQdR t J¡  K‡ fdWd„|MD ƒ¡ tjLddXdY |r¼|dk	r¼|dkr¼|d	ksì|tjEjFkr¼ddlMmE}R |RjN|d	krtj4nt G|¡dZ}S|S Oddg¡ tjP|Sddgd[d\}T|T Qtd] td^ g¡ |TjRtd_ dXdd` |TjSjTddda |T >d¡ |TjU Vd¡ |TjS W¡  Xt #¡ jY Z¡ ¡}U|TjS [|Uj\d1 d# ¡ t J¡ j] ^db¡ t J¡ j_ ^dH¡ t J¡ j`dc  Vd¡ t J¡ j`dd  Vd¡ t J¡ j`de  Vd¡ t J¡ jT||df tjItt|ƒƒ‡ fdgd„|D ƒdQdR |dkrpt J¡ jTdhd#d+didj t J¡ jTdkddl t ad(t|ƒ¡ |dkr°tjtdm dQdR ntjtdn dQdR |	rÐt ¡  dS )oa2  Create a SHAP summary plot, colored by feature values when they are provided.

    Parameters
    ----------
    shap_values : numpy.array
        For single output explanations this is a matrix of SHAP values (# samples x # features).
        For multi-output explanations this is a list of such matrices of SHAP values.

    features : numpy.array or pandas.DataFrame or list
        Matrix of feature values (# samples x # features) or a feature_names list as shorthand

    feature_names : list
        Names of the features (length # features)

    max_display : int
        How many top features to include in the plot (default is 20, or 7 for interaction plots)

    plot_type : "dot" (default for single output), "bar" (default for multi-output), or "violin"
        What type of summary plot to produce. 
    FTNÚbarzBOnly plot_type = 'bar' is supported for multi-output explanations!Údotr   z9Summary plots need a matrix of shap_values, not a vector.Zlayered_violinZcoolwarmc                    s   t  | tˆ ƒ ¡S )N)r   Zred_blue_circleÚlen)Úi©Úshap_values© ú9/tmp/pip-target-lpfmz8o1/lib/python/shap/plots/summary.pyÚ<lambda>:   ó    zsummary_plot.<locals>.<lambda>z%<class 'pandas.core.frame.DataFrame'>r   c                 S   s   g | ]}t d  t|ƒ ‘qS )ZFEATURE)r   Ústr©Ú.0r   r   r   r   Ú
<listcomp>N   s     z summary_plot.<locals>.<listcomp>é   é   g      ð?é   éd   g      ø?gš™™™™™é?)Zfigsize)Úfeature_namesÚsortÚshowÚ	color_barÚauto_size_plotÚmax_displayÚ é   c                 S   s   g | ]}d ‘qS )r!   r   r   r   r   r   r   y   s     )r   r   r   r   r   r    ZINTERACTION_VALUEg        )ÚpadZw_padZh_padgš™™™™™¹?)ZhspaceZwspacer   )Zaxisgš™™™™™Ù?é   z#999999éÿÿÿÿ)ÚxÚcolorÚzorderz#ccccccg      à?)r   é   )Úyr'   ZlwÚdashesr(   )Zdtypeg:Œ0âŽyE>gíµ ÷Æ°>gÍÌÌÌÌÌì?r)   é_   éc   z<Feature and SHAP matrices must have the same number of rows!z#777777é   iô  )r'   ÚvminÚvmaxÚsÚalphaÚ	linewidthr(   Ú
rasterizedg       @)	Úcmapr/   r0   r1   Úcr2   r3   r(   r4   )r1   r2   r3   r(   r'   r4   Úviolingš™™™™™É?é
   é	   )r1   r5   r/   r0   r6   r2   r3   r(   gš™™™™™©?)r'   r(   éÈ   gffffffæ?)ZpointsZvertÚwidthsZ	showmeansZshowextremaZshowmediansZbodiesÚnoneÚint)Zreturn_countszmnot enough data in bin #%d for feature %s, so it'll be ignored. Try increasing the number of records to plot.gü©ñÒMbP?)ÚlocÚscaleÚsize)Z	facecolorÚcenter)Úalignr'   é   )Úfontsizec                    s   g | ]}ˆ | ‘qS r   r   r   ©r   r   r   r   i  s     c                 S   s   g | ]}d t |ƒ ‘qS )zClass )r   r   r   r   r   r   m  s     c                    s    g | ]}t  ˆ | ¡ ¡  ‘qS r   )ÚnpÚabsÚmeanr   r   r   r   r   r  s     )ÚleftrB   r'   Úlabelc                    s   g | ]}ˆ | ‘qS r   r   r   rE   r   r   r   {  s     é   )ZframeonrD   )r5   iè  )ZticksZaspectZFEATURE_VALUE_LOWZFEATURE_VALUE_HIGHZFEATURE_VALUE)r@   Zlabelpad)Ú	labelsizeÚlengthÚbottomÚrightÚtoprI   )r'   Z
labelcolorc                    s   g | ]}ˆ | ‘qS r   r   r   rE   r   r   r   ”  s     r*   Úmajor)rM   ÚwidthÚwhichr&   )rL   ZGLOBAL_VALUEZVALUE)bÚ
isinstanceÚlistÚAssertionErrorr   Úshaper   Zblue_rgbr   ÚtypeÚcolumnsÚvaluesrF   ÚarrayÚrangeÚminZargsortrG   ÚsumZnanpercentileÚmaxÚplZfigureZsubplotÚsummary_plotZxlimZxlabelÚtitleÚshorten_textr   Ztight_layoutZsubplots_adjustr   rH   ÚflipZarangeZgcfZset_size_inchesZaxvlineÚ	enumerateZaxhlineÚrandomÚshuffleZfloat64ÚroundZrandnZzerosÚceilÚisnanZscatterÚinvertZastypeÚcopyZred_blueÚflattenZlinspaceZstdr   ZonesZfill_betweenZ
violinplotZset_facecolorZset_edgecolorZ	set_alphaÚuniqueZcumsumÚinsertÚwarningsÚwarnÚnormalÚcmZdatadZget_cmapZbarhZyticksZgcaZset_yticklabelsZlegendZmatplotlib.cmZScalarMappableZ	set_arrayZcolorbarZset_ticklabelsZ	set_labelZaxZtick_paramsÚoutlineZset_visibleZget_window_extentZtransformedZdpi_scale_transÚinvertedZ
set_aspectÚheightZxaxisZset_ticks_positionZyaxisZspinesZylim)Vr   Úfeaturesr   r    Z	plot_typer'   Z
axis_colorrb   r2   r   r   r   r   Zlayered_violin_max_num_binsZclass_namesZmulti_classZnum_featuresZ	sort_indsÚdeltaZslowZshighÚvZproj_shap_valuesZtitle_length_limitr   ÚindZfeature_orderÚ
row_heightÚposZshapsrZ   ZindsZcolored_featureÚNZnbinsZquantÚlayerZlast_binZysr/   r0   Znan_maskZcvalsZ	cvals_impZ
global_lowZglobal_highZshap_minZshap_maxÚrngZxsZdsZwindow_sizeZsmooth_valuesZtrailing_posZleading_posZrunning_sumZ	back_fillÚjÚkÚpartsZpcZnum_x_pointsZbinsZx_pointsÚfeaturern   ÚcountsÚorderZ	thesebinsZy0r@   Zbin_size_if_evenZrelative_bin_sizerR   r?   r*   r6   Zfeature_indsZy_posZglobal_shap_valuesZleft_posZ
class_indsrs   ÚmÚcbZbboxr   )r   r   r   ra      s¬   



    û
 ù	

&


0
"




    
þ       
ý 
ÿ

$$**
$



"   ÿ 0 þ   þ


ÿ

 
ÿÿ
 0
ÿÿÿ" 
      þÿ
ÿ"
&

ra   c                 C   s(   t | ƒ|kr | d |d … d S | S d S )Nr   z...)r   )ÚtextZlength_limitr   r   r   rc      s    rc   )NNNNNr   Nr   TTTTr   N)Ú__doc__Ú
__future__r   rp   ÚnumpyrF   Zscipy.statsr   Zmatplotlib.pyplotZpyplotr`   ÚImportErrorrq   r!   r   r   ra   rc   r   r   r   r   Ú<module>   s6   
                   þ
   