U
    f                     @   sn   d dl mZmZmZmZmZmZmZmZm	Z	 ddl
mZ ddlZddlZddlZedZG dd deZdS )	   )	convert_to_instanceconvert_to_modelmatch_instance_to_datamatch_model_to_dataconvert_to_instance_with_indexconvert_to_linkIdentityLinkconvert_to_data	DenseData   )KernelExplainer    NZshapc                       s2   e Zd ZdZ fddZdd Zd	ddZ  ZS )
SamplingExplainera   This is an extension of the Shapley sampling values explanation method (aka. IME)

    SamplingExplainer computes SHAP values under the assumption of feature independence and is an
    extension of the algorithm proposed in "An Efficient Explanation of Individual Classifications
    using Game Theory", Erik Strumbelj, Igor Kononenko, JMLR 2010. It is a good alternative to
    KernelExplainer when you want to use a large background set (as opposed to a single reference
    value for example).
    c                    sV   t j}t tj tt| j||f| t | t| j	dksRt
dt| j	 d S )Nidentityz6SamplingExplainer only supports the identity link not )loglevelsetLevelloggingERRORsuperr   __init__strlinkAssertionError)selfmodeldatakwargsr   	__class__ ?/tmp/pip-target-lpfmz8o1/lib/python/shap/explainers/sampling.pyr      s
    
zSamplingExplainer.__init__c              	   K   sd  t |}t|| j t| jj| jks.td| |j| _	t| j	| _
| jr`| j| }n| j|j}t|tjtjfr|jd }|d | _| jst| jg| _| j
dkrtt| jj| jf}tt| jj| jf}nX| j
dkr\tt| jj| jf}tt| jj| jf}| j| j }t| jD ]}|| || j	d |f< q:n|dd| _| jdkrd| j
 | _| jd dkstd|d	d
}	| j}
d}|
| j
|	 kr|
| j
|	  }|
|8 }
tj| j
tjdd |
| j
d   }t|
| j
d  d D ]}||  d7  < qt| j| jf}t| j| jf}t|  | jjj!d f| _"t#| j	D ]H\}}| j$|| jj|j| jj|| d\||d d f< ||d d f< qt|% dkr|d7 }||%  }|| j	d d f &d| 'tj(}tt|D ](}|| d dkr||  d7  < qtt|D ]L}|% |krj||  d8  < n&|% |k r||  d7  < n qqFt|  | jjj!d f| _"t#| j	D ]\}}|| dkr| j$|| jj|j| jj|| d\}}|| ||  }||d d f ||  |||   | ||d d f< ||d d f ||  |||   | ||d d f< qt#| j	D ]4\}}||d d f  t)|| ||    < q~| j|%d | j }t| jD ]j}|d d |f |d d |f    d }|| |||%  d|%     }|d d |f  |7  < q|j!d dkr`|d d df }|S )Nz2SamplingExplainer does not support feature groups!r   r   nsamplesautoi  r    nsamples must be divisible by 2!min_samples_per_featured   )Zdtype)r"   g    .A)*r   r   r   lengroupsPr   Zvarying_groupsxZvaryingIndsMZ
keep_indexr   fZconvert_to_df
isinstancepdZ	DataFrameZSeriesvaluesZfxZ
vector_outnparrayZzerosDZfnullrangegetr"   ZonesZint64maxshapeX_masked	enumeratesampling_estimatesummeanZastypeintsqrt)r   Zincoming_instancer   instanceZ	model_outphiZphi_vardiffdr%   Zround1_samplesZround2_samplesZnsamples_each1iindZnsamples_each2valvarZtotal_samplesZ	sum_errorvZadjr    r    r!   explain   s    


$B$ 
&48.($zSamplingExplainer.explain
   c                 C   sP  |d dkst d| jd |d d f }t|jd }td|d D ]}tj| t||kd d }	tj	|jd }
|||d d f< ||
||	d d  f ||||	d d  f< |||d  d d f< ||
||	d  f ||d  ||	d  f< qH||}|d |d  }||d d  d d d }|| }t
|dt|dfS )Nr   r   r$   r   )r   r7   r0   Zaranger6   r3   randomshufflewhererandintr;   rE   )r   jr,   r*   Xr"   r7   ZindsrB   posZrindZevalsZevals_onZ	evals_offrA   r    r    r!   r9      s     ,,z#SamplingExplainer.sampling_estimate)rH   )__name__
__module____qualname____doc__r   rG   r9   __classcell__r    r    r   r!   r   
   s   		pr   )commonr   r   r   r   r   r   r   r	   r
   Zkernelr   numpyr0   Zpandasr.   r   	getLoggerr   r   r    r    r    r!   <module>   s   ,
