U
    f/                     @   sL   d dl Zd dlZd dlZd dlmZ ddlmZ G dd deZ	dd Z
dS )	    N)tqdm   )	Explainerc                   @   s*   e Zd ZdZd
ddZdd Zdd	 ZdS )LinearExplainera	   Computes SHAP values for a linear model, optionally accounting for inter-feature correlations.

    This computes the SHAP values for a linear model and can account for the correlations among
    the input features. Assuming features are independent leads to interventional SHAP values which
    for a linear model are coef[i] * (x[i] - X.mean(0)[i]) for the ith feature. If instead we account
    for correlations then we prevent any problems arising from colinearity and share credit among
    correlated features. Accounting for correlations can be computationally challenging, but
    LinearExplainer uses sampling to estimate a transform that can then be applied to explain
    any prediction of the model.

    Parameters
    ----------
    model : (coef, intercept) or sklearn.linear_model.*
        User supplied linear model either as either a parameter pair or sklearn object.

    data : (mean, cov), numpy.array, pandas.DataFrame, iml.DenseData or scipy.csr_matrix
        The background dataset to use for computing conditional expectations. Note that only the
        mean and covariance of the dataset are used. This means passing a raw data matrix is just
        a convienent alternative to passing the mean and covariance directly.
    nsamples : int
        Number of samples to use when estimating the transformation matrix used to account for
        feature correlations.
    feature_dependence : "independent" (default) or "correlation"
        There are two ways we might want to compute SHAP values, either the full conditional SHAP
        values or the independent SHAP values. For independent SHAP values we break any
        dependence structure between features in the model and so uncover how the model would behave if we
        intervened and changed some of the inputs. For the full conditional SHAP values we respect
        the correlations among the input features, so if the model depends on one input but that
        input is correlated with another input, then both get some credit for the model's behavior. The
        independent option stays "true to the model" meaning it will only give credit to features that are
        actually used by the model, while the correlation option stays "true to the data" in the sense that
        it only considers how the model would behave when respecting the correlations in the input data.
        For sparse case only independent option is supported.
      Nc           
      C   sf  || _ |dkrtd d}n|d kr4td d}|| _t|tkrht|dkrh|d | _|d | _ntt	|drt	|d	rt|j
jdkr|j
jd dkr|j
d | _|jd | _q|j
| _|j| _ntd
tt| tt|dr|j}t|tkr&t|dkr&|d | _|d | _nt|d kr:tdn`tj|rh|d| _|dkrtdn2tt|d | _|dkrtj|dd| _tj| jstt| jdrtj| jst| j| _| j| jj| j | _nt| j| j| j | _t| j| _|dkr6tt| jdkd | _ | j| j  | _| jd d | j f | j d d f | _| j| j  | _t!| j\| _"}t#t#| j"| j| j"j| _t#| j"| j| _t#|| j| _tj$%| j\}}|& dk r| jt'| jjd d  | _| (|\}}	t#|| j| _)|	| _*n,|dkrV|dkrbtd ntd| d S )NZinterventionalzgThe option feature_dependence="interventional" is has been renamed to feature_dependence="independent"!independentzKThe default value for feature_dependence has been changed to "independent"!   r   r   coef_
intercept_z"An unknown model type was passed: 'pandas.core.frame.DataFrame'>z0A background data distribution must be provided!DOnly feature_dependence = 'independent' is supported for sparse datacorrelationF)Zrowvarzmatrix'>:0yE>gHz>gư>r   zGSetting nsamples has no effect when feature_dependence = 'independent'!z-Unknown type of feature_dependence provided: )+nsampleswarningswarnfeature_dependencetypetuplelencoefZ	intercepthasattrr	   shaper
   	ExceptionstrendswithvaluesmeancovspsparseissparsenparrayflattenZasmatrixdotTZexpected_valueMwherediag
valid_indsduplicate_componentsavg_projmatmulZlinalgZeigmineye_estimate_transformsmean_transformedx_transform)
selfmodeldatar   r   Zsum_proje_mean_transformr2    r9   =/tmp/pip-target-lpfmz8o1/lib/python/shap/explainers/linear.py__init__+   sr    


 






&
$

zLinearExplainer.__init__c              	   C   s  t | j}t||f}t||f}tj|tjd}tt|dD ]*}tj	| td}t|df}t|D ]}	||	 }
|}|}| j
dd|d|	d  f }||
ddf j}t||}| j
|
|
f }|t|j| }t|	d |	d f}|	dkrR|t|||  |ddddf< | |  |dddf< |dddf< d| |d< ||
|
f  | j|
 7  < t| j||	d d  t||||	d d  }||
|d|	d  f  |7  < t| j||	d  t||||	d  }||
|d|	 f  |8  < ||
|
f  | j|
 7  < ||
|d|	d  f  |7  < ||
|d|	 f  |8  < qvqD|| }|| }||fS )	a<   Uses block matrix inversion identities to quickly estimate transforms.

        After a bit of matrix math we can isolate a transform matrix (# features x # features)
        that is independent of any sample we are explaining. It is the result of averaging over
        all feature permutations, but we just use a fixed number of samples to estimate the value.

        TODO: Do a brute force enumeration when # feature subsets is less than nsamples. This could
              happen through a recursive method that uses the same block matrix inversion as below.
        ZdtypezEstimating transformsr   r   r   Nr   )r>   r>   )r   r   r"   zerosZarangeintr   rangerandomshuffler   r&   r-   outer)r3   r   r'   r8   r2   Zindsr7   Zcov_inv_SiSiZcov_SijiZcov_SZ
cov_inv_SSdtZuZ	coef_R_SiZcoef_R_Sr9   r9   r:   r0      sB    



$&6 .  z$LinearExplainer._estimate_transformsc                    s  t t dr j nt t dr2 j t jdksVt jdksVtdjdkrtj	
 rttdtt ddjf jjjjj }t|j}t|jd	 jf}||ddjf< |S jd
krtj	
 rJtjjdkr(tt j jd	 S  fddtjjd	 D S nHtjjdkrrt j j S  fddtjjd	 D S dS )aR   Estimate the SHAP values for a set of samples.

        Parameters
        ----------
        X : numpy.array, pandas.DataFrame or scipy.csr_matrix
            A matrix of samples (# samples x # features) on which to explain the model's output.

        Returns
        -------
        For models with a single output this returns a matrix of SHAP values
        (# samples x # features). Each row sums to the difference between the model output for that
        sample and the expected value of the model output (which is stored as expected_value
        attribute of the explainer).
        zpandas.core.series.Series'>r   r   r   z%Instance must have 1 or 2 dimensions!r   r   Nr   r   c              	      s*   g | ]"}t t  j j| qS r9   )r"   r#   multiplyr   r   .0rF   Xr3   r9   r:   
<listcomp>   s     z/LinearExplainer.shap_values.<locals>.<listcomp>c                    s&   g | ]}t  j j|  qS r9   )r"   r#   r   r   rL   rN   r9   r:   rP      s     )r   r   r   r   r   r   AssertionErrorr   r   r    r!   r   r"   r-   r*   r,   r&   r2   r1   r?   r'   r   r#   rK   r   rA   )r3   rO   phiZfull_phir9   rN   r:   shap_values   s*    $
0"zLinearExplainer.shap_values)r   N)__name__
__module____qualname____doc__r;   r0   rS   r9   r9   r9   r:   r      s   #
T@r   c                 C   s0  t dt t |  }t t || |} t j| jd t jd }d}t| jd D ]r}d}t| jd D ]Z}|| dk rnt d| ||f  | ||f  | ||f  dk rn|s|d7 }d}|||< qnqXt 	t
t || jd f}d|d	< td| jd D ]}d||| |f< q|j|d j|fS )
Nr   r   r<   r>   Fr   r   Tr=   )r"   r)   sqrtr-   Zonesr   r@   rA   absr?   r   uniquer&   sum)CD
componentscountrF   Zfound_grouprE   Zprojr9   r9   r:   r+      s"    >r+   )numpyr"   Zscipyr   r   Ztqdm.autonotebookr   Z	explainerr   r   r+   r9   r9   r9   r:   <module>   s    i