Goal

  • Test DeepLIFT importance scoring with BPNet and compare it against the gradient-based and ISM methods.
In [1]:
from basepair.imports import *
Using TensorFlow backend.
In [2]:
# Configure the TensorFlow session; presumably the args select the GPU
# device / resource limits — TODO confirm create_tf_session's signature.
create_tf_session(2,3)
Out[2]:
<tensorflow.python.client.session.Session at 0x7f8245cee2b0>
In [3]:
# Trained BPNet model directory (oct-sox-nanog-klf tasks, n_dil_layers=9).
# `Path` and `ddir` come from the wildcard import in the first cell.
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
In [4]:
# Load the trained BPNet predictor from the model directory. The TF
# NHWC-deprecation warnings logged below come from model construction and
# do not indicate a load failure.
bpnet = BPNetPredictor.from_mdir(model_dir)
WARNING:tensorflow:From /users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:497: calling conv1d (from tensorflow.python.ops.nn_ops) with data_format=NHWC is deprecated and will be removed in a future version.
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
2018-12-07 13:17:30,358 [WARNING] From /users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:497: calling conv1d (from tensorflow.python.ops.nn_ops) with data_format=NHWC is deprecated and will be removed in a future version.
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
WARNING:tensorflow:From /users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:198: retry (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Use the retry module or similar alternatives.
2018-12-07 13:17:38,135 [WARNING] From /users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:198: retry (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Use the retry module or similar alternatives.
In [5]:
from concise.preprocessing import encodeDNA
In [6]:
from basepair.plot.tracks import plot_tracks, filter_tracks
In [7]:
from basepair.exp.chipnexus.simulate import generate_seq, postproc, average_profiles, flatten
In [8]:
# Generate a single 1 kb sequence with the embedded motif "ATTTGCATAACAAAG"
# (presumably an Oct4/Sox2 composite — confirm in simulate.generate_seq),
# then one-hot encode it.
motif_seq = generate_seq("ATTTGCATAACAAAG", seqlen=1000)
onehot = encodeDNA([motif_seq])
In [9]:
# Duplicate the sequence into a batch of two: (1, 1000, 4) -> (2, 1000, 4).
# Fix: use np.concatenate along the batch axis instead of
# np.array([onehot, onehot]).squeeze() — squeeze() removes *every* singleton
# axis, so it would silently mangle the result for any input with another
# length-1 dimension; concatenate only merges the batch dimension.
# NOTE(review): this cell is still self-referential (re-running it keeps
# doubling the batch) — restart-and-run-all to reproduce the outputs below.
onehot = np.concatenate([onehot, onehot], axis=0)
In [10]:
# Sanity check: expect (2, 1000, 4) — a batch of two one-hot 1 kb sequences.
onehot.shape
Out[10]:
(2, 1000, 4)
In [11]:
# Gradient-based importance for the Oct4 head, profile-weighted summary.
# Multiplying by onehot[0] keeps only the contribution of the observed base
# at each position (grad x input).
grad_weighted = bpnet.imp_score(onehot, 'Oct4', method="grad", pred_summary='weighted')
grad_weighted[0] * onehot[0]
Out[11]:
array([[ 0.00000000e+00, -0.00000000e+00,  0.00000000e+00,
        -8.69544165e-04],
       [ 0.00000000e+00, -0.00000000e+00, -1.03678799e-03,
         0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         2.44161347e-05],
       ...,
       [ 0.00000000e+00, -6.44786458e-04,  0.00000000e+00,
         0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  5.83029818e-04,
         0.00000000e+00],
       [ 0.00000000e+00, -1.22902542e-03,  0.00000000e+00,
         0.00000000e+00]])
In [12]:
# Same gradient-based scoring, but summarizing the total-count prediction
# instead of the weighted profile.
grad_count = bpnet.imp_score(onehot, 'Oct4', method="grad", pred_summary='count')
grad_count[0] * onehot[0]
Out[12]:
array([[ 0.        , -0.        ,  0.        , -0.00257634],
       [ 0.        , -0.        , -0.00227159,  0.        ],
       [-0.        ,  0.        , -0.        ,  0.00122822],
       ...,
       [-0.        ,  0.00128848,  0.        , -0.        ],
       [-0.        ,  0.        ,  0.00148865, -0.        ],
       [-0.        ,  0.00150844, -0.        , -0.        ]])
In [13]:
# In-silico mutagenesis (ISM) importance, weighted-profile summary.
ism_weighted = bpnet.imp_score(onehot, 'Oct4', method="ism", pred_summary='weighted')
ism_weighted[0] * onehot[0]
Out[13]:
array([[ 0.        , -0.        , -0.        , -0.1640625 ],
       [-0.        , -0.        ,  0.38769531,  0.        ],
       [-0.        ,  0.        ,  0.        ,  0.03515625],
       ...,
       [-0.        , -0.62841797,  0.        ,  0.        ],
       [-0.        , -0.        ,  0.25439453,  0.        ],
       [-0.        ,  0.15380859,  0.        , -0.        ]])
In [14]:
# ISM importance on the count summary.
ism_count = bpnet.imp_score(onehot, 'Oct4', method="ism", pred_summary='count')
ism_count[0] * onehot[0]
Out[14]:
array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        -7.57694244e-04],
       [ 0.00000000e+00, -0.00000000e+00,  6.36577606e-05,
         0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        -8.20159912e-04],
       ...,
       [-0.00000000e+00,  4.49419022e-04, -0.00000000e+00,
         0.00000000e+00],
       [ 0.00000000e+00, -0.00000000e+00,  1.58500671e-03,
        -0.00000000e+00],
       [-0.00000000e+00,  1.68561935e-04,  0.00000000e+00,
        -0.00000000e+00]])
In [15]:
# DeepLIFT importance (via DeepExplain), weighted-profile summary. The first
# call has to build the explanation functions, hence the DeepExplain log
# messages below.
dl_weighted = bpnet.imp_score(onehot, 'Oct4', method="deeplift", pred_summary='weighted')
dl_weighted[0] * onehot[0]
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
DeepExplain: running "deeplift" explanation method (5)
Model with multiple inputs:  False
Out[15]:
array([[ 0.        , -0.        , -0.        ,  0.00065044],
       [ 0.        , -0.        , -0.00083281,  0.        ],
       [ 0.        , -0.        , -0.        ,  0.00138038],
       ...,
       [ 0.        , -0.00053166,  0.        ,  0.        ],
       [ 0.        , -0.        ,  0.00073254,  0.        ],
       [ 0.        , -0.00060731,  0.        ,  0.        ]])
In [16]:
# Re-run DeepLIFT (weighted): the explanation function should now come from
# the cache, so no DeepExplain messages are expected and the values should
# match the previous output exactly.
dl_weighted_again = bpnet.imp_score(onehot, 'Oct4', method="deeplift", pred_summary='weighted')
dl_weighted_again[0] * onehot[0]
Out[16]:
array([[ 0.        , -0.        , -0.        ,  0.00065044],
       [ 0.        , -0.        , -0.00083281,  0.        ],
       [ 0.        , -0.        , -0.        ,  0.00138038],
       ...,
       [ 0.        , -0.00053166,  0.        ,  0.        ],
       [ 0.        , -0.        ,  0.00073254,  0.        ],
       [ 0.        , -0.00060731,  0.        ,  0.        ]])
In [17]:
# DeepLIFT on the count summary: with a per-summary cache key this is a
# distinct cached function, and again no DeepExplain messages are expected.
dl_count = bpnet.imp_score(onehot, 'Oct4', method="deeplift", pred_summary='count')
dl_count[0] * onehot[0]
Out[17]:
array([[ 0.00000000e+00, -0.00000000e+00, -0.00000000e+00,
         1.16095156e-03],
       [ 0.00000000e+00, -0.00000000e+00,  4.39738214e-04,
         0.00000000e+00],
       [ 0.00000000e+00, -0.00000000e+00, -0.00000000e+00,
         1.64940208e-03],
       ...,
       [ 0.00000000e+00, -6.51323644e-05,  0.00000000e+00,
        -0.00000000e+00],
       [ 0.00000000e+00, -0.00000000e+00,  2.87521398e-03,
        -0.00000000e+00],
       [ 0.00000000e+00,  1.03934472e-04,  0.00000000e+00,
         0.00000000e+00]])
In [18]:
# Regression check: the gradient method previously stopped working after
# DeepLIFT modified the TensorFlow graph. Values should match the earlier
# grad/weighted output. Open question: can the two graphs be kept separate?
grad_after_dl = bpnet.imp_score(onehot, 'Oct4', method="grad", pred_summary='weighted')
grad_after_dl[0] * onehot[0]
Out[18]:
array([[ 0.00000000e+00, -0.00000000e+00,  0.00000000e+00,
        -8.69544048e-04],
       [ 0.00000000e+00, -0.00000000e+00, -1.03678787e-03,
         0.00000000e+00],
       [-0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         2.44161929e-05],
       ...,
       [ 0.00000000e+00, -6.44786516e-04,  0.00000000e+00,
         0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  5.83029876e-04,
         0.00000000e+00],
       [ 0.00000000e+00, -1.22902542e-03,  0.00000000e+00,
         0.00000000e+00]])

Old code

The old code had a bug in which all importance-scoring methods shared the same function-cache key, so a function cached for one method could be wrongly returned for another:

k = f"{strand}/{task_id}/{pred_summary}"
    if k in self.grad_fns:
        return self.grad_fns[k]

I updated it to prefix the cache key with the method name, so each method gets its own cached function:

k = f"ism/{strand}/{task_id}/{pred_summary}"
    if k in self.grad_fns:
        return self.grad_fns[k]
In [19]:
# Visual comparison of all three attribution methods on the weighted-profile
# summary, zoomed to positions 400-600 around the embedded motif.
imp_scores = {}
for method in ['grad', 'deeplift', 'ism']:
    scores = bpnet.imp_score(onehot, 'Oct4', method=method, pred_summary='weighted')
    imp_scores[method] = scores[0] * onehot[0]

plot_tracks(filter_tracks(imp_scores, [400, 600]));
In [20]:
# Same three-method comparison, now on the count summary.
imp_scores = {}
for method in ['grad', 'deeplift', 'ism']:
    scores = bpnet.imp_score(onehot, 'Oct4', method=method, pred_summary='count')
    imp_scores[method] = scores[0] * onehot[0]

plot_tracks(filter_tracks(imp_scores, [400, 600]));