In [1]:
from basepair.imports import *
# Imports
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from basepair.imports import *
from basepair.exp.paper.config import tf_colors
from basepair.functions import mean
from basepair.cli.imp_score import ImpScoreFile
Using TensorFlow backend.
In [2]:
# Create a TensorFlow session; the call returns a `Session` object, which is
# echoed as this cell's output.
# NOTE(review): the argument 0 is presumably a GPU/device index — confirm
# against the definition of create_tf_session.
create_tf_session(0)
Out[2]:
<tensorflow.python.client.session.Session at 0x7fdae31e93c8>
In [3]:
from basepair.seqmodel import SeqModel
In [4]:
from basepair.exp.chipnexus.simulate import random_seq
In [5]:
from concise.preprocessing import encodeDNA
In [6]:
# Call random_seq(1000) 512 times (presumably 512 random sequences of length
# 1000 — TODO confirm) and encode the resulting list in a single encodeDNA call.
# NOTE(review): no RNG seed is set in the visible cells, so this batch
# presumably differs between kernel runs — confirm whether that matters.
random_seqs = [random_seq(1000) for _ in range(512)]
seqs = encodeDNA(random_seqs)

With data augmentation

Random sequences

In [7]:
# Model directory used for the "With data augmentation" section.
# NOTE(review): the comma-separated directory name presumably encodes the
# training hyper-parameters. It differs from the directory assigned in the
# "No data augmentation" section only by the trailing ",TRUE".
mdir='output/nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE,TRUE'
In [8]:
# Load the model from the directory currently held in `mdir`.
# The name `m` is rebound to a different model further down the notebook, so
# the prediction cells depend on which of the two load cells ran last.
m = SeqModel.from_mdir(mdir)
WARNING:tensorflow:From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:497: calling conv1d (from tensorflow.python.ops.nn_ops) with data_format=NHWC is deprecated and will be removed in a future version.
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
2019-03-13 12:05:15,123 [WARNING] From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:497: calling conv1d (from tensorflow.python.ops.nn_ops) with data_format=NHWC is deprecated and will be removed in a future version.
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
WARNING:tensorflow:From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:198: retry (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Use the retry module or similar alternatives.
2019-03-13 12:05:28,181 [WARNING] From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:198: retry (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Use the retry module or similar alternatives.
In [34]:
# Start from the model's neutral bias inputs, then add the encoded random
# sequences under the 'seq' key before predicting.
# NOTE(review): the meaning of the two 1000 arguments is not visible here —
# presumably sequence/profile lengths; confirm.
x = m.neutral_bias_inputs(1000, 1000)

x['seq'] = seqs

# NOTE(review): this cell calls m.predict, whereas the matching "Random
# sequences" cell in the "No data augmentation" section calls
# m.predict_preact — confirm the two sections are comparable.
preds = m.predict(x)
In [35]:
# Plot the mean predicted Oct4 profile (prediction array averaged over axis 0),
# drawing one labelled curve for each of the two indices of the last axis so
# the curves can be told apart.
oct4_profile_mean = preds['Oct4/profile'].mean(axis=0)  # average once, reuse for both curves

fig, ax = plt.subplots()
for channel in (0, 1):
    ax.plot(oct4_profile_mean[:, channel], label=f'channel {channel}')
ax.set_title('Oct4/profile: mean prediction')
ax.set_xlabel('index along profile axis')
ax.legend()
plt.show()  # render the figure without echoing a matplotlib object repr
Out[35]:
[<matplotlib.lines.Line2D at 0x7f2e3c3e2978>]
In [36]:
# Plot the mean predicted Sox2 profile (prediction array averaged over axis 0),
# drawing one labelled curve for each of the two indices of the last axis so
# the curves can be told apart.
sox2_profile_mean = preds['Sox2/profile'].mean(axis=0)  # average once, reuse for both curves

fig, ax = plt.subplots()
for channel in (0, 1):
    ax.plot(sox2_profile_mean[:, channel], label=f'channel {channel}')
ax.set_title('Sox2/profile: mean prediction')
ax.set_xlabel('index along profile axis')
ax.legend()
plt.show()  # render the figure without echoing a matplotlib object repr
Out[36]:
[<matplotlib.lines.Line2D at 0x7f2e08b56748>]

All zeros

In [37]:
# Same inputs as the random-sequence cell, but the 'seq' entry is replaced by
# an all-zero array with the same shape and dtype as `seqs`.
x = m.neutral_bias_inputs(1000, 1000)

x['seq'] = np.zeros_like(seqs)

# NOTE(review): `preds` is rebound by the very next cell (which calls
# m.predict_preact), so under top-to-bottom execution this result is never
# plotted — confirm which of the two prediction cells should be kept.
preds = m.predict(x)
In [9]:
# Run predict_preact on an all-zero array shaped like `seqs`; this rebinds
# `preds` from the previous cell.
# NOTE(review): the other prediction cells pass the dict returned by
# m.neutral_bias_inputs (with a 'seq' entry); here the bare array is passed
# instead — confirm that both input forms are equivalent for this model.
preds = m.predict_preact(np.zeros_like(seqs))
In [10]:
# Plot the mean predicted Oct4 profile (prediction array averaged over axis 0),
# drawing one labelled curve for each of the two indices of the last axis so
# the curves can be told apart.
oct4_profile_mean = preds['Oct4/profile'].mean(axis=0)  # average once, reuse for both curves

fig, ax = plt.subplots()
for channel in (0, 1):
    ax.plot(oct4_profile_mean[:, channel], label=f'channel {channel}')
ax.set_title('Oct4/profile: mean prediction')
ax.set_xlabel('index along profile axis')
ax.legend()
plt.show()  # render the figure without echoing a matplotlib object repr
Out[10]:
[<matplotlib.lines.Line2D at 0x7fed47b59048>]
In [39]:
# Plot the mean predicted Sox2 profile (prediction array averaged over axis 0),
# drawing one labelled curve for each of the two indices of the last axis so
# the curves can be told apart.
sox2_profile_mean = preds['Sox2/profile'].mean(axis=0)  # average once, reuse for both curves

fig, ax = plt.subplots()
for channel in (0, 1):
    ax.plot(sox2_profile_mean[:, channel], label=f'channel {channel}')
ax.set_title('Sox2/profile: mean prediction')
ax.set_xlabel('index along profile axis')
ax.legend()
plt.show()  # render the figure without echoing a matplotlib object repr
Out[39]:
[<matplotlib.lines.Line2D at 0x7f2e08a95da0>]
In [38]:
# Plot the mean predicted Oct4 profile (prediction array averaged over axis 0),
# drawing one labelled curve for each of the two indices of the last axis so
# the curves can be told apart.
# NOTE(review): this cell repeats an earlier Oct4 plot cell in this section;
# what it shows depends on which prediction cell last assigned `preds`.
oct4_profile_mean = preds['Oct4/profile'].mean(axis=0)  # average once, reuse for both curves

fig, ax = plt.subplots()
for channel in (0, 1):
    ax.plot(oct4_profile_mean[:, channel], label=f'channel {channel}')
ax.set_title('Oct4/profile: mean prediction')
ax.set_xlabel('index along profile axis')
ax.legend()
plt.show()  # render the figure without echoing a matplotlib object repr
Out[38]:
[<matplotlib.lines.Line2D at 0x7f2e08b33860>]
In [39]:
# Plot the mean predicted Sox2 profile (prediction array averaged over axis 0),
# drawing one labelled curve for each of the two indices of the last axis so
# the curves can be told apart.
# NOTE(review): this cell repeats an earlier Sox2 plot cell in this section;
# what it shows depends on which prediction cell last assigned `preds`.
sox2_profile_mean = preds['Sox2/profile'].mean(axis=0)  # average once, reuse for both curves

fig, ax = plt.subplots()
for channel in (0, 1):
    ax.plot(sox2_profile_mean[:, channel], label=f'channel {channel}')
ax.set_title('Sox2/profile: mean prediction')
ax.set_xlabel('index along profile axis')
ax.legend()
plt.show()  # render the figure without echoing a matplotlib object repr
Out[39]:
[<matplotlib.lines.Line2D at 0x7f2e08a95da0>]

No data augmentation

In [7]:
# Model directory used for the "No data augmentation" section; this rebinds
# `mdir`, which earlier pointed at the directory whose name ends in an extra
# ",TRUE".
mdir='output/nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE'
# Not using data augmentation
In [8]:
# Load the model from the directory currently held in `mdir`, rebinding `m`.
# Every prediction cell executed after this point uses this model rather than
# the one loaded earlier in the notebook.
m = SeqModel.from_mdir(mdir)
WARNING:tensorflow:From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:497: calling conv1d (from tensorflow.python.ops.nn_ops) with data_format=NHWC is deprecated and will be removed in a future version.
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
2019-03-13 12:06:16,774 [WARNING] From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:497: calling conv1d (from tensorflow.python.ops.nn_ops) with data_format=NHWC is deprecated and will be removed in a future version.
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
WARNING:tensorflow:From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:198: retry (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Use the retry module or similar alternatives.
2019-03-13 12:06:33,102 [WARNING] From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:198: retry (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Use the retry module or similar alternatives.

Random sequences

In [9]:
# Start from the model's neutral bias inputs, then add the encoded random
# sequences under the 'seq' key before predicting.
# NOTE(review): the meaning of the two 1000 arguments is not visible here —
# presumably sequence/profile lengths; confirm.
x = m.neutral_bias_inputs(1000, 1000)

x['seq'] = seqs

# NOTE(review): this cell calls m.predict_preact, whereas the matching
# "Random sequences" cell in the "With data augmentation" section calls
# m.predict — confirm the two sections are comparable.
preds = m.predict_preact(x)
In [10]:
# Plot the mean predicted Oct4 profile (prediction array averaged over axis 0),
# drawing one labelled curve for each of the two indices of the last axis so
# the curves can be told apart.
oct4_profile_mean = preds['Oct4/profile'].mean(axis=0)  # average once, reuse for both curves

fig, ax = plt.subplots()
for channel in (0, 1):
    ax.plot(oct4_profile_mean[:, channel], label=f'channel {channel}')
ax.set_title('Oct4/profile: mean prediction')
ax.set_xlabel('index along profile axis')
ax.legend()
plt.show()  # render the figure without echoing a matplotlib object repr
Out[10]:
[<matplotlib.lines.Line2D at 0x7fd9a01df0f0>]
In [11]:
# Plot the mean predicted Sox2 profile (prediction array averaged over axis 0),
# drawing one labelled curve for each of the two indices of the last axis so
# the curves can be told apart.
sox2_profile_mean = preds['Sox2/profile'].mean(axis=0)  # average once, reuse for both curves

fig, ax = plt.subplots()
for channel in (0, 1):
    ax.plot(sox2_profile_mean[:, channel], label=f'channel {channel}')
ax.set_title('Sox2/profile: mean prediction')
ax.set_xlabel('index along profile axis')
ax.legend()
plt.show()  # render the figure without echoing a matplotlib object repr
Out[11]:
[<matplotlib.lines.Line2D at 0x7fd77612bac8>]

All zeros

In [12]:
# Same inputs as the random-sequence cell, but the 'seq' entry is replaced by
# an all-zero array with the same shape and dtype as `seqs`.
# NOTE(review): the meaning of the two 1000 arguments is not visible here —
# presumably sequence/profile lengths; confirm.
x = m.neutral_bias_inputs(1000, 1000)

x['seq'] = np.zeros_like(seqs)

# Rebinds `preds`; the plot cells that follow read whatever was assigned last.
preds = m.predict_preact(x)
In [13]:
# Plot the mean predicted Oct4 profile (prediction array averaged over axis 0),
# drawing one labelled curve for each of the two indices of the last axis so
# the curves can be told apart.
oct4_profile_mean = preds['Oct4/profile'].mean(axis=0)  # average once, reuse for both curves

fig, ax = plt.subplots()
for channel in (0, 1):
    ax.plot(oct4_profile_mean[:, channel], label=f'channel {channel}')
ax.set_title('Oct4/profile: mean prediction')
ax.set_xlabel('index along profile axis')
ax.legend()
plt.show()  # render the figure without echoing a matplotlib object repr
Out[13]:
[<matplotlib.lines.Line2D at 0x7fd776099828>]
In [14]:
# Plot the mean predicted Sox2 profile (prediction array averaged over axis 0),
# drawing one labelled curve for each of the two indices of the last axis so
# the curves can be told apart.
sox2_profile_mean = preds['Sox2/profile'].mean(axis=0)  # average once, reuse for both curves

fig, ax = plt.subplots()
for channel in (0, 1):
    ax.plot(sox2_profile_mean[:, channel], label=f'channel {channel}')
ax.set_title('Sox2/profile: mean prediction')
ax.set_xlabel('index along profile axis')
ax.legend()
plt.show()  # render the figure without echoing a matplotlib object repr
Out[14]:
[<matplotlib.lines.Line2D at 0x7fd776078908>]

With all 0 background

  • What is the distribution of importance scores on dinucleotide-shuffled sequences?
In [ ]: