In [42]:
# Bias-corrected results
fig = plot_patterns(list(reversed(short_patterns_clustered)), tasks, pattern_trim=(26-5, 51-5))
In [79]:
figures
Out[79]:
PosixPath('/users/avsec/workspace/basepair/data/figures/modisco')
In [ ]:
fig.savefig(figures / 'per-tf.short-patterns.nexus.bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.nexus.bias-corrected.png')

Old results

In [114]:
fig = plot_patterns(short_patterns_clustered, tasks, pattern_trim=(26, 51))
fig.savefig(figures / 'per-tf.short-patterns.nexus.bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.nexus.bias-corrected.png')

Notes

  • I used the same y axis for
    • profiles in the same columns
    • imp. scores for the same TF
  • Seq. information content was always on the [0, 2] range

Joint modisco run

  • [ ] Compare the joint modisco run vs per-TF run
    • [ ] Which new motif's do we discover? TEs?
    • [ ] What's the instances overlap?
      • pairwise align them (?)

Other datasets

ChIP-nexus, profile model, peaks, not input-corrected

In [75]:
tasks = ['Oct4', 'Sox2', 'Nanog', 'Klf4']
In [76]:
model_dir = sdir / 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE-2'
mr_dict = [(task, ModiscoResult(model_dir / f"deeplift/{task}/out/profile/wn/modisco.h5"))
           for task in tasks]
for k,v in mr_dict: v.open()
footprints = {t: read_pkl(model_dir / f"deeplift/{t}/out/profile/wn/footprints.pkl")
              for t in tasks}
In [80]:
patterns = get_patterns(mr_dict, footprints, tasks, min_n_seqlets=300)
short_patterns_clustered = cluster_align_patterns([p for p in patterns if p.seq_info_content < 30], n_clusters=len(patterns))
100%|██████████| 19/19 [00:01<00:00, 13.29it/s]
100%|██████████| 19/19 [00:00<00:00, 232.94it/s]
In [81]:
# non-bias corrected
fig, ax = plt.subplots(figsize=get_figsize(.25))
ax.hist([p.seq_info_content for p in patterns], 20);

ax.set_ylabel("Frequency");
ax.set_xlabel("Sequence IC");
# fig.savefig(figures / 'per-tf.pattern-length.nexus.bias-corrected.pdf', bbox_inches='tight', pad_inches=0)
In [83]:
fig = plot_patterns(short_patterns_clustered, tasks, pattern_trim=(26, 45))
fig.savefig(figures / 'per-tf.short-patterns.chip-nexus.peaks-profile.not-bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.chip-nexus.peaks-profile.not-bias-corrected.png')

ChIP-nexus, binary model, genome-wide

In [84]:
model_dir = sdir / 'nexus,gw,OSNK,1,0,0,FALSE,valid,0.5,64,25,0.001,9,FALSE'
mr_dict = [(task, ModiscoResult(model_dir / f"deeplift/{task}/out/class/pre-act/modisco.h5"))
           for task in tasks]
for k,v in mr_dict: v.open()
footprints = {t: read_pkl(model_dir / f"deeplift/{t}/out/class/pre-act/footprints.pkl")
              for t in tasks}
In [85]:
patterns = get_patterns(mr_dict, footprints, tasks, min_n_seqlets=300)
short_patterns_clustered = cluster_align_patterns([p for p in patterns if p.seq_info_content < 30], n_clusters=len(patterns))
100%|██████████| 25/25 [00:02<00:00,  8.65it/s]
100%|██████████| 25/25 [00:00<00:00, 247.49it/s]
In [86]:
# non-bias corrected
fig, ax = plt.subplots(figsize=get_figsize(.25))
ax.hist([p.seq_info_content for p in patterns], 20);

ax.set_ylabel("Frequency");
ax.set_xlabel("Sequence IC");
# fig.savefig(figures / 'per-tf.pattern-length.nexus.bias-corrected.pdf', bbox_inches='tight', pad_inches=0)
In [88]:
fig = plot_patterns(short_patterns_clustered, tasks, pattern_trim=(26, 45))
fig.savefig(figures / 'per-tf.short-patterns.chip-nexus.gw-binary.pdf')
fig.savefig(figures / 'per-tf.short-patterns.chip-nexus.gw-binary.png')

ChIP-seq, profile model, peaks, input-corrected

In [44]:
chipseq_tasks = ['Oct4', 'Sox2', 'Nanog']
In [46]:
model_dir = sdir / 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE,[1,50],TRUE'
mr_dict = [(task, ModiscoResult(model_dir / f"deeplift/{task}/out/profile/wn/modisco.h5"))
           for task in chipseq_tasks]
for k,v in mr_dict: v.open()
footprints = {t: read_pkl(model_dir / f"deeplift/{t}/out/profile/wn/footprints.pkl")
              for t in chipseq_tasks}
In [47]:
patterns = get_patterns(mr_dict, footprints, chipseq_tasks, min_n_seqlets=300)
short_patterns_clustered = cluster_align_patterns([p for p in patterns if p.seq_info_content < 30], n_clusters=len(patterns))
100%|██████████| 14/14 [00:00<00:00, 16.48it/s]
100%|██████████| 14/14 [00:00<00:00, 243.76it/s]
In [48]:
# non-bias corrected
fig, ax = plt.subplots(figsize=get_figsize(.25))
ax.hist([p.seq_info_content for p in patterns], 20);

ax.set_ylabel("Frequency");
ax.set_xlabel("Sequence IC");
# fig.savefig(figures / 'per-tf.pattern-length.nexus.bias-corrected.pdf', bbox_inches='tight', pad_inches=0)
In [57]:
fig = plot_patterns(short_patterns_clustered, tasks=chipseq_tasks, pattern_trim=(26, 45))
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.peaks-profile.bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.peaks-profile.bias-corrected.png')

ChIP-seq, binary model, genome-wide

In [67]:
model_dir = sdir / 'seq,gw,OSN,1,0,0,FALSE,valid,0.5,64,50,0.001,9,FALSE'
mr_dict = [(task, ModiscoResult(model_dir / f"deeplift/{task}/out/class/pre-act/modisco.h5"))
           for task in chipseq_tasks]
for k,v in mr_dict: v.open()
footprints = {t: read_pkl(model_dir / f"deeplift/{t}/out/class/pre-act/footprints.pkl")
              for t in chipseq_tasks}
In [68]:
patterns = get_patterns(mr_dict, footprints, chipseq_tasks, min_n_seqlets=300)
short_patterns_clustered = cluster_align_patterns([p for p in patterns if p.seq_info_content < 30], n_clusters=len(patterns))
100%|██████████| 14/14 [00:00<00:00, 15.96it/s]
100%|██████████| 14/14 [00:00<00:00, 235.13it/s]
In [69]:
# non-bias corrected
fig, ax = plt.subplots(figsize=get_figsize(.25))
ax.hist([p.seq_info_content for p in patterns], 20);

ax.set_ylabel("Frequency");
ax.set_xlabel("Sequence IC");
# fig.savefig(figures / 'per-tf.pattern-length.nexus.bias-corrected.pdf', bbox_inches='tight', pad_inches=0)
In [71]:
fig = plot_patterns(short_patterns_clustered, chipseq_tasks, pattern_trim=(30, 50))
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.gw-binary.bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.gw-binary.bias-corrected.png')

ChIP-seq, profile model, peaks, not input-corrected

In [58]:
model_dir = sdir / 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE'
mr_dict = [(task, ModiscoResult(model_dir / f"deeplift/{task}/out/profile/wn/modisco.h5"))
           for task in chipseq_tasks]
for k,v in mr_dict: v.open()
footprints = {t: read_pkl(model_dir / f"deeplift/{t}/out/profile/wn/footprints.pkl")
              for t in chipseq_tasks}
---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-58-2026096f10c9> in <module>
      1 model_dir = sdir / 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE'
      2 mr_dict = [(task, ModiscoResult(model_dir / f"deeplift/{task}/out/profile/wn/modisco.h5"))
----> 3            for task in chipseq_tasks]
      4 for k,v in mr_dict: v.open()
      5 footprints = {t: read_pkl(model_dir / f"deeplift/{t}/out/profile/wn/footprints.pkl")

<ipython-input-58-2026096f10c9> in <listcomp>(.0)
      1 model_dir = sdir / 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE'
      2 mr_dict = [(task, ModiscoResult(model_dir / f"deeplift/{task}/out/profile/wn/modisco.h5"))
----> 3            for task in chipseq_tasks]
      4 for k,v in mr_dict: v.open()
      5 footprints = {t: read_pkl(model_dir / f"deeplift/{t}/out/profile/wn/footprints.pkl")

~/workspace/basepair/basepair/modisco/results.py in __init__(self, fpath)
    104         self.fpath = fpath
    105         self.f = HDF5Reader(self.fpath)
--> 106         self.f.open()
    107 
    108         # example ranges. loaded when needed

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/kipoi/readers.py in open(self)
    108         """Open the file
    109         """
--> 110         self.__enter__()
    111 
    112     def close(self):

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/kipoi/readers.py in __enter__(self)
     99     def __enter__(self):
    100         import h5py
--> 101         self.f = h5py.File(self.file_path, "r")
    102         return self
    103 

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/h5py/_hl/files.py in __init__(self, name, mode, driver, libver, userblock_size, swmr, rdcc_nslots, rdcc_nbytes, rdcc_w0, track_order, **kwds)
    392                 fid = make_fid(name, mode, userblock_size,
    393                                fapl, fcpl=make_fcpl(track_order=track_order),
--> 394                                swmr=swmr)
    395 
    396             if swmr_support:

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/h5py/_hl/files.py in make_fid(name, mode, userblock_size, fapl, fcpl, swmr)
    168         if swmr and swmr_support:
    169             flags |= h5f.ACC_SWMR_READ
--> 170         fid = h5f.open(name, flags, fapl=fapl)
    171     elif mode == 'r+':
    172         fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)

h5py/_objects.pyx in h5py._objects.with_phil.wrapper()

h5py/_objects.pyx in h5py._objects.with_phil.wrapper()

h5py/h5f.pyx in h5py.h5f.open()

OSError: Unable to open file (unable to open file: name = '/oak/stanford/groups/akundaje/avsec/basepair/data/processed/comparison/output/seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE/deeplift/Sox2/out/profile/wn/modisco.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)
In [ ]:
patterns = get_patterns(mr_dict, footprints, chipseq_tasks, min_n_seqlets=300)
short_patterns_clustered = cluster_align_patterns([p for p in patterns if p.seq_info_content < 30], n_clusters=len(patterns))
In [ ]:
# non-bias corrected
fig, ax = plt.subplots(figsize=get_figsize(.25))
ax.hist([p.seq_info_content for p in patterns], 20);

ax.set_ylabel("Frequency");
ax.set_xlabel("Sequence IC");
# fig.savefig(figures / 'per-tf.pattern-length.nexus.bias-corrected.pdf', bbox_inches='tight', pad_inches=0)
In [ ]:
fig = plot_patterns(short_patterns_clustered, chipseq_tasks, pattern_trim=(26, 45))
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.peaks-profile.not-bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.peaks-profile.not-bias-corrected.png')