BPNet TF-MoDISco report

In [5]:
# Model/run sub-path shared by the web URL and the on-disk directory. It is defined
# once so the two locations cannot drift apart when pointing the report at another run.
run_subpath = "oct-sox-nanog-klf/models/n_dil_layers=9/modisco/valid"

# URL of this run's plots directory (passed to `mr.vdom` further down).
url_dir = f"http://mitra.stanford.edu/kundaje/avsec/chipnexus/{run_subpath}/plots/"

# Local directory of this run; `modisco.h5` is read from here.
mdir = f"/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/{run_subpath}"
In [15]:
from basepair.modisco import ModiscoResult
from basepair.config import get_data_dir
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from plotnine import *
In [7]:
# Locate the modisco result file inside the model directory and open it;
# every later cell reads from this `mr` handle.
modisco_path = f"{mdir}/modisco.h5"
mr = ModiscoResult(modisco_path)
mr.open()
In [8]:
# mr.close()  # left disabled on purpose: the cells below still read from `mr`
In [9]:
# Number of patterns: the count of entries returned by `mr.patterns()`,
# i.e. the total over all metaclusters rather than a per-metacluster figure.
len(mr.patterns())
Out[9]:
70
In [10]:
# Number of metaclusters: the count of entries returned by `mr.metaclusters()`.
# NOTE(review): the pattern report further down lists fewer metaclusters than this;
# presumably metaclusters without any pattern are omitted there — confirm.
len(mr.metaclusters())
Out[10]:
37
In [11]:
# Statistics table with one row per (metacluster, pattern) pair; the column `n`
# holds the number of seqlets of that pattern. Reused by the plots below.
mc_stat = mr.metacluster_stats()
# Preview the first rows only instead of dumping the whole table.
mc_stat.head()
Out[11]:
metacluster pattern n
0 0 0 3675
1 0 1 1663
2 0 2 773
3 0 3 428
4 0 4 290

Number of seqlets per pattern

In [12]:
# Bar chart of the seqlet count of every pattern, drawn as one facet per
# metacluster (four facets per row).
(
    ggplot(data=mc_stat, mapping=aes(x="pattern", y="n"))
    + geom_bar(stat="identity")
    + facet_wrap("~metacluster", ncol=4, labeller="label_both")
    + ylab("Number of seqlets")
    + theme_classic()
)
Out[12]:
<ggplot: (-9223363292691938146)>

Zoom in to the 0–500 seqlet range

In [13]:
# Same faceted bar chart of seqlets per pattern, but with the y-axis view
# restricted to 0-500 so that the small patterns become readable.
(
    ggplot(data=mc_stat, mapping=aes(x="pattern", y="n"))
    + geom_bar(stat="identity")
    + facet_wrap("~metacluster", ncol=4, labeller="label_both")
    + ylab("Number of seqlets")
    + theme_classic()
    + coord_cartesian(ylim=[0, 500])
)
Out[13]:
<ggplot: (8744145557894)>

Important tasks per metacluster

In [16]:
# Per-metacluster summary: number of patterns ("count") and total number of
# seqlets ("sum"), both aggregated from the per-pattern seqlet counts `n`.
mcs_grouped = mc_stat.groupby("metacluster").n.agg(["count", "sum"]).reset_index()

# Derive the x-axis extent from the result itself. The ticks used to be a
# hard-coded range(38) while the limits were computed from the metacluster
# count; using one value for both keeps them in agreement for any run.
n_metaclusters = len(mr.metaclusters())

# Two stacked panels: the top one (twice as tall) shows the counts, the bottom
# one shows the metacluster activity.
fig, ax = plt.subplots(2, 1, sharex=False, figsize=(18, 6),
                       gridspec_kw={'height_ratios': [2, 1]})
mcs_grouped.plot("metacluster", "count",
                 label="# patterns per metacluster", style="o--",
                 ax=ax[0],
                 # one y tick per integer pattern count
                 yticks=range(mcs_grouped['count'].max() + 1),
                 xticks=range(n_metaclusters),
                 fontsize='large',
                 # half a unit of padding on either side of the first/last metacluster
                 xlim=(-.5, n_metaclusters - .5))
# Seqlet totals share the top panel but are drawn against a secondary y-axis.
mcs_grouped.plot("metacluster", "sum",
                 label="# seqlets per metacluster",
                 style="o--", ax=ax[0], secondary_y=True)
ax[0].grid(linewidth=0.2)
mr.plot_metacluster_activity(ax[1], cbar=False)
# The trailing semicolon suppresses the text repr of the returned title object.
ax[1].set_title("Importance score activity: Red = positive, Blue = negative");
In [17]:
print("Metaclusters heatmap")
import seaborn as sns

# Both datasets are read from the 'metaclustering_results' group of the open result file.
metaclustering = mr.f.f['metaclustering_results']

# Order the rows of the attribute vectors by their metacluster index.
# mergesort is numpy's stable sort, so rows sharing a metacluster index keep
# their original relative order, exactly like sorting (position, index) pairs
# by index with Python's `sorted`, but without a Python-level loop over the dataset.
row_order = np.argsort(np.array(metaclustering['metacluster_indices']), kind="mergesort")
activity_patterns = np.array(metaclustering['attribute_vectors'])[row_order]

# center=0 puts the midpoint of the colormap at zero.
sns.heatmap(activity_patterns, center=0)
Metaclusters heatmap
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3d9e965f28>
In [21]:
# Render the overview of all metaclusters and their patterns (with seqlet counts).
# `url_dir` is handed over as the first argument — presumably the location the
# pattern plots are served from; confirm against `ModiscoResult.vdom`.
mr.vdom(
    url_dir,
    is_open=True,
    trim_frac=0.08,
    letter_width=0.15,
    height=0.5,
)
Out[21]:
metacluster_0, # patterns: 12, # seqlets: 8369, important for: Klf4,Nanog,Oct4,Sox2
  • pattern_0: # seqlets: 3675
  • pattern_1: # seqlets: 1663
  • pattern_2: # seqlets: 773
  • pattern_3: # seqlets: 428
  • pattern_4: # seqlets: 290
  • pattern_5: # seqlets: 293
  • pattern_6: # seqlets: 282
  • pattern_7: # seqlets: 266
  • pattern_8: # seqlets: 201
  • pattern_9: # seqlets: 200
  • pattern_10: # seqlets: 179
  • pattern_11: # seqlets: 119
metacluster_1, # patterns: 7, # seqlets: 2619, important for: Nanog
  • pattern_0: # seqlets: 1800
  • pattern_1: # seqlets: 177
  • pattern_2: # seqlets: 147
  • pattern_3: # seqlets: 129
  • pattern_4: # seqlets: 157
  • pattern_5: # seqlets: 116
  • pattern_6: # seqlets: 93
metacluster_2, # patterns: 5, # seqlets: 3954, important for: Klf4
  • pattern_0: # seqlets: 3101
  • pattern_1: # seqlets: 457
  • pattern_2: # seqlets: 173
  • pattern_3: # seqlets: 94
  • pattern_4: # seqlets: 129
metacluster_3, # patterns: 8, # seqlets: 3177, important for: Nanog,Oct4,Sox2
  • pattern_0: # seqlets: 1004
  • pattern_1: # seqlets: 818
  • pattern_2: # seqlets: 493
  • pattern_3: # seqlets: 285
  • pattern_4: # seqlets: 173
  • pattern_5: # seqlets: 196
  • pattern_6: # seqlets: 109
  • pattern_7: # seqlets: 99
metacluster_4, # patterns: 4, # seqlets: 2026, important for: Nanog,Sox2
  • pattern_0: # seqlets: 1171
  • pattern_1: # seqlets: 457
  • pattern_2: # seqlets: 205
  • pattern_3: # seqlets: 193
metacluster_6, # patterns: 7, # seqlets: 1437, important for: Klf4,Nanog,Sox2
  • pattern_0: # seqlets: 548
  • pattern_1: # seqlets: 219
  • pattern_2: # seqlets: 196
  • pattern_3: # seqlets: 151
  • pattern_4: # seqlets: 150
  • pattern_5: # seqlets: 88
  • pattern_6: # seqlets: 85
metacluster_7, # patterns: 5, # seqlets: 1162, important for: Klf4,Nanog
  • pattern_0: # seqlets: 447
  • pattern_1: # seqlets: 353
  • pattern_2: # seqlets: 157
  • pattern_3: # seqlets: 104
  • pattern_4: # seqlets: 101
metacluster_9, # patterns: 5, # seqlets: 929, important for: Klf4,Oct4,Sox2
  • pattern_0: # seqlets: 369
  • pattern_1: # seqlets: 189
  • pattern_2: # seqlets: 179
  • pattern_3: # seqlets: 117
  • pattern_4: # seqlets: 75
metacluster_10, # patterns: 5, # seqlets: 706, important for: Oct4,Sox2
  • pattern_0: # seqlets: 329
  • pattern_1: # seqlets: 117
  • pattern_2: # seqlets: 84
  • pattern_3: # seqlets: 83
  • pattern_4: # seqlets: 93
metacluster_13, # patterns: 1, # seqlets: 178, important for: Nanog,Oct4
  • pattern_0: # seqlets: 178
metacluster_15, # patterns: 1, # seqlets: 235, important for: Oct4
  • pattern_0: # seqlets: 235
metacluster_18, # patterns: 3, # seqlets: 363, important for: Klf4,Oct4
  • pattern_0: # seqlets: 197
  • pattern_1: # seqlets: 74
  • pattern_2: # seqlets: 92
metacluster_21, # patterns: 1, # seqlets: 222, important for: Klf4,-Nanog,-Oct4,-Sox2
  • pattern_0: # seqlets: 222
metacluster_22, # patterns: 1, # seqlets: 264, important for: Sox2
  • pattern_0: # seqlets: 264
metacluster_23, # patterns: 2, # seqlets: 195, important for: Klf4,Nanog,Oct4
  • pattern_0: # seqlets: 85
  • pattern_1: # seqlets: 110
metacluster_25, # patterns: 1, # seqlets: 193, important for: Klf4,Sox2
  • pattern_0: # seqlets: 193
metacluster_28, # patterns: 1, # seqlets: 166, important for: Klf4,-Sox2
  • pattern_0: # seqlets: 166
metacluster_31, # patterns: 1, # seqlets: 141, important for: Klf4,-Oct4,-Sox2
  • pattern_0: # seqlets: 141

Inspect individual patterns - API example

In [691]:
# Self-contained API example: import the class, point it at the result file and open it.
# NOTE(review): when the notebook is run top to bottom, the `mr` created near the top is
# still open here (its `mr.close()` cell is commented out) and gets rebound without being
# closed — confirm that re-opening like this is intended.
from basepair.modisco import ModiscoResult
mr = ModiscoResult(f"{mdir}/modisco.h5")
mr.open()
In [698]:
# Plot a single pattern, addressed by its metacluster name and its pattern name.
mr.plot_pattern("metacluster_0", "pattern_0")
In [690]:
# Close the result object opened with `mr.open()` now that the example is finished.
mr.close()