BPNet TF-MoDISco report

In [1]:
# Default TF-MoDISco output directory. The "# Parameters" cell that follows reassigns
# modisco_dir, so this value only takes effect when that cell is absent.
# NOTE(review): this is a hard-coded absolute path into one user's workspace.
modisco_dir = "/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/valid/new-hparams"
In [2]:
# Parameters
# Overrides the default modisco_dir assigned in the previous cell; with "." every path
# built from modisco_dir below is resolved relative to the current working directory.
# NOTE(review): presumably injected by a notebook parameterisation tool — confirm.
modisco_dir = "."
In [3]:
from basepair.modisco.results import ModiscoResult
from basepair.config import get_data_dir
from basepair.utils import read_json
from basepair.plot.vdom import vdom_modisco
from kipoi.readers import HDF5Reader
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from plotnine import *
Using TensorFlow backend.
In [4]:
# Open the modisco.h5 result file found in modisco_dir; `mr` is queried by the cells below.
# NOTE(review): no matching close call is visible in this part of the notebook.
mr = ModiscoResult(f"{modisco_dir}/modisco.h5")
mr.open()
In [5]:
# load the data
modisco_kwargs = read_json(os.path.join(modisco_dir, "kwargs.json"))
d = HDF5Reader(modisco_kwargs['imp_scores'])
d.open()
In [6]:
# Build the boolean mask of examples to keep, then map each kept peak id to its
# position among the kept examples.
strand_dist_file = f"{modisco_dir}/strand_distances.h5"
# NOTE(review): the stored mask is loaded only when `ignore_strand_dist` is truthy, which
# reads inverted relative to the flag's name — confirm against the code that wrote kwargs.json.
if modisco_kwargs.get("ignore_strand_dist", False) and os.path.exists(strand_dist_file):
    included_samples = HDF5Reader.load(strand_dist_file)['included_samples']
else:
    # No stored mask is used: keep every example (one entry along the first axis of `inputs`).
    included_samples = np.ones(d.f['inputs'].shape[:1], dtype=bool)

if modisco_kwargs.get("filter_npy", None) is not None:
    # Combine with the optional filter array. logical_and always yields a boolean mask,
    # whereas `*` would yield an integer array if the .npy file stores 0/1 integers, and
    # that array would then be used as positional indices instead of a mask below.
    included_samples = np.logical_and(np.load(modisco_kwargs['filter_npy']), included_samples)

# Read and mask the dataset once, then reuse the result for both columns.
peak_ids = d.f['/metadata/interval_from_task'][:][included_samples]
id_hash = pd.DataFrame({"peak_id": peak_ids,
                        "example_idx": np.arange(peak_ids.shape[0])})
tasks = list(d.f["targets"]["profile"].keys())
In [7]:
# get all seqlet instances
dfp = mr.seqlet_df_instances().rename(columns=dict(seqname="example_idx"))
dfp = pd.merge(dfp, id_hash, on="example_idx")
TF-MoDISco is using the TensorFlow backend.
In [8]:
# One column per task; rows follow the order of the kept examples (row = example_idx).
# Each entry is the task's profile summed over its last two axes.
total_counts = pd.DataFrame(
    dict(
        (task, d.f[f"/targets/profile/{task}"][:][included_samples].sum(axis=-1).sum(axis=-1))
        for task in tasks
    )
)
In [9]:
# Number of entries returned by mr.patterns()
len(mr.patterns())
Out[9]:
112
In [10]:
# Total number of seqlet instances, i.e. rows of dfp after the merge with id_hash
len(dfp)
Out[10]:
48825
In [11]:
# Number of metaclusters (the same count sets the x-axis limits of the summary figure below)
len(mr.metaclusters())
Out[11]:
15

Number of seqlets per pattern

In [12]:
# Statistics table used by the plots below, which read its
# "pattern", "n" and "metacluster" columns.
mc_stat = mr.metacluster_stats()
In [13]:
# Bar chart of `n` per pattern, one facet per metacluster (4 facets per row).
(
    ggplot(aes(x="pattern", y='n'), mc_stat)
    + geom_bar(stat='identity')
    + facet_wrap("~metacluster", ncol=4, labeller='label_both')
    + ylab("Number of seqlets")
    + theme_classic()
)
Out[13]:
<ggplot: (-9223363246194832661)>

Zoom into the 0–500 seqlet range

In [14]:
# Same bar chart as above, with the visible y-range limited to 0-500 so that
# patterns with few seqlets remain readable.
(
    ggplot(aes(x="pattern", y='n'), mc_stat)
    + geom_bar(stat='identity')
    + facet_wrap("~metacluster", ncol=4, labeller='label_both')
    + ylab("Number of seqlets")
    + theme_classic()
    + coord_cartesian(ylim=[0, 500])
)
Out[14]:
<ggplot: (8790660168417)>

Important tasks per metacluster

In [15]:
# Per metacluster: number of rows in mc_stat ("count") and the sum of their `n` ("sum").
mcs_grouped = mc_stat.groupby("metacluster").n.agg(["count", "sum"]).reset_index()

# Derive the x-axis ticks and limits from the actual number of metaclusters so both
# stay in agreement for any run (the tick range was previously a hard-coded 38).
n_metaclusters = len(mr.metaclusters())

# Top panel: the two summary curves; bottom panel: metacluster activity.
fig, ax = plt.subplots(2, 1, sharex=False, figsize=(18,6), 
                       gridspec_kw={'height_ratios': [2,1]})
mcs_grouped.plot("metacluster", "count", 
                 label="# patterns per metacluster", style="o--", 
                 ax=ax[0], 
                 yticks=range(mcs_grouped['count'].max()+1),
                 xticks=range(n_metaclusters),
                 fontsize='large',
                 xlim=(-.5, n_metaclusters - .5 ))
# The seqlet totals share the top panel but get their own (secondary) y-axis.
mcs_grouped.plot("metacluster", "sum", 
                 label="# seqlets per metacluster", 
                 style="o--", ax=ax[0], secondary_y=True)
ax[0].grid(linewidth=0.2)
mr.plot_metacluster_activity(ax[1], cbar=False)
# The trailing semicolon suppresses the text repr of the returned title object.
ax[1].set_title("Importance score activity: Red = positive, Blue = negative");
In [16]:
# Render the per-metacluster / per-pattern report shown in the output below, using the
# per-example total counts and the seqlet instance table computed above.
# NOTE(review): "plots" is presumably the directory holding pre-rendered figures — confirm.
vdom_modisco(mr, "plots", total_counts, dfp, is_open=True, trim_frac=0.08, letter_width=0.15, height=0.5)
Out[16]:
metacluster_0, # patterns: 7, # seqlets: 3422, important for: Klf4,Klf4/count,-Nanog/count
  • pattern_0: # seqlets: 2564
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 2521 / 98428 regions (2.6%)

  • pattern_1: # seqlets: 257
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 257 / 98428 regions (0.3%)

  • pattern_2: # seqlets: 202
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 202 / 98428 regions (0.2%)

  • pattern_3: # seqlets: 108
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 107 / 98428 regions (0.1%)

  • pattern_4: # seqlets: 98
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 96 / 98428 regions (0.1%)

  • pattern_5: # seqlets: 98
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 96 / 98428 regions (0.1%)

  • pattern_6: # seqlets: 95
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 93 / 98428 regions (0.1%)

metacluster_1, # patterns: 17, # seqlets: 12809, important for: Klf4,Klf4/count,Nanog,Nanog/count,Oct4,Oct4/count,Sox2,Sox2/count
  • pattern_0: # seqlets: 7860
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 7618 / 98428 regions (7.7%)

  • pattern_1: # seqlets: 885
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 875 / 98428 regions (0.9%)

  • pattern_2: # seqlets: 536
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 536 / 98428 regions (0.5%)

  • pattern_3: # seqlets: 537
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 537 / 98428 regions (0.5%)

  • pattern_4: # seqlets: 488
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 488 / 98428 regions (0.5%)

  • pattern_5: # seqlets: 416
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 416 / 98428 regions (0.4%)

  • pattern_6: # seqlets: 339
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 337 / 98428 regions (0.3%)

  • pattern_7: # seqlets: 310
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 310 / 98428 regions (0.3%)

  • pattern_8: # seqlets: 209
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 209 / 98428 regions (0.2%)

  • pattern_9: # seqlets: 193
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 193 / 98428 regions (0.2%)

  • pattern_10: # seqlets: 218
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 218 / 98428 regions (0.2%)

  • pattern_11: # seqlets: 180
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 180 / 98428 regions (0.2%)

  • pattern_12: # seqlets: 170
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 170 / 98428 regions (0.2%)

  • pattern_13: # seqlets: 128
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 128 / 98428 regions (0.1%)

  • pattern_14: # seqlets: 120
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 120 / 98428 regions (0.1%)

  • pattern_15: # seqlets: 153
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 153 / 98428 regions (0.2%)

  • pattern_16: # seqlets: 67
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 67 / 98428 regions (0.1%)

metacluster_2, # patterns: 12, # seqlets: 6917, important for: Klf4,Nanog,Nanog/count,Oct4,Oct4/count,Sox2,Sox2/count
  • pattern_0: # seqlets: 3547
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 3464 / 98428 regions (3.5%)

  • pattern_1: # seqlets: 1014
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 1007 / 98428 regions (1.0%)

  • pattern_2: # seqlets: 870
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 849 / 98428 regions (0.9%)

  • pattern_3: # seqlets: 298
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 298 / 98428 regions (0.3%)

  • pattern_4: # seqlets: 246
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 246 / 98428 regions (0.2%)

  • pattern_5: # seqlets: 220
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 220 / 98428 regions (0.2%)

  • pattern_6: # seqlets: 145
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 145 / 98428 regions (0.1%)

  • pattern_7: # seqlets: 171
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 170 / 98428 regions (0.2%)

  • pattern_8: # seqlets: 156
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 156 / 98428 regions (0.2%)

  • pattern_9: # seqlets: 102
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 102 / 98428 regions (0.1%)

  • pattern_10: # seqlets: 84
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 84 / 98428 regions (0.1%)

  • pattern_11: # seqlets: 64
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 64 / 98428 regions (0.1%)

metacluster_3, # patterns: 10, # seqlets: 3573, important for: Nanog,Nanog/count,Oct4,Oct4/count,Sox2,Sox2/count
  • pattern_0: # seqlets: 1789
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 1767 / 98428 regions (1.8%)

  • pattern_1: # seqlets: 345
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 345 / 98428 regions (0.4%)

  • pattern_2: # seqlets: 319
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 318 / 98428 regions (0.3%)

  • pattern_3: # seqlets: 284
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 281 / 98428 regions (0.3%)

  • pattern_4: # seqlets: 205
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 204 / 98428 regions (0.2%)

  • pattern_5: # seqlets: 224
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 223 / 98428 regions (0.2%)

  • pattern_6: # seqlets: 115
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 115 / 98428 regions (0.1%)

  • pattern_7: # seqlets: 112
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 112 / 98428 regions (0.1%)

  • pattern_8: # seqlets: 79
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 78 / 98428 regions (0.1%)

  • pattern_9: # seqlets: 101
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 101 / 98428 regions (0.1%)

metacluster_4, # patterns: 8, # seqlets: 3430, important for: Nanog,Nanog/count
  • pattern_0: # seqlets: 2035
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 2011 / 98428 regions (2.0%)

  • pattern_1: # seqlets: 323
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 322 / 98428 regions (0.3%)

  • pattern_2: # seqlets: 244
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 244 / 98428 regions (0.2%)

  • pattern_3: # seqlets: 283
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 283 / 98428 regions (0.3%)

  • pattern_4: # seqlets: 156
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 156 / 98428 regions (0.2%)

  • pattern_5: # seqlets: 122
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 122 / 98428 regions (0.1%)

  • pattern_6: # seqlets: 114
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 113 / 98428 regions (0.1%)

  • pattern_7: # seqlets: 153
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 153 / 98428 regions (0.2%)

metacluster_5, # patterns: 8, # seqlets: 3005, important for: Nanog
  • pattern_0: # seqlets: 1632
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 1614 / 98428 regions (1.6%)

  • pattern_1: # seqlets: 309
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 308 / 98428 regions (0.3%)

  • pattern_2: # seqlets: 254
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 254 / 98428 regions (0.3%)

  • pattern_3: # seqlets: 204
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 204 / 98428 regions (0.2%)

  • pattern_4: # seqlets: 149
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 149 / 98428 regions (0.2%)

  • pattern_5: # seqlets: 227
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 226 / 98428 regions (0.2%)

  • pattern_6: # seqlets: 141
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 141 / 98428 regions (0.1%)

  • pattern_7: # seqlets: 89
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 88 / 98428 regions (0.1%)

metacluster_6, # patterns: 6, # seqlets: 2438, important for: Nanog,Nanog/count,Oct4/count,Sox2,Sox2/count
  • pattern_0: # seqlets: 767
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 759 / 98428 regions (0.8%)

  • pattern_1: # seqlets: 507
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 504 / 98428 regions (0.5%)

  • pattern_2: # seqlets: 379
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 378 / 98428 regions (0.4%)

  • pattern_3: # seqlets: 307
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 307 / 98428 regions (0.3%)

  • pattern_4: # seqlets: 261
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 261 / 98428 regions (0.3%)

  • pattern_5: # seqlets: 217
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 216 / 98428 regions (0.2%)

metacluster_7, # patterns: 7, # seqlets: 3429, important for: Klf4,Klf4/count
  • pattern_0: # seqlets: 2011
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 1986 / 98428 regions (2.0%)

  • pattern_1: # seqlets: 635
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 633 / 98428 regions (0.6%)

  • pattern_2: # seqlets: 334
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 326 / 98428 regions (0.3%)

  • pattern_3: # seqlets: 195
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 190 / 98428 regions (0.2%)

  • pattern_4: # seqlets: 79
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 78 / 98428 regions (0.1%)

  • pattern_5: # seqlets: 107
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 105 / 98428 regions (0.1%)

  • pattern_6: # seqlets: 68
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 67 / 98428 regions (0.1%)

metacluster_8, # patterns: 5, # seqlets: 1377, important for: Nanog,Nanog/count,Sox2,Sox2/count
  • pattern_0: # seqlets: 382
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 379 / 98428 regions (0.4%)

  • pattern_1: # seqlets: 339
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 339 / 98428 regions (0.3%)

  • pattern_2: # seqlets: 230
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 229 / 98428 regions (0.2%)

  • pattern_3: # seqlets: 255
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 255 / 98428 regions (0.3%)

  • pattern_4: # seqlets: 171
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 171 / 98428 regions (0.2%)

metacluster_9, # patterns: 6, # seqlets: 1954, important for: Nanog,Nanog/count,Sox2/count
  • pattern_0: # seqlets: 1116
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 1110 / 98428 regions (1.1%)

  • pattern_1: # seqlets: 231
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 231 / 98428 regions (0.2%)

  • pattern_2: # seqlets: 180
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 180 / 98428 regions (0.2%)

  • pattern_3: # seqlets: 164
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 164 / 98428 regions (0.2%)

  • pattern_4: # seqlets: 112
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 112 / 98428 regions (0.1%)

  • pattern_5: # seqlets: 151
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 151 / 98428 regions (0.2%)

metacluster_10, # patterns: 7, # seqlets: 1425, important for: Klf4,Nanog,Nanog/count,Oct4/count,Sox2,Sox2/count
  • pattern_0: # seqlets: 434
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 431 / 98428 regions (0.4%)

  • pattern_1: # seqlets: 507
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 502 / 98428 regions (0.5%)

  • pattern_2: # seqlets: 92
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 92 / 98428 regions (0.1%)

  • pattern_3: # seqlets: 113
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 112 / 98428 regions (0.1%)

  • pattern_4: # seqlets: 115
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 113 / 98428 regions (0.1%)

  • pattern_5: # seqlets: 91
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 91 / 98428 regions (0.1%)

  • pattern_6: # seqlets: 73
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 73 / 98428 regions (0.1%)

metacluster_11, # patterns: 3, # seqlets: 771, important for: Nanog,Nanog/count,Sox2
  • pattern_0: # seqlets: 292
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 292 / 98428 regions (0.3%)

  • pattern_1: # seqlets: 221
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 220 / 98428 regions (0.2%)

  • pattern_2: # seqlets: 258
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 258 / 98428 regions (0.3%)

metacluster_12, # patterns: 3, # seqlets: 1888, important for: Klf4,Klf4/count,Oct4/count,Sox2/count
  • pattern_0: # seqlets: 1521
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 1506 / 98428 regions (1.5%)

  • pattern_1: # seqlets: 245
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 240 / 98428 regions (0.2%)

  • pattern_2: # seqlets: 122
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 122 / 98428 regions (0.1%)

metacluster_13, # patterns: 7, # seqlets: 1148, important for: Klf4,Klf4/count,Nanog
  • pattern_0: # seqlets: 255
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 255 / 98428 regions (0.3%)

  • pattern_1: # seqlets: 137
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 137 / 98428 regions (0.1%)

  • pattern_2: # seqlets: 187
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 187 / 98428 regions (0.2%)

  • pattern_3: # seqlets: 181
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 181 / 98428 regions (0.2%)

  • pattern_4: # seqlets: 111
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 111 / 98428 regions (0.1%)

  • pattern_5: # seqlets: 131
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 131 / 98428 regions (0.1%)

  • pattern_6: # seqlets: 146
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 146 / 98428 regions (0.1%)

metacluster_14, # patterns: 6, # seqlets: 1239, important for: Klf4,Nanog
  • pattern_0: # seqlets: 426
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 422 / 98428 regions (0.4%)

  • pattern_1: # seqlets: 257
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 257 / 98428 regions (0.3%)

  • pattern_2: # seqlets: 220
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 220 / 98428 regions (0.2%)

  • pattern_3: # seqlets: 94
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 94 / 98428 regions (0.1%)

  • pattern_4: # seqlets: 103
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 103 / 98428 regions (0.1%)

  • pattern_5: # seqlets: 139
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 132 / 98428 regions (0.1%)

In [17]:
# Heatmap of the metaclustering attribute vectors, with the rows reordered so
# that entries sharing the same metacluster index sit next to each other.
print("Metaclusters heatmap")
import seaborn as sns  # NOTE(review): would ideally live in the notebook's top import cell

metaclustering = mr.f.f['metaclustering_results']
# Load the index vector into memory once instead of iterating the stored
# dataset element by element.
# assumes `metacluster_indices` is 1-D with one entry per row of
# `attribute_vectors` — TODO confirm
metacluster_indices = np.array(metaclustering['metacluster_indices'])
# A stable sort keeps the original relative order of rows that share a
# metacluster index, matching what a stable Python `sorted` on the index
# values would produce. "mergesort" is NumPy's long-standing stable kind.
row_order = np.argsort(metacluster_indices, kind="mergesort")
activity_patterns = np.array(metaclustering['attribute_vectors'])[row_order]
# center=0 anchors the colormap at zero; the trailing `;` suppresses the Axes repr.
sns.heatmap(activity_patterns, center=0);
Metaclusters heatmap