BPNet TF-MoDISco report

In [1]:
# Default report parameters. The `# Parameters` cell that follows re-assigns both names.
# url_dir is forwarded to vdom_modisco() — presumably the base URL of the rendered plots.
url_dir = "http://mitra.stanford.edu/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/valid/new-hparams/plots/"
# modisco_dir is the directory this report reads modisco.h5, kwargs.json and
# strand_distances.h5 from.
modisco_dir = "/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/valid/new-hparams"
In [2]:
# Parameters
# NOTE(review): this url_dir omits the "new-hparams/" path segment that the default
# url_dir contains — confirm the links point at the plots of the run in modisco_dir.
url_dir = "http://mitra.stanford.edu/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/valid/plots/"
# "." is resolved against the kernel's working directory, so the notebook has to be
# executed from inside the modisco output directory.
modisco_dir = "."
In [3]:
from basepair.modisco import ModiscoResult
from basepair.config import get_data_dir
from basepair.utils import read_json
from basepair.plot.vdom import vdom_modisco
from kipoi.readers import HDF5Reader
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from plotnine import *
import seaborn as sns
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
In [4]:
# Open the modisco result file that lives in the run directory.
modisco_h5 = f"{modisco_dir}/modisco.h5"
mr = ModiscoResult(modisco_h5)
mr.open()
In [5]:
# Read kwargs.json from the run directory, then open the HDF5 file named by its
# 'imp_scores' entry.
kwargs_path = os.path.join(modisco_dir, "kwargs.json")
modisco_kwargs = read_json(kwargs_path)
d = HDF5Reader(modisco_kwargs['imp_scores'])
d.open()
In [6]:
# Build `included_samples`, the boolean mask selecting the examples this report uses,
# then derive the peak-id lookup table and the list of tasks.
#
# NOTE(review): the stored strand-distance mask is applied only when
# `ignore_strand_dist` is truthy, which reads as inverted relative to the flag's
# name — confirm against the code that wrote kwargs.json before changing it.
strand_dist_file = f"{modisco_dir}/strand_distances.h5"
if modisco_kwargs.get("ignore_strand_dist", False) and os.path.exists(strand_dist_file):
    included_samples = HDF5Reader.load(strand_dist_file)['included_samples']
else:
    included_samples = np.ones(d.f['inputs'].shape[:1], dtype=bool)

# Force a boolean dtype: a 0/1 integer array would be interpreted as fancy indices
# (rows 0 and 1 repeated) rather than as a mask when used for indexing below.
included_samples = np.asarray(included_samples, dtype=bool)

if modisco_kwargs.get("filter_npy", None) is not None:
    # Combine the masks with a logical AND; `*` only behaves like AND when both
    # operands are already boolean.
    included_samples = np.load(modisco_kwargs['filter_npy']).astype(bool) & included_samples

# Read and mask the peak ids once, then reuse the result for both columns.
peak_ids = d.f['/metadata/interval_from_task'][:][included_samples]
# example_idx numbers the retained examples 0..n-1; it is the join key used to
# attach peak ids to seqlet instances.
id_hash = pd.DataFrame({"peak_id": peak_ids,
                        "example_idx": np.arange(peak_ids.shape[0])})
tasks = list(d.f["targets"]["profile"].keys())
In [7]:
# Fetch every seqlet instance, rename `seqname` to `example_idx`, and join in the
# matching rows of id_hash on that key.
dfp = (
    mr.seqlet_df_instances()
    .rename(columns={"seqname": "example_idx"})
    .merge(id_hash, on="example_idx")
)
In [8]:
def _summed_profile(task):
    """Return the profile of `task` for the included examples, summed over its last two axes."""
    profile = d.f[f"/targets/profile/{task}"][:][included_samples]
    return profile.sum(axis=-1).sum(axis=-1)


# One column per task; row i belongs to example_idx i.
total_counts = pd.DataFrame({task: _summed_profile(task) for task in tasks})
In [9]:
# Number of patterns returned by the modisco result
len(mr.patterns())
Out[9]:
70
In [10]:
# Number of metaclusters returned by the modisco result.
# NOTE(review): the rendered report lists fewer metaclusters than this count, so it
# apparently includes metaclusters without any pattern — confirm.
len(mr.metaclusters())
Out[10]:
37

Number of seqlets per pattern

In [11]:
# Statistics table used by the plots that follow; they read its `pattern`, `n`
# and `metacluster` columns.
mc_stat = mr.metacluster_stats()
In [12]:
# Bar chart of `n` per pattern, drawn as one facet per metacluster (4 facets per row).
(
    ggplot(aes(x="pattern", y='n'), mc_stat)
    + geom_bar(stat='identity')
    + facet_wrap("~metacluster", ncol=4, labeller='label_both')
    + ylab("Number of seqlets")
    + theme_classic()
)
Out[12]:
<ggplot: (8763116310164)>

Zoom in on the 0-500 seqlet range

In [13]:
# Same faceted bar chart of `n` per pattern, with the y axis limited to 0..500 so
# that small patterns remain visible.
(
    ggplot(aes(x="pattern", y='n'), mc_stat)
    + geom_bar(stat='identity')
    + facet_wrap("~metacluster", ncol=4, labeller='label_both')
    + ylab("Number of seqlets")
    + theme_classic()
    + coord_cartesian(ylim=[0, 500])
)
Out[13]:
<ggplot: (8763116533922)>

Important tasks per metacluster

In [14]:
# Two-panel summary figure.
#   top:    patterns per metacluster (left y axis) and seqlets per metacluster
#           (right y axis)
#   bottom: metacluster activity drawn by mr.plot_metacluster_activity
# Per metacluster: "count" = number of rows of mc_stat, "sum" = total of column `n`.
mcs_grouped = mc_stat.groupby("metacluster").n.agg(["count", "sum"]).reset_index()

# Derive the x extent from the result itself. The tick range used to be a
# hard-coded range(38), which disagreed with the xlim computed from
# len(mr.metaclusters()).
n_metaclusters = len(mr.metaclusters())

fig, ax = plt.subplots(2, 1, sharex=False, figsize=(18,6),
                       gridspec_kw={'height_ratios': [2,1]})
mcs_grouped.plot("metacluster", "count",
                 label="# patterns per metacluster", style="o--",
                 ax=ax[0],
                 yticks=range(mcs_grouped['count'].max()+1),
                 xticks=range(n_metaclusters),
                 fontsize='large',
                 xlim=(-.5, n_metaclusters - .5))
# Second series on the same panel, with its own right-hand y scale.
mcs_grouped.plot("metacluster", "sum",
                 label="# seqlets per metacluster",
                 style="o--", ax=ax[0], secondary_y=True)
ax[0].grid(linewidth=0.2)
mr.plot_metacluster_activity(ax[1], cbar=False)
# Trailing semicolon keeps the Text repr out of the cell output.
ax[1].set_title("Importance score activity: Red = positive, Blue = negative");
In [15]:
# Render the per-metacluster / per-pattern report from the modisco result, the
# per-task total counts and the seqlet instance table.
vdom_modisco(
    mr,
    url_dir,
    total_counts,
    dfp,
    is_open=True,
    trim_frac=0.08,
    letter_width=0.15,
    height=0.5,
)
Out[15]:
metacluster_0, # patterns: 12, # seqlets: 8369, important for: Klf4,Nanog,Oct4,Sox2
  • pattern_0: # seqlets: 3675
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 3158 / 19137 regions (16.5%)

  • pattern_1: # seqlets: 1663
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 1525 / 19137 regions (8.0%)

  • pattern_2: # seqlets: 773
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 743 / 19137 regions (3.9%)

  • pattern_3: # seqlets: 428
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 415 / 19137 regions (2.2%)

  • pattern_4: # seqlets: 290
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 285 / 19137 regions (1.5%)

  • pattern_5: # seqlets: 293
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 292 / 19137 regions (1.5%)

  • pattern_6: # seqlets: 282
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 278 / 19137 regions (1.5%)

  • pattern_7: # seqlets: 266
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 266 / 19137 regions (1.4%)

  • pattern_8: # seqlets: 201
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 201 / 19137 regions (1.1%)

  • pattern_9: # seqlets: 200
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 200 / 19137 regions (1.0%)

  • pattern_10: # seqlets: 179
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 170 / 19137 regions (0.9%)

  • pattern_11: # seqlets: 119
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 119 / 19137 regions (0.6%)

metacluster_1, # patterns: 7, # seqlets: 2619, important for: Nanog
  • pattern_0: # seqlets: 1800
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 1657 / 19137 regions (8.7%)

  • pattern_1: # seqlets: 177
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 175 / 19137 regions (0.9%)

  • pattern_2: # seqlets: 147
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 147 / 19137 regions (0.8%)

  • pattern_3: # seqlets: 129
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 128 / 19137 regions (0.7%)

  • pattern_4: # seqlets: 157
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 147 / 19137 regions (0.8%)

  • pattern_5: # seqlets: 116
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 116 / 19137 regions (0.6%)

  • pattern_6: # seqlets: 93
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 92 / 19137 regions (0.5%)

metacluster_2, # patterns: 5, # seqlets: 3954, important for: Klf4
  • pattern_0: # seqlets: 3101
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 2711 / 19137 regions (14.2%)

  • pattern_1: # seqlets: 457
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 445 / 19137 regions (2.3%)

  • pattern_2: # seqlets: 173
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 171 / 19137 regions (0.9%)

  • pattern_3: # seqlets: 94
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 94 / 19137 regions (0.5%)

  • pattern_4: # seqlets: 129
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 127 / 19137 regions (0.7%)

metacluster_3, # patterns: 8, # seqlets: 3177, important for: Nanog,Oct4,Sox2
  • pattern_0: # seqlets: 1004
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 961 / 19137 regions (5.0%)

  • pattern_1: # seqlets: 818
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 790 / 19137 regions (4.1%)

  • pattern_2: # seqlets: 493
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 485 / 19137 regions (2.5%)

  • pattern_3: # seqlets: 285
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 280 / 19137 regions (1.5%)

  • pattern_4: # seqlets: 173
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 166 / 19137 regions (0.9%)

  • pattern_5: # seqlets: 196
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 196 / 19137 regions (1.0%)

  • pattern_6: # seqlets: 109
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 109 / 19137 regions (0.6%)

  • pattern_7: # seqlets: 99
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 99 / 19137 regions (0.5%)

metacluster_4, # patterns: 4, # seqlets: 2026, important for: Nanog,Sox2
  • pattern_0: # seqlets: 1171
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 1132 / 19137 regions (5.9%)

  • pattern_1: # seqlets: 457
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 450 / 19137 regions (2.4%)

  • pattern_2: # seqlets: 205
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 204 / 19137 regions (1.1%)

  • pattern_3: # seqlets: 193
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 193 / 19137 regions (1.0%)

metacluster_6, # patterns: 7, # seqlets: 1437, important for: Klf4,Nanog,Sox2
  • pattern_0: # seqlets: 548
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 539 / 19137 regions (2.8%)

  • pattern_1: # seqlets: 219
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 218 / 19137 regions (1.1%)

  • pattern_2: # seqlets: 196
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 191 / 19137 regions (1.0%)

  • pattern_3: # seqlets: 151
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 150 / 19137 regions (0.8%)

  • pattern_4: # seqlets: 150
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 150 / 19137 regions (0.8%)

  • pattern_5: # seqlets: 88
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 88 / 19137 regions (0.5%)

  • pattern_6: # seqlets: 85
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 84 / 19137 regions (0.4%)

metacluster_7, # patterns: 5, # seqlets: 1162, important for: Klf4,Nanog
  • pattern_0: # seqlets: 447
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 438 / 19137 regions (2.3%)

  • pattern_1: # seqlets: 353
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 351 / 19137 regions (1.8%)

  • pattern_2: # seqlets: 157
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 157 / 19137 regions (0.8%)

  • pattern_3: # seqlets: 104
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 104 / 19137 regions (0.5%)

  • pattern_4: # seqlets: 101
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 100 / 19137 regions (0.5%)

metacluster_9, # patterns: 5, # seqlets: 929, important for: Klf4,Oct4,Sox2
  • pattern_0: # seqlets: 369
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 360 / 19137 regions (1.9%)

  • pattern_1: # seqlets: 189
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 184 / 19137 regions (1.0%)

  • pattern_2: # seqlets: 179
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 179 / 19137 regions (0.9%)

  • pattern_3: # seqlets: 117
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 117 / 19137 regions (0.6%)

  • pattern_4: # seqlets: 75
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 75 / 19137 regions (0.4%)

metacluster_10, # patterns: 5, # seqlets: 706, important for: Oct4,Sox2
  • pattern_0: # seqlets: 329
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 325 / 19137 regions (1.7%)

  • pattern_1: # seqlets: 117
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 115 / 19137 regions (0.6%)

  • pattern_2: # seqlets: 84
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 84 / 19137 regions (0.4%)

  • pattern_3: # seqlets: 83
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 83 / 19137 regions (0.4%)

  • pattern_4: # seqlets: 93
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 93 / 19137 regions (0.5%)

metacluster_13, # patterns: 1, # seqlets: 178, important for: Nanog,Oct4
  • pattern_0: # seqlets: 178
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 176 / 19137 regions (0.9%)

metacluster_15, # patterns: 1, # seqlets: 235, important for: Oct4
  • pattern_0: # seqlets: 235
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 230 / 19137 regions (1.2%)

metacluster_18, # patterns: 3, # seqlets: 363, important for: Klf4,Oct4
  • pattern_0: # seqlets: 197
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 195 / 19137 regions (1.0%)

  • pattern_1: # seqlets: 74
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 74 / 19137 regions (0.4%)

  • pattern_2: # seqlets: 92
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 91 / 19137 regions (0.5%)

metacluster_21, # patterns: 1, # seqlets: 222, important for: Klf4,-Nanog,-Oct4,-Sox2
  • pattern_0: # seqlets: 222
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 219 / 19137 regions (1.1%)

metacluster_22, # patterns: 1, # seqlets: 264, important for: Sox2
  • pattern_0: # seqlets: 264
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 261 / 19137 regions (1.4%)

metacluster_23, # patterns: 2, # seqlets: 195, important for: Klf4,Nanog,Oct4
  • pattern_0: # seqlets: 85
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 85 / 19137 regions (0.4%)

  • pattern_1: # seqlets: 110
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 110 / 19137 regions (0.6%)

metacluster_25, # patterns: 1, # seqlets: 193, important for: Klf4,Sox2
  • pattern_0: # seqlets: 193
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 191 / 19137 regions (1.0%)

metacluster_28, # patterns: 1, # seqlets: 166, important for: Klf4,-Sox2
  • pattern_0: # seqlets: 166
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 165 / 19137 regions (0.9%)

metacluster_31, # patterns: 1, # seqlets: 141, important for: Klf4,-Oct4,-Sox2
  • pattern_0: # seqlets: 141
    Aggregated profiles and contribution scores)
    Aggregated hypothetical contribution scores)
    Sequence

    ChIP-nexus counts
    Importance scores (profile)
    Importance scores (counts)
    Positional distribution
    Total count distribution

    Pattern occurs in 140 / 19137 regions (0.7%)

In [16]:
# Heatmap of the metaclustering attribute vectors, with rows ordered by the
# metacluster index they were assigned to.
print("Metaclusters heatmap")
metaclustering = mr.f.f['metaclustering_results']
attribute_vectors = np.array(metaclustering['attribute_vectors'])
# Load the indices into memory once instead of iterating the HDF5 dataset element
# by element.
metacluster_indices = np.array(metaclustering['metacluster_indices'])
# mergesort is stable, so rows within one metacluster keep their original relative
# order — the same ordering the previous sorted(enumerate(...)) produced.
row_order = np.argsort(metacluster_indices, kind="mergesort")
activity_patterns = attribute_vectors[row_order]
# center=0 puts the middle of the colormap at zero.
sns.heatmap(activity_patterns, center=0);
Metaclusters heatmap