url_dir = "http://mitra.stanford.edu/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/valid/plots/"
modisco_dir = "/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/valid"
# Parameters
url_dir = "http://mitra.stanford.edu/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/test/plots/"
modisco_dir = "."
from basepair.modisco import ModiscoResult
from basepair.config import get_data_dir
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from plotnine import *
mr = ModiscoResult(f"{modisco_dir}/modisco.h5")
mr.open()
# Number of patterns
len(mr.patterns())
# Number of metaclusters
len(mr.metaclusters())
mc_stat = mr.metacluster_stats()
ggplot(aes(x="pattern", y='n'), mc_stat) + geom_bar(stat='identity') + \
facet_wrap("~metacluster", ncol=4, labeller='label_both') + \
ylab("Number of seqlets") + theme_classic()
ggplot(aes(x="pattern", y='n'), mc_stat) + geom_bar(stat='identity') + \
facet_wrap("~metacluster", ncol=4, labeller='label_both') + \
ylab("Number of seqlets") + theme_classic() + coord_cartesian(ylim=[0, 500])
mcs_grouped = mc_stat.groupby("metacluster").n.agg(["count", "sum"]).reset_index()
fig, ax = plt.subplots(2, 1, sharex=False, figsize=(18,6),
gridspec_kw={'height_ratios': [2,1]})
mcs_grouped.plot("metacluster", "count",
label="# patterns per metacluster", style="o--",
ax=ax[0],
yticks=range(mcs_grouped['count'].max()+1),
xticks=range(38),
fontsize='large',
xlim=(-.5, len(mr.metaclusters()) - .5 ))
mcs_grouped.plot("metacluster", "sum",
label="# seqlets per metacluster",
style="o--", ax=ax[0], secondary_y=True)
ax[0].grid(linewidth=0.2)
mr.plot_metacluster_activity(ax[1], cbar=False)
ax[1].set_title("Importance score activity: Red = positive, Blue = negative");
mr.vdom(url_dir, is_open=True, trim_frac=0.08, letter_width=0.15, height=0.5)
print("Metaclusters heatmap")
import seaborn as sns
activity_patterns = np.array(mr.f.f['metaclustering_results']['attribute_vectors'])[
np.array(
[x[0] for x in sorted(
enumerate(mr.f.f['metaclustering_results']['metacluster_indices']),
key=lambda x: x[1])])]
sns.heatmap(activity_patterns, center=0);