-
# Imports
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from basepair.imports import *
hv.extension('bokeh')
from basepair.plot.config import paper_config
paper_config()
# Common paths
# Model directory, built from `ddir` (not defined in this file's visible
# part; presumably provided by `from basepair.imports import *` -- confirm).
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
# Modisco output directory below the model directory. Plain literal: the
# string has no placeholders, so no f-prefix is needed.
modisco_dir = model_dir / "modisco/all/profile/"
# Load the strand-distance file from the modisco directory set just above.
old_dist = HDF5Reader.load(modisco_dir / 'strand_distances.h5')
# Open a fresh figure explicitly: plt.hist draws on the *current* axes, so
# without this the histogram could land on a figure left over from earlier
# statements when the code is run back-to-back.
plt.figure();
plt.hist(old_dist['distances'], 100);  # 100 bins
plt.title("Old distribution");
# 10th / 25th / 50th / 75th / 90th percentiles of the distances; left as a
# bare expression so a notebook displays the result.
np.percentile(old_dist['distances'], [10, 25, 50, 75, 90])
# Re-point modisco_dir at the deeplift-based run (plain literal: the string
# has no placeholders, so no f-prefix is needed).
modisco_dir = model_dir / "modisco/all/deeplift/profile/"
new_dist = HDF5Reader.load(modisco_dir / 'strand_distances.h5')
# Open a fresh figure explicitly: plt.hist draws on the *current* axes, so
# without this the histogram would be drawn over any previously plotted
# histogram (and replace its title) when the code is run back-to-back.
plt.figure();
plt.hist(new_dist['distances'], 100);  # 100 bins
plt.title("New distribution");
# 10th / 25th / 50th / 75th / 90th percentiles of the distances; left as a
# bare expression so a notebook displays the result.
np.percentile(new_dist['distances'], [10, 25, 50, 75, 90])
from basepair.cli.imp_score import ImpScoreFile
from basepair.cli.modisco import load_imp_scores, load_included_samples
# TODO: add the following methods:
#   - plot(idx, 'inputs'), ...: plot all the examples.
# Open the importance-score file stored next to the model.
imp_scores = ImpScoreFile(model_dir / "deeplift.all.h5")
# Example indices ordered by decreasing distance (argsort of the negated
# values), so position 0 holds the largest distance.
worst_sequences = np.argsort(-new_dist['distances'])
# Position 1 of the descending ranking, i.e. the second-largest distance.
idx = worst_sequences[1]
tasks = imp_scores.get_tasks()
seq = imp_scores.f.f['/inputs'][idx]
from basepair.plot.tracks import plot_tracks, filter_tracks
# One (label, array) pair per task and strand, read from the 'count'
# hypothetical-importance datasets. The dataset path uses the strand's
# integer position (0 for 'pos', 1 for 'neg'); the label uses its name.
hyp_contrib = [
    (f"{t}/{s}", imp_scores.f.f[f'/hyp_imp/{t}/count/{si}'][idx])
    for t in tasks  # reuse the task list fetched above
    for si, s in enumerate(['pos', 'neg'])
]
# Element-wise product of each hypothetical score array with the input
# sequence (presumably one-hot encoded, which would zero out the bases that
# are not observed -- TODO confirm).
contrib = [(s, v * seq) for s, v in hyp_contrib]
contrib[0][1].max()
# NOTE(review): scratch assignment; `a` is not read anywhere in the visible
# code. Kept only to preserve the module-level binding -- candidate for removal.
a = 1
# Largest absolute value per track, before and after masking with `seq`.
[np.abs(x).max() for _, x in hyp_contrib]
[np.abs(x).max() for _, x in contrib]
# Last entry of `worst_sequences`; given the descending ordering produced by
# argsort on the negated distances, this is the example with the smallest
# distance.
idx = worst_sequences[-1]
tasks = imp_scores.get_tasks()
seq = imp_scores.f.f['/inputs'][idx]
from basepair.plot.tracks import plot_tracks, filter_tracks
# One (label, array) pair per task and strand, read from the 'weighted'
# hypothetical-importance datasets. The dataset path uses the strand's
# integer position (0 for 'pos', 1 for 'neg'); the label uses its name.
hyp_contrib = [
    (f"{t}/{s}", imp_scores.f.f[f'/hyp_imp/{t}/weighted/{si}'][idx])
    for t in tasks  # reuse the task list fetched above
    for si, s in enumerate(['pos', 'neg'])
]
# Element-wise product of each hypothetical score array with the input
# sequence (presumably one-hot encoded, which would zero out the bases that
# are not observed -- TODO confirm).
contrib = [(s, v * seq) for s, v in hyp_contrib]
contrib[0][1].max()
# Plot the tracks after filter_tracks(contrib, [400, 600]) (presumably this
# restricts them to positions 400-600 -- verify against
# basepair.plot.tracks).
plot_tracks(filter_tracks(contrib, [400, 600]), fig_height_per_track=0.5, fig_width=10)
# Largest absolute value per track, before and after masking with `seq`.
[np.abs(x).max() for _, x in hyp_contrib]
[np.abs(x).max() for _, x in contrib]