from collections import OrderedDict
# Experiment identifier: a comma-separated settings string. Elsewhere in this
# script it is also used as a directory name when building paths.
exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE'

# Name of the importance-score track.
imp_score = 'profile/wn'

# Display name -> "<task>/<pattern>" identifier, kept in insertion order.
motifs = OrderedDict()
motifs["Oct4-Sox2"] = 'Oct4/m0_p0'
motifs["Oct4"] = 'Oct4/m0_p1'
# Disabled entry: "Strange-sym-motif" -> 'Oct4/m0_p5'
motifs["Sox2"] = 'Sox2/m0_p1'
motifs["Nanog"] = 'Nanog/m0_p1'
motifs["Zic3"] = 'Nanog/m0_p2'
motifs["Nanog-partner"] = 'Nanog/m0_p4'
motifs["Klf4"] = 'Klf4/m0_p0'
from basepair.imports import *
from basepair.exp.paper.config import *
# Apply the shared paper configuration (imported from basepair.exp.paper.config).
paper_config()

# Output directory for this experiment's modisco figures.
fdir = Path(f'{ddir}/figures/modisco/{exp}/')
# Directory of this experiment's model files (the importance-score files are
# read from here later in the script).
model_dir = models_dir / exp

# Show the contents of the model directory once. This replaces the duplicated
# IPython-only `ls {model_dir}` shell escape, which is not valid Python.
# Hidden entries are skipped to mirror plain `ls`.
for entry in sorted(Path(model_dir).iterdir()):
    if not entry.name.startswith('.'):
        print(entry.name)
import modisco
# Window-sum function built with a window size of 21; applied to the
# contribution scores further down.
window_fn = modisco.coordproducers.get_simple_window_sum_function(21)

# Contribution scores for the real regions. The importance-score name comes
# from the `imp_score` constant defined at the top of the script instead of a
# repeated literal, so changing the constant actually takes effect.
isf = ImpScoreFile(model_dir / 'deeplift.imp_score.h5', default_imp_score=imp_score)
contrib_scores = isf.get_contrib()

# NOTE(review): paper_config() was already called earlier; this second call is
# kept as in the original in case an intervening import resets the settings it
# applies -- confirm whether it is still needed.
paper_config()

# Contribution scores from the null run, loaded with the same score name.
isf_null = ImpScoreFile(model_dir / 'null.deeplift.imp_score.h5', default_imp_score=imp_score)
contrib_scores_null = isf_null.get_contrib()

# Region table for the real data; its `interval_from_task` column is used
# later to select regions per task.
ranges = isf.get_ranges()
ranges.head()  # bare expression: only displays a preview when run in a notebook
# Scratch values for inspecting a single task interactively.
task = 'Oct4'
# Comparison of the `interval_from_task` column against the task name
# (presumably an element-wise boolean mask -- confirm the type of `ranges`).
oct4_range = ranges.interval_from_task == task
tasks  # bare expression: only displays the task list when run in a notebook
# Per-task windowed contribution scores, flattened to 1-D arrays so they can be
# histogrammed directly.
#   sl_d[task]      -- scores from the real contribution scores
#   sl_null_d[task] -- scores from the null contribution scores
# NOTE(review): the plots label these as 21bp windows, which presumes each row
# handed to window_fn is a per-base 1-D track -- confirm the array shape.
sl_d = dict()
sl_null_d = dict()
for task in tasks:
    # Real data: keep only the regions whose interval comes from this task.
    from_task = ranges.interval_from_task == task
    seqlet_scores = np.stack(window_fn(contrib_scores[task][from_task]))
    sl_d[task] = np.ravel(seqlet_scores)

    # Null data: every region is used, without per-task filtering.
    seqlet_scores_null = np.stack(window_fn(contrib_scores_null[task]))
    sl_null_d[task] = np.ravel(seqlet_scores_null)
# Grid of histograms: one column per task, real scores on the top row and null
# scores on the bottom row, with shared axes and no gaps between panels.
# squeeze=False keeps `axes` two-dimensional even when there is only one task,
# so the [row, column] indexing below always works.
fig, axes = plt.subplots(
    2,
    len(tasks),
    figsize=get_figsize(1, .3),
    sharex=True,
    sharey=True,
    squeeze=False,
    gridspec_kw=dict(hspace=0, wspace=0),
)
for i, task in enumerate(tasks):
    axes[0, i].set_title(task)
    # 100 bins with a log-scaled count axis.
    axes[0, i].hist(sl_d[task], 100, log=True)
    axes[1, i].hist(sl_null_d[task], 100, log=True)

axes[0, 0].set_ylabel("Signal")
axes[1, 0].set_ylabel("Null")
# The x label sits under the second column, as in the original; the index is
# clamped so a single-task grid does not raise an IndexError.
axes[1, min(1, len(tasks) - 1)].set_xlabel("Importance (21bp window)")
np.median(sl_d['Oct4'])  # bare expression: only displays the median when run in a notebook

# Quick-look linear-scale histogram for Oct4. It is drawn on a fresh figure:
# outside a notebook, pyplot-level plt.hist would otherwise draw into the
# current axes, which is the last panel of `fig`, and corrupt the figure saved
# below.
plt.figure()
plt.hist(sl_d['Oct4'], 100, log=False)

# Ensure the output directory exists, then save the signal-vs-null grid.
fdir.mkdir(parents=True, exist_ok=True)
fig.savefig(fdir / 'seqlet-imp-histogram.21bp.pdf')