# Bias-corrected results
# First rendering: clustered short patterns in reversed order, with the trim
# window shifted 5 positions to the left of (26, 51).
fig = plot_patterns(
    list(reversed(short_patterns_clustered)),
    tasks,
    pattern_trim=(26 - 5, 51 - 5),
)
# Bare name: a no-op expression when this runs as a script
# (presumably a leftover from displaying the output directory interactively).
figures
fig.savefig(figures / 'per-tf.short-patterns.nexus.bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.nexus.bias-corrected.png')

# Second rendering: same patterns in their original order, unshifted window.
# NOTE(review): the two saves below reuse the file names written just above,
# so the first rendering is overwritten on disk -- confirm which one is wanted.
fig = plot_patterns(
    short_patterns_clustered,
    tasks,
    pattern_trim=(26, 51),
)
fig.savefig(figures / 'per-tf.short-patterns.nexus.bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.nexus.bias-corrected.png')
# NOTE(review): the stray text "[0, 2] range" was fused onto the front of the
# `tasks` assignment, which made the line a syntax error. It is kept here as a
# comment until its origin is confirmed.
tasks = ['Oct4', 'Sox2', 'Nanog', 'Klf4']

# Model directory whose per-task profile/wn outputs are read below.
model_dir = sdir / 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE-2'

# One (task, ModiscoResult) pair per task; despite the name this is a list of
# tuples, not a dict.
mr_dict = [
    (task, ModiscoResult(model_dir / f"deeplift/{task}/out/profile/wn/modisco.h5"))
    for task in tasks
]
# Open every result before it is handed to get_patterns.
for k, v in mr_dict:
    v.open()

# Per-task footprints, keyed by task name.
footprints = {
    t: read_pkl(model_dir / f"deeplift/{t}/out/profile/wn/footprints.pkl")
    for t in tasks
}

patterns = get_patterns(mr_dict, footprints, tasks, min_n_seqlets=300)

# Keep only patterns with sequence information content below 30.
# NOTE(review): n_clusters is the length of the *unfiltered* pattern list --
# confirm this is intended when some patterns are filtered out.
short_patterns_clustered = cluster_align_patterns(
    [p for p in patterns if p.seq_info_content < 30],
    n_clusters=len(patterns),
)

# non-bias corrected
# Histogram of sequence information content over all (unfiltered) patterns.
fig, ax = plt.subplots(figsize=get_figsize(.25))
ax.hist([p.seq_info_content for p in patterns], 20)
ax.set_ylabel("Frequency")
ax.set_xlabel("Sequence IC")
# fig.savefig(figures / 'per-tf.pattern-length.nexus.bias-corrected.pdf', bbox_inches='tight', pad_inches=0)

# `fig` is rebound here, so only the pattern plot (not the histogram) is saved.
fig = plot_patterns(short_patterns_clustered, tasks, pattern_trim=(26, 45))
fig.savefig(figures / 'per-tf.short-patterns.chip-nexus.peaks-profile.not-bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.chip-nexus.peaks-profile.not-bias-corrected.png')
# Switch to the 'nexus,gw,...' model directory; its per-task class/pre-act
# outputs are read below, reusing the `tasks` list defined earlier.
model_dir = sdir / 'nexus,gw,OSNK,1,0,0,FALSE,valid,0.5,64,25,0.001,9,FALSE'

# List of (task, ModiscoResult) tuples (a list, despite the variable name).
mr_dict = [
    (task, ModiscoResult(model_dir / f"deeplift/{task}/out/class/pre-act/modisco.h5"))
    for task in tasks
]
for k, v in mr_dict:
    v.open()

# Footprints loaded per task and keyed by task name.
footprints = {
    t: read_pkl(model_dir / f"deeplift/{t}/out/class/pre-act/footprints.pkl")
    for t in tasks
}

patterns = get_patterns(mr_dict, footprints, tasks, min_n_seqlets=300)

# Cluster and align only the patterns whose sequence IC is below 30; the
# cluster count is taken from the unfiltered list.
short_patterns_clustered = cluster_align_patterns(
    [pattern for pattern in patterns if pattern.seq_info_content < 30],
    n_clusters=len(patterns),
)

# Distribution of sequence information content across all patterns.
fig, ax = plt.subplots(figsize=get_figsize(.25))
ax.hist([pattern.seq_info_content for pattern in patterns], bins=20)
ax.set_xlabel("Sequence IC")
ax.set_ylabel("Frequency")
# fig.savefig(figures / 'per-tf.pattern-length.nexus.bias-corrected.pdf', bbox_inches='tight', pad_inches=0)

# `fig` now refers to the pattern plot; the histogram above is not saved.
fig = plot_patterns(
    short_patterns_clustered,
    tasks,
    pattern_trim=(26, 45),
)
fig.savefig(figures / 'per-tf.short-patterns.chip-nexus.gw-binary.pdf')
fig.savefig(figures / 'per-tf.short-patterns.chip-nexus.gw-binary.png')
# Task list used by the 'seq,...' model sections from here on.
chipseq_tasks = ['Oct4', 'Sox2', 'Nanog']

# Model directory with the '[1,50],TRUE' suffix; its per-task profile/wn
# outputs are read below.
model_dir = sdir / 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE,[1,50],TRUE'

# List of (task, ModiscoResult) tuples (a list, despite the variable name).
mr_dict = [
    (task, ModiscoResult(model_dir / f"deeplift/{task}/out/profile/wn/modisco.h5"))
    for task in chipseq_tasks
]
for k, v in mr_dict:
    v.open()

# Footprints loaded per task and keyed by task name.
footprints = {
    t: read_pkl(model_dir / f"deeplift/{t}/out/profile/wn/footprints.pkl")
    for t in chipseq_tasks
}

patterns = get_patterns(mr_dict, footprints, chipseq_tasks, min_n_seqlets=300)

# Cluster and align only the patterns whose sequence IC is below 30; the
# cluster count is taken from the unfiltered list.
short_patterns_clustered = cluster_align_patterns(
    [pattern for pattern in patterns if pattern.seq_info_content < 30],
    n_clusters=len(patterns),
)

# Distribution of sequence information content across all patterns.
# NOTE(review): this section previously carried a "non-bias corrected" label,
# but its figures are saved under 'bias-corrected' names -- confirm.
fig, ax = plt.subplots(figsize=get_figsize(.25))
ax.hist([pattern.seq_info_content for pattern in patterns], bins=20)
ax.set_xlabel("Sequence IC")
ax.set_ylabel("Frequency")
# fig.savefig(figures / 'per-tf.pattern-length.nexus.bias-corrected.pdf', bbox_inches='tight', pad_inches=0)

# `fig` now refers to the pattern plot; the histogram above is not saved.
fig = plot_patterns(
    short_patterns_clustered,
    tasks=chipseq_tasks,
    pattern_trim=(26, 45),
)
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.peaks-profile.bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.peaks-profile.bias-corrected.png')
# Switch to the 'seq,gw,...' model directory; its per-task class/pre-act
# outputs are read below for the tasks in `chipseq_tasks`.
model_dir = sdir / 'seq,gw,OSN,1,0,0,FALSE,valid,0.5,64,50,0.001,9,FALSE'

# List of (task, ModiscoResult) tuples (a list, despite the variable name).
mr_dict = [
    (task, ModiscoResult(model_dir / f"deeplift/{task}/out/class/pre-act/modisco.h5"))
    for task in chipseq_tasks
]
for k, v in mr_dict:
    v.open()

# Footprints loaded per task and keyed by task name.
footprints = {
    t: read_pkl(model_dir / f"deeplift/{t}/out/class/pre-act/footprints.pkl")
    for t in chipseq_tasks
}

patterns = get_patterns(mr_dict, footprints, chipseq_tasks, min_n_seqlets=300)

# Cluster and align only the patterns whose sequence IC is below 30; the
# cluster count is taken from the unfiltered list.
short_patterns_clustered = cluster_align_patterns(
    [pattern for pattern in patterns if pattern.seq_info_content < 30],
    n_clusters=len(patterns),
)

# Distribution of sequence information content across all patterns.
# NOTE(review): this section previously carried a "non-bias corrected" label,
# but its figures are saved under 'bias-corrected' names -- confirm.
fig, ax = plt.subplots(figsize=get_figsize(.25))
ax.hist([pattern.seq_info_content for pattern in patterns], bins=20)
ax.set_xlabel("Sequence IC")
ax.set_ylabel("Frequency")
# fig.savefig(figures / 'per-tf.pattern-length.nexus.bias-corrected.pdf', bbox_inches='tight', pad_inches=0)

# This section trims to (30, 50) rather than the (26, 45) used by its siblings.
# `fig` now refers to the pattern plot; the histogram above is not saved.
fig = plot_patterns(
    short_patterns_clustered,
    chipseq_tasks,
    pattern_trim=(30, 50),
)
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.gw-binary.bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.gw-binary.bias-corrected.png')
# Switch to the 'seq,peaks,...' model directory without the '[1,50],TRUE'
# suffix; its per-task profile/wn outputs are read below.
model_dir = sdir / 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE'

# List of (task, ModiscoResult) tuples (a list, despite the variable name).
mr_dict = [
    (task, ModiscoResult(model_dir / f"deeplift/{task}/out/profile/wn/modisco.h5"))
    for task in chipseq_tasks
]
for k, v in mr_dict:
    v.open()

# Footprints loaded per task and keyed by task name.
footprints = {
    t: read_pkl(model_dir / f"deeplift/{t}/out/profile/wn/footprints.pkl")
    for t in chipseq_tasks
}

patterns = get_patterns(mr_dict, footprints, chipseq_tasks, min_n_seqlets=300)

# Cluster and align only the patterns whose sequence IC is below 30; the
# cluster count is taken from the unfiltered list.
short_patterns_clustered = cluster_align_patterns(
    [pattern for pattern in patterns if pattern.seq_info_content < 30],
    n_clusters=len(patterns),
)

# non-bias corrected
# Distribution of sequence information content across all patterns.
fig, ax = plt.subplots(figsize=get_figsize(.25))
ax.hist([pattern.seq_info_content for pattern in patterns], bins=20)
ax.set_xlabel("Sequence IC")
ax.set_ylabel("Frequency")
# fig.savefig(figures / 'per-tf.pattern-length.nexus.bias-corrected.pdf', bbox_inches='tight', pad_inches=0)

# `fig` now refers to the pattern plot; the histogram above is not saved.
fig = plot_patterns(
    short_patterns_clustered,
    chipseq_tasks,
    pattern_trim=(26, 45),
)
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.peaks-profile.not-bias-corrected.pdf')
fig.savefig(figures / 'per-tf.short-patterns.chip-seq.peaks-profile.not-bias-corrected.png')