exp_chipnexus_id
exp = exp_chipnexus_id
motifs = OrderedDict([('Oct4-Sox2', 'm0_p0'),
('Sox2', 'm0_p3'),
('Nanog', 'm0_p1')])
# ('Klf4', 'm1_p0')])
model_dir = odir / exp
# Common paths
modisco_dir = model_dir / f"deeplift/Sox2/out/profile/wn"
mr = ModiscoResult(modisco_dir / 'modisco.h5')
!ls {modisco_dir}
patterns = read_pkl(modisco_dir / 'patterns.pkl')
name2pattern = {p.name: p for p in patterns}
# for p in patterns:
# p.plot(["profile"]);
name2pattern['metacluster_0/pattern_3'].plot("profile");
## Note - seems that
p = mr.get_pattern('metacluster_0/pattern_3')
tf = 'Nanog'
exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE'
motifs = OrderedDict([('Oct4-Sox2', 'm0_p0'),
('Sox2', 'm0_p3'),
('Nanog', 'm0_p1')])
# ('Klf4', 'm1_p0')])
model_dir = odir / exp
# Common paths
modisco_dir = model_dir / f"deeplift/{tf}/out/profile/wn"
exp = 'nexus,gw,OSNK,1,0,0,FALSE,same,0.5,64,25,0.001,9,FALSE'
motifs = OrderedDict([('Oct4-Sox2', 'm0_p0'),
('Sox2', 'm0_p5'),
('Nanog', 'm0_p2'),
('Nanog2', 'm0_p8'),
('Klf4', 'm0_p1')
])
# ('Klf4', 'm1_p0')])
# chipseq
exp = 'seq,gw,OSN,1,0,0,FALSE,same,0.5,64,50,0.001,9,FALSE'
model_dir = odir / exp
# Common paths
modisco_dir = model_dir / f"deeplift/{tf}/out/class/pre-act"
motifs = OrderedDict([('Oct4-Sox2', 'm0_p0'),
# ('Sox2', 'm0_p5'),
('Nanog', 'm0_p3'),
#('Nanog2', 'm0_p8'),
#('Klf4', 'm0_p1')
])
# ('Klf4', 'm1_p0')])
exp = 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE,[1,50],TRUE/'
motifs = OrderedDict([('Oct4-Sox2', 'm0_p0'),
# ('Sox2', 'm0_p5'),
('Nanog', 'm0_p1'),
#('Nanog2', 'm0_p8'),
#('Klf4', 'm0_p1')
])
# ('Klf4', 'm1_p0')])
model_dir = odir / exp
# Common paths
modisco_dir = model_dir / f"deeplift/{tf}/out/profile/wn"
# figures dir
fdir = Path(f'{ddir}/figures/modisco/{exp}/spacing/')
ls {model_dir}/deeplift/Nanog/out/profile/wn/instances.parq/pattern=metacluster_0/pattern_0/
!ls ../../chipnexus/train/seqmodel/output/nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE/deeplift/Oct4/out/profile/wn/instances.parq
dfi = load_instances(modisco_dir / 'instances.parq', motifs, dedup=False)
dfi = filter_nonoverlapping_intervals(dfi)
dfi_subset = dfi.query('match_weighted_p > .2').query('imp_weighted_p > 0')
from basepair.exp.chipnexus.spacing import coocurrence_plot
from basepair.exp.chipnexus.spacing import co_occurence_matrix
# debug
dfi_subset['pattern_strand_aln'] = dfi_subset['strand']
dfi_subset['pattern_center_aln'] = dfi_subset['pattern_center']
# co-occurence
fig, ax = plt.subplots(figsize=get_figsize(.25, aspect=1))
dist = 150
coocurrence_plot(dfi_subset, list(motifs),
query_string=f"(abs(pattern_center_aln_x- pattern_center_aln_y) <= {dist})")
ax.set_ylabel("Motif of interest")
ax.set_xlabel("Motif partner");
fig.savefig(fdir / f'coocurrence.test.center_diff<{dist}.pdf')
# subsets = ['center_diff <= 35', 'center_diff > 35', 'center_diff > 70']
subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets),
figsize=get_figsize(0.25*len(subsets), .8/len(subsets)),
sharey=True)
for i, (subset,ax, subset_label) in enumerate(zip(subsets, axes, dist_subset_labels)):
if i == len(subsets) - 1:
cbar = True
else:
cbar = False
coocurrence_plot(dfi_subset, list(motifs), query_string=subset, ax=ax, cbar=cbar)
if i == 0:
ax.set_ylabel("Motif of interest")
ax.set_xlabel("Motif partner");
ax.set_title(subset_label)
# plt.tight_layout()
fig.savefig(fdir / f'coocurrence.test.all-dist.pdf')
plotnine.options.figure_size = get_figsize(.5, aspect=2)# (10, 10)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair != 'Oct4-Sox2<>Sox2')]) +
geom_histogram(bins=max_dist) +
facet_grid("motif_pair~ .") +
theme_classic(base_size=10, base_family='Arial') +
theme(strip_text = element_text(rotation=0), legend_position='top') +
xlim([0, max_dist]) +
ylim([0, 1000]) +
xlab("Pairwise distance") +
scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'histogram.center_diff.all.pdf')
fig
plotnine.options.figure_size = get_figsize(.3, aspect=2/10*4)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) +
geom_vline(xintercept=10, alpha=0.1) +
geom_vline(xintercept=20, alpha=0.1) +
geom_vline(xintercept=30, alpha=0.1) +
geom_vline(xintercept=40, alpha=0.1) +
geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") +
theme_classic(base_size=10, base_family='Arial') +
theme(strip_text = element_text(rotation=0), legend_position='top') +
xlim([0, max_dist]) +
xlab("Pairwise distance") +
ggtitle("Nanog<>Nanog") +
scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'individual/nanog.spacing.pdf')
fig
plotnine.options.figure_size = get_figsize(.5, aspect=2)# (10, 10)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair != 'Oct4-Sox2<>Sox2')]) +
geom_histogram(bins=max_dist) +
facet_grid("motif_pair~ .") +
theme_classic(base_size=10, base_family='Arial') +
theme(strip_text = element_text(rotation=0), legend_position='top') +
xlim([0, max_dist]) +
ylim([0, 1000]) +
xlab("Pairwise distance") +
scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'histogram.center_diff.all.pdf')
fig
plotnine.options.figure_size = get_figsize(.3, aspect=2/10*4)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) +
geom_vline(xintercept=10, alpha=0.1) +
geom_vline(xintercept=20, alpha=0.1) +
geom_vline(xintercept=30, alpha=0.1) +
geom_vline(xintercept=40, alpha=0.1) +
geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") +
theme_classic(base_size=10, base_family='Arial') +
theme(strip_text = element_text(rotation=0), legend_position='top') +
xlim([0, max_dist]) +
xlab("Pairwise distance") +
ggtitle("Nanog<>Nanog") +
scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'individual/nanog.spacing.pdf')
fig
plotnine.options.figure_size = get_figsize(.5, aspect=2)# (10, 10)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair != 'Oct4-Sox2<>Sox2')]) +
geom_histogram(bins=max_dist) +
facet_grid("motif_pair~ .") +
theme_classic(base_size=10, base_family='Arial') +
theme(strip_text = element_text(rotation=0), legend_position='top') +
xlim([0, max_dist]) +
ylim([0, 1000]) +
xlab("Pairwise distance") +
scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'histogram.center_diff.all.pdf')
fig
plotnine.options.figure_size = get_figsize(.3, aspect=2/10*4)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) +
geom_vline(xintercept=10, alpha=0.1) +
geom_vline(xintercept=20, alpha=0.1) +
geom_vline(xintercept=30, alpha=0.1) +
geom_vline(xintercept=40, alpha=0.1) +
geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") +
theme_classic(base_size=10, base_family='Arial') +
theme(strip_text = element_text(rotation=0), legend_position='top') +
xlim([0, max_dist]) +
xlab("Pairwise distance") +
ggtitle("Nanog<>Nanog") +
scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'individual/nanog.spacing.pdf')
fig
plotnine.options.figure_size = get_figsize(.5, aspect=2)# (10, 10)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair != 'Oct4-Sox2<>Sox2')]) +
geom_histogram(bins=max_dist) +
facet_grid("motif_pair~ .") +
theme_classic(base_size=10, base_family='Arial') +
theme(strip_text = element_text(rotation=0), legend_position='top') +
xlim([0, max_dist]) +
xlab("Pairwise distance") +
scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'histogram.center_diff.all.pdf')
fig
plotnine.options.figure_size = get_figsize(.3, aspect=2/10*4)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) +
geom_vline(xintercept=10, alpha=0.1) +
geom_vline(xintercept=20, alpha=0.1) +
geom_vline(xintercept=30, alpha=0.1) +
geom_vline(xintercept=40, alpha=0.1) +
geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") +
theme_classic(base_size=10, base_family='Arial') +
theme(strip_text = element_text(rotation=0), legend_position='top') +
xlim([0, max_dist]) +
xlab("Pairwise distance") +
ggtitle("Nanog<>Nanog") +
scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'individual/nanog.spacing.pdf')
fig
plotnine.options.figure_size = get_figsize(.3, aspect=2/10*4)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) +
geom_vline(xintercept=10, alpha=0.1) +
geom_vline(xintercept=20, alpha=0.1) +
geom_vline(xintercept=30, alpha=0.1) +
geom_vline(xintercept=40, alpha=0.1) +
geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") +
theme_classic(base_size=10, base_family='Arial') +
theme(strip_text = element_text(rotation=0), legend_position='top') +
xlim([0, max_dist]) +
xlab("Pairwise distance") +
ggtitle("Nanog<>Nanog") +
scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'individual/nanog.spacing.pdf')
fig