In [ ]:
 
In [169]:
exp_chipnexus_id
Out[169]:
'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE'
In [141]:
exp = exp_chipnexus_id
motifs = OrderedDict([('Oct4-Sox2', 'm0_p0'),
             ('Sox2', 'm0_p3'),
             ('Nanog', 'm0_p1')])
             # ('Klf4', 'm1_p0')])
    
model_dir = odir / exp
# Common paths
modisco_dir = model_dir / f"deeplift/Sox2/out/profile/wn"
In [143]:
mr = ModiscoResult(modisco_dir / 'modisco.h5')
In [149]:
!ls {modisco_dir}
centroid_seqlet_matches.csv	     patterns.pkl
cluster-patterns.html		     pattern_table.csv
cluster-patterns.ipynb		     pattern_table.html
footprints.pkl			     pattern_table.sorted.csv
hparams.yaml			     pattern_table.sorted.html
instances.parq			     plots
kwargs.json			     report.html
log				     report.ipynb
modisco.h5			     results.html
modisco-instances.smk-benchmark.tsv  results.ipynb
modisco.smk-benchmark.tsv	     seqlets
motif_clustering
In [150]:
patterns = read_pkl(modisco_dir / 'patterns.pkl')
In [152]:
name2pattern = {p.name: p for p in patterns}
In [163]:
# for p in patterns:
#     p.plot(["profile"]);
In [162]:
name2pattern['metacluster_0/pattern_3'].plot("profile");
In [ ]:
## Note - seems that 
In [144]:
p = mr.get_pattern('metacluster_0/pattern_3')
In [126]:
tf = 'Nanog'

exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE'
motifs = OrderedDict([('Oct4-Sox2', 'm0_p0'),
             ('Sox2', 'm0_p3'),
             ('Nanog', 'm0_p1')])
             # ('Klf4', 'm1_p0')])
    
model_dir = odir / exp
# Common paths
modisco_dir = model_dir / f"deeplift/{tf}/out/profile/wn"

    
exp = 'nexus,gw,OSNK,1,0,0,FALSE,same,0.5,64,25,0.001,9,FALSE'

motifs = OrderedDict([('Oct4-Sox2', 'm0_p0'),
             ('Sox2', 'm0_p5'),
             ('Nanog', 'm0_p2'),
             ('Nanog2', 'm0_p8'),
             ('Klf4', 'm0_p1')
             ])
             # ('Klf4', 'm1_p0')])

# chipseq    
exp = 'seq,gw,OSN,1,0,0,FALSE,same,0.5,64,50,0.001,9,FALSE'


model_dir = odir / exp
# Common paths
modisco_dir = model_dir / f"deeplift/{tf}/out/class/pre-act"

motifs = OrderedDict([('Oct4-Sox2', 'm0_p0'),
             # ('Sox2', 'm0_p5'),
             ('Nanog', 'm0_p3'),
             #('Nanog2', 'm0_p8'),
             #('Klf4', 'm0_p1')
             ])
             # ('Klf4', 'm1_p0')])    
exp = 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE,[1,50],TRUE/'    



motifs = OrderedDict([('Oct4-Sox2', 'm0_p0'),
             # ('Sox2', 'm0_p5'),
             ('Nanog', 'm0_p1'),
             #('Nanog2', 'm0_p8'),
             #('Klf4', 'm0_p1')
             ])
             # ('Klf4', 'm1_p0')])    

model_dir = odir / exp
# Common paths
modisco_dir = model_dir / f"deeplift/{tf}/out/profile/wn"

# figures dir
fdir = Path(f'{ddir}/figures/modisco/{exp}/spacing/')
In [130]:
ls {model_dir}/deeplift/Nanog/out/profile/wn/instances.parq/pattern=metacluster_0/pattern_0/
part.0.parquet
In [131]:
!ls ../../chipnexus/train/seqmodel/output/nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE/deeplift/Oct4/out/profile/wn/instances.parq
_common_metadata  _metadata  pattern=metacluster_0

Load motif pairs

In [135]:
dfi = load_instances(modisco_dir / 'instances.parq', motifs, dedup=False)
dfi = filter_nonoverlapping_intervals(dfi)

dfi_subset = dfi.query('match_weighted_p > .2').query('imp_weighted_p > 0')

Co-occurence test

<150bp

In [62]:
from basepair.exp.chipnexus.spacing import coocurrence_plot
from basepair.exp.chipnexus.spacing import co_occurence_matrix
In [66]:
# debug
dfi_subset['pattern_strand_aln'] = dfi_subset['strand']
dfi_subset['pattern_center_aln'] = dfi_subset['pattern_center']
In [67]:
# co-occurence
fig, ax = plt.subplots(figsize=get_figsize(.25, aspect=1))
dist = 150
coocurrence_plot(dfi_subset, list(motifs),
                 query_string=f"(abs(pattern_center_aln_x- pattern_center_aln_y) <= {dist})")
ax.set_ylabel("Motif of interest")
ax.set_xlabel("Motif partner");
fig.savefig(fdir / f'coocurrence.test.center_diff<{dist}.pdf')

All distance ranges

In [68]:
# subsets = ['center_diff <= 35', 'center_diff > 35', 'center_diff > 70']
subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets), 
                        figsize=get_figsize(0.25*len(subsets), .8/len(subsets)), 
                        sharey=True)
for i, (subset,ax, subset_label) in enumerate(zip(subsets, axes, dist_subset_labels)):
    if i == len(subsets) - 1:
        cbar = True
    else:
        cbar = False
    coocurrence_plot(dfi_subset, list(motifs), query_string=subset, ax=ax, cbar=cbar)
    if i == 0:
        ax.set_ylabel("Motif of interest")
        ax.set_xlabel("Motif partner");
    
    ax.set_title(subset_label)
# plt.tight_layout()
fig.savefig(fdir / f'coocurrence.test.all-dist.pdf')

Pairwise distance distribution

Seq - profile

In [138]:
plotnine.options.figure_size = get_figsize(.5, aspect=2)# (10, 10)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair != 'Oct4-Sox2<>Sox2')]) + 
 geom_histogram(bins=max_dist) + 
 facet_grid("motif_pair~ .") + 
 theme_classic(base_size=10, base_family='Arial') + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 ylim([0, 1000]) + 
 xlab("Pairwise distance") + 
 scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'histogram.center_diff.all.pdf')
fig 
Out[138]:
<ggplot: (-9223363291122037623)>
In [139]:
plotnine.options.figure_size = get_figsize(.3, aspect=2/10*4)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) + 
 geom_vline(xintercept=10, alpha=0.1) + 
 geom_vline(xintercept=20, alpha=0.1) + 
 geom_vline(xintercept=30, alpha=0.1) + 
 geom_vline(xintercept=40, alpha=0.1) + 
 geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") + 
 theme_classic(base_size=10, base_family='Arial') + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") +
 ggtitle("Nanog<>Nanog") + 
 scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'individual/nanog.spacing.pdf')
fig
Out[139]:
<ggplot: (-9223363291121854916)>

Seq - binary

In [124]:
plotnine.options.figure_size = get_figsize(.5, aspect=2)# (10, 10)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair != 'Oct4-Sox2<>Sox2')]) + 
 geom_histogram(bins=max_dist) + 
 facet_grid("motif_pair~ .") + 
 theme_classic(base_size=10, base_family='Arial') + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 ylim([0, 1000]) + 
 xlab("Pairwise distance") + 
 scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'histogram.center_diff.all.pdf')
fig 
Out[124]:
<ggplot: (-9223363291567501943)>
In [125]:
plotnine.options.figure_size = get_figsize(.3, aspect=2/10*4)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) + 
 geom_vline(xintercept=10, alpha=0.1) + 
 geom_vline(xintercept=20, alpha=0.1) + 
 geom_vline(xintercept=30, alpha=0.1) + 
 geom_vline(xintercept=40, alpha=0.1) + 
 geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") + 
 theme_classic(base_size=10, base_family='Arial') + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") +
 ggtitle("Nanog<>Nanog") + 
 scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'individual/nanog.spacing.pdf')
fig
Out[125]:
<ggplot: (-9223363291567658510)>

Nexus - binary

In [110]:
plotnine.options.figure_size = get_figsize(.5, aspect=2)# (10, 10)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair != 'Oct4-Sox2<>Sox2')]) + 
 geom_histogram(bins=max_dist) + 
 facet_grid("motif_pair~ .") + 
 theme_classic(base_size=10, base_family='Arial') + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 ylim([0, 1000]) + 
 xlab("Pairwise distance") + 
 scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'histogram.center_diff.all.pdf')
fig 
Out[110]:
<ggplot: (8746067683988)>
In [109]:
plotnine.options.figure_size = get_figsize(.3, aspect=2/10*4)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) + 
 geom_vline(xintercept=10, alpha=0.1) + 
 geom_vline(xintercept=20, alpha=0.1) + 
 geom_vline(xintercept=30, alpha=0.1) + 
 geom_vline(xintercept=40, alpha=0.1) + 
 geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") + 
 theme_classic(base_size=10, base_family='Arial') + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") +
 ggtitle("Nanog<>Nanog") + 
 scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'individual/nanog.spacing.pdf')
fig
Out[109]:
<ggplot: (-9223363290624282869)>

Profile models

In [80]:
plotnine.options.figure_size = get_figsize(.5, aspect=2)# (10, 10)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair != 'Oct4-Sox2<>Sox2')]) + 
 geom_histogram(bins=max_dist) + 
 facet_grid("motif_pair~ .") + 
 theme_classic(base_size=10, base_family='Arial') + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") + 
 scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'histogram.center_diff.all.pdf')
fig 
Out[80]:
<ggplot: (-9223363290629183821)>

Nanog

In [83]:
plotnine.options.figure_size = get_figsize(.3, aspect=2/10*4)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) + 
 geom_vline(xintercept=10, alpha=0.1) + 
 geom_vline(xintercept=20, alpha=0.1) + 
 geom_vline(xintercept=30, alpha=0.1) + 
 geom_vline(xintercept=40, alpha=0.1) + 
 geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") + 
 theme_classic(base_size=10, base_family='Arial') + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") +
 ggtitle("Nanog<>Nanog") + 
 scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'individual/nanog.spacing.pdf')
fig
Out[83]:
<ggplot: (8746225588165)>
In [23]:
plotnine.options.figure_size = get_figsize(.3, aspect=2/10*4)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) + 
 geom_vline(xintercept=10, alpha=0.1) + 
 geom_vline(xintercept=20, alpha=0.1) + 
 geom_vline(xintercept=30, alpha=0.1) + 
 geom_vline(xintercept=40, alpha=0.1) + 
 geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") + 
 theme_classic(base_size=10, base_family='Arial') + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") +
 ggtitle("Nanog<>Nanog") + 
 scale_fill_brewer(type='qual', palette=3))
fig.save(fdir / 'individual/nanog.spacing.pdf')
fig
Out[23]:
<ggplot: (-9223363310659608965)>