from collections import OrderedDict
exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE'
imp_score = 'profile/wn'
motifs = OrderedDict([
("Oct4-Sox2", 'Oct4/m0_p0'),
("Oct4", 'Oct4/m0_p1'),
# ("Strange-sym-motif", 'Oct4/m0_p5'),
("Sox2", 'Sox2/m0_p1'),
("Nanog", 'Nanog/m0_p1'),
# ("Zic3", 'Nanog/m0_p2'),
("Nanog-partner", 'Nanog/m0_p4'),
("Klf4", 'Klf4/m0_p0'),
])
# Imports
from basepair.imports import *
from basepair.math import softmax
from basepair.plot.heatmaps import heatmap_stranded_profile, multiple_heatmap_stranded_profile
from basepair.plot.profiles import plot_stranded_profile, multiple_plot_stranded_profile, extract_signal
from basepair.plot.tracks import plot_tracks, filter_tracks
from basepair.modisco.results import MultipleModiscoResult, Seqlet, resize_seqlets
from basepair.modisco.pattern_instances import (multiple_load_instances, load_instances, filter_nonoverlapping_intervals,
plot_coocurence_matrix, align_instance_center, dfi2seqlets, annotate_profile)
from basepair.exp.chipnexus.perturb.vdom import vdom_motif_pair, plot_spacing_hist
from basepair.exp.chipnexus.spacing import remove_edge_instances, get_motif_pairs, motif_pair_dfi
from basepair.exp.chipnexus.simulate import (insert_motif, generate_sim, plot_sim, generate_seq,
model2tasks, motif_coords, interactive_tracks, plot_motif_table,
plot_sim_motif_col)
from basepair.exp.paper.config import *
from basepair.cli.modisco import load_profiles
from basepair.cli.imp_score import ImpScoreFile
from basepair.preproc import rc_seq, dfint_no_intersection
from copy import deepcopy
from scipy.fftpack import fft, ifft
from plotnine import *
import plotnine
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
paper_config()
# interval columns in dfi
interval_cols = ['example_chrom', 'pattern_start_abs', 'pattern_end_abs']
# figures dir
model_dir = models_dir / exp
fdir = Path(f'{ddir}/figures/modisco/{exp}/spacing-raw-data')
fdir.mkdir(exist_ok=True)
!mkdir -p {fdir_individual}
!mkdir -p {fdir_individual_sim}
!mkdir -p {fdir}
# Generate motif pairs
pairs = get_motif_pairs(motifs)
# ordered names
pair_names = ["<>".join(x) for x in pairs]
# define the global set of distances
dist_subsets = ['center_diff<=35',
'(center_diff>35)&(center_diff<=70)',
'(center_diff>70)&(center_diff<=150)',
'center_diff>150']
dist_subset_labels = ['dist < 35',
'35 < dist <= 70',
'70 < dist <= 150',
'150 < dist',
]
dfi_subset = pd.read_parquet(f"{model_dir}/deeplift/dfi_subset.parq", engine='fastparquet')
from basepair.exp.chipnexus.spacing import coocurrence_plot
from basepair.exp.chipnexus.spacing import co_occurence_matrix
from collections import OrderedDict
main_motifs = OrderedDict([(k,v)
for k,v in motifs.items()
if k in ['Oct4-Sox2', 'Sox2', 'Nanog', 'Klf4']
])
# subsets = ['center_diff <= 35', 'center_diff > 35', 'center_diff > 70']
subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets),
figsize=get_figsize(0.25*len(subsets), .8/len(subsets)),
sharey=True)
# dfs = dfi_subset.query('match_weighted_p > .2').query('imp_weighted_p > .2')
dfs = dfi_subset
for i, (subset,ax, subset_label) in enumerate(zip(subsets, axes, dist_subset_labels)):
if i == len(subsets) - 1:
cbar = True
else:
cbar = False
coocurrence_plot(dfs, list(main_motifs), query_string=f"({subset}) & (center_diff_aln > 5)" , ax=ax, cbar=cbar)
if i == 0:
ax.set_ylabel("Motif of interest")
ax.set_xlabel("Motif partner");
ax.set_title(subset_label)
# plt.tight_layout()
# fig.savefig(fdir / f'coocurrence.test.all-dist.main-motifs.pdf')
from basepair.exp.chipnexus.spacing import remove_edge_instances, get_motif_pairs, motif_pair_dfi
from basepair.stats import smooth_gam, smooth_lowess
pairs = get_motif_pairs(motifs)
# create motif pairs
dfab = pd.concat([motif_pair_dfi(dfi_subset, pair).assign(motif_pair="<>".join(pair)) for pair in pairs], axis=0)
dfab = pd.read_csv("~/gdrive/projects/chipnexus/data/dfab.csv.gz")
dfabm = dfab[(dfab.motif_pair == 'Nanog<>Nanog') & (dfab.strand_combination == '++')]
paper_config()
def smooth_gam(x, y, n_splines=100, lam=None):
from pygam import ExpectileGAM, LinearGAM, s, f
gam = LinearGAM(s(0, n_splines=n_splines)).gridsearch(x, y, lam=lam)
# gam = ExpectileGAM(s(0, n_splines=n_splines), expectile=0.5, lam=lam).gridsearch(x.values.reshape((-1,1)), y)
XX = gam.generate_X_grid(term=0)
confi = gam.confidence_intervals(XX)
# confi = gam.prediction_intervals(XX)
ym = gam.predict_mu(XX)
return XX[:, 0], ym, confi
profile_mapping
profile_mapping['Nanog-partner'] = 'Nanog'
for i, motif_pair in enumerate(pairs):
same_motif = motif_pair[0] == motif_pair[1]
if same_motif:
fig, axes = plt.subplots(3, 1, figsize=get_figsize(.25, 2),
sharex=True,
sharey=True,
gridspec_kw=dict(wspace=0, hspace=0))
else:
fig, axes = plt.subplots(4, 2, figsize=get_figsize(.5, 2/3*2),
sharex=True,
sharey=True,
gridspec_kw=dict(wspace=0, hspace=0))
motif_pair_name = '<>'.join(motif_pair)
for j, strand_combination in enumerate(['++', '+-', '-+', '--']):
dfabm = dfab[(dfab.motif_pair == motif_pair_name) & (dfab.strand_combination == strand_combination)]
if len(dfabm) == 0:
continue
xmax = 100
x = dfabm[dfabm.center_diff<xmax].center_diff.values
for k in range(2):
last_k = k == 1 or same_motif
if same_motif and k == 1:
continue
if same_motif:
ax = axes[j]
tf = profile_mapping[motif_pair[0]]
y = np.log(1+(dfabm[dfabm.center_diff<xmax][f'{tf}/profile_counts_max_ref_y'].values +
dfabm[dfabm.center_diff<xmax][f'{tf}/profile_counts_max_ref_x'].values))
else:
ax = axes[j, k]
tf = profile_mapping[motif_pair[k]]
y = np.log(1+dfabm[dfabm.center_diff<xmax][f'{tf}/profile_counts_max_ref_x'].values)
xm,ym, ci = smooth_gam(x[:, np.newaxis], y, n_splines=xmax//2, lam=np.logspace(1, 6, 11))
ax.scatter(x, y, alpha=0.1, s=1);
cmap = plt.get_cmap("tab10")
color = cmap(0)
ax.plot(xm, ym, color=color);
if ci is not None:
ax.fill_between(xm, ci[:, 0], ci[:, 1], alpha=0.4, color=color);
# plt.xlim([5, 100])
if last_k:
ax.set_xlabel("Motif distance")
if j == 1 and k == 0:
ax.set_ylabel("Max profile value")
if j == 0:
ax.set_title(motif_pair[k])
if last_k:
ax.text(1, 1,strand_combination,
horizontalalignment='right',
verticalalignment='top',
transform = ax.transAxes)
sns.despine(top=True, right=True)
fig.savefig(fdir / f'{motif_pair_name}.pdf')