from collections import OrderedDict
# Identifier of the model run; it is joined onto the model and figure
# directories further down in this file.
exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE'

# Importance-score key (not referenced in the visible part of this file).
imp_score = 'profile/wn'

# Display name -> pattern identifier, kept in insertion order.
# The identifiers look like "<task>/<metacluster>_<pattern>" -- TODO confirm.
motifs = OrderedDict()
motifs["Oct4-Sox2"] = 'Oct4/m0_p0'
motifs["Oct4"] = 'Oct4/m0_p1'
# Deliberately left out: ("Strange-sym-motif", 'Oct4/m0_p5')
motifs["Sox2"] = 'Sox2/m0_p1'
motifs["Nanog"] = 'Nanog/m0_p1'
motifs["Zic3"] = 'Nanog/m0_p2'
motifs["Nanog-partner"] = 'Nanog/m0_p4'
motifs["Klf4"] = 'Klf4/m0_p0'
# Imports
# NOTE: this file is a notebook export; the `%...` lines below are IPython
# magics, so it has to be run under IPython/Jupyter rather than plain Python.
# The star import presumably supplies the un-imported names used later in this
# file (e.g. `pd`, `np`, `Path`, `display`, `get_figsize`, `paper_config`)
# -- TODO confirm against basepair.imports.
from basepair.imports import *
# Star import of plotnine brings ggplot, aes, geom_*, theme_* etc. into scope;
# the module itself is also imported so `plotnine.options` can be set.
from plotnine import *
import plotnine
import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")
# Render matplotlib figures inline, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Defined outside this file; presumably applies the plotting defaults used for
# the paper figures -- TODO confirm.
paper_config()
# interval columns in dfi
# (neither `dfi` nor `interval_cols` is used in the visible part of this file)
interval_cols = ['example_chrom', 'pattern_start_abs', 'pattern_end_abs']
# Presumably the source of `models_dir` and `ddir` used just below
# -- TODO confirm against basepair.exp.paper.config.
from basepair.exp.paper.config import *
# figures dir
# Directory of the model run selected by `exp`.
model_dir = models_dir / exp
# Figure directories for the spacing analysis of this run.
fdir = Path(f'{ddir}/figures/modisco/{exp}/spacing/')
fdir_individual = fdir / 'individual'
fdir_individual_sim = fdir / 'individual-simulation'
# Table read from the model directory; later code reads its `center_diff`,
# `motif_pair` and `strand_combination` columns.
dfab = pd.read_csv(f"{model_dir}/deeplift/dfab.csv.gz")
# Bare expression: only produces output when it ends a notebook cell.
dfab.head()
# ---- Histogram of pairwise distances for a single motif pair ----
motif_pair_name = 'Nanog<>Nanog'

# Keep rows of the selected motif pair whose center distance is at most 100.
is_close = dfab.center_diff <= 100
is_selected_pair = dfab.motif_pair == motif_pair_name
df = dfab[is_close & is_selected_pair]

plotnine.options.figure_size = get_figsize(2, aspect=2/10*4 / 2)

# Plotted distance range.
xmin, xmax = 5, 100

# One histogram bin per unit of distance; the last bin edge is xmax itself.
bin_edges = np.arange(xmin, xmax + 1)
# NOTE(review): np.arange excludes its stop value, so the last major tick is
# below xmax (95 with the current settings) -- confirm this is intended.
major_ticks = np.arange(xmin, xmax, step=5)
minor_ticks = np.arange(xmin, xmax, step=1)

fig = (
    ggplot(data=df, mapping=aes(x='center_diff', fill='strand_combination'))
    # Geometry: one panel (row) per strand combination
    + geom_histogram(breaks=bin_edges)
    + facet_grid("strand_combination~.")
    # Theme, labels and colours
    + theme_bw(base_size=10, base_family='Arial')
    + theme(strip_text=element_text(rotation=0), legend_position='top')
    + xlab("Pairwise distance")
    + ggtitle(motif_pair_name)
    + scale_x_continuous(breaks=major_ticks, minor_breaks=minor_ticks)
    + scale_fill_brewer(type='qual', palette=3)
)
# axis_ticks_major_x()
display(fig)
# To write the figure to disk:
# fig.save(fdir_individual / f'{motif_pair_name}.large.pdf')

# Notebook inspection of the filtered table (bare expressions).
df.head()
df.pattern_start_abs_x
# Per-motif columns; the export below looks them up with `_x` / `_y` suffixes.
features = [
    'example_chrom',
    'pattern_start_abs',
    'pattern_end_abs',
    'strand',
]
def suffix(l, suffix):
    """Return a new list with `suffix` appended to every element of `l`.

    Args:
        l: iterable of elements that support `+ suffix` (column-name
            strings in this file).
        suffix: value appended to each element.

    Returns:
        list: the concatenated elements, in input order.
    """
    # The parameter `suffix` shadows the function name inside this body; both
    # parameter names are kept so existing keyword callers continue to work.
    return [x + suffix for x in l]
# Output directory for the periodicity analysis (absolute, machine-specific).
periodicity_dir = Path('/users/avsec/gdrive/projects/chipnexus/data/periodicity/')

# Export the distance, the strand combination and the `_x` / `_y` variants of
# every feature column (presumably the two motif instances of each pair
# -- TODO confirm).
pair_columns = [
    'center_diff',
    'strand_combination',
    *suffix(features, '_x'),
    *suffix(features, '_y'),
]
df[pair_columns].to_csv(periodicity_dir / 'Nanog<>Nanog.pairs.csv', index=False)