In [1]:
from collections import OrderedDict
# Experiment identifier; later cells use it as a directory name under the
# models and figures directories.
exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE'
# Presumably the importance-score key; not referenced in the visible cells.
imp_score = 'profile/wn'

# Motif display name -> short pattern id, kept in insertion order.
motifs = OrderedDict()
motifs["Oct4-Sox2"] = 'Oct4/m0_p0'
motifs["Oct4"] = 'Oct4/m0_p1'
# Deliberately left out: ("Strange-sym-motif", 'Oct4/m0_p5')
motifs["Sox2"] = 'Sox2/m0_p1'
motifs["Nanog"] = 'Nanog/m0_p1'
motifs["Zic3"] = 'Nanog/m0_p2'
motifs["Nanog-partner"] = 'Nanog/m0_p4'
motifs["Klf4"] = 'Klf4/m0_p0'
In [2]:
# Imports
# NOTE(review): the star imports hide where names used later in this notebook
# (`pd`, `np`, `Path`, `paper_config`, `get_figsize`, ...) come from —
# presumably basepair.imports; confirm and consider explicit imports.
from basepair.imports import *
from plotnine import *
import plotnine
import warnings
# Suppress all warnings for the rest of the session (no category filter is given).
warnings.filterwarnings("ignore")

# Render matplotlib figures inline, at retina resolution.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Presumably applies the shared plotting style for paper figures — confirm in basepair.
paper_config()

# interval columns in dfi
interval_cols = ['example_chrom', 'pattern_start_abs', 'pattern_end_abs']
Using TensorFlow backend.
In [4]:
from basepair.exp.paper.config import *
In [5]:
# Directory of the model that belongs to the selected experiment.
model_dir = models_dir / exp

# Output locations for the spacing figures of this experiment.
fdir = Path(f'{ddir}/figures/modisco/{exp}/spacing/')
fdir_individual = fdir.joinpath('individual')
fdir_individual_sim = fdir.joinpath('individual-simulation')
In [7]:
# Load the precomputed `dfab` table from the model's deeplift directory.
dfab = pd.read_csv(f"{model_dir}/deeplift/dfab.csv.gz")
In [ ]:
# Preview the first rows of the loaded table.
dfab.head()
In [35]:
# Restrict to Nanog<>Nanog pairs with a center_diff of at most 100.
df = dfab.loc[(dfab['motif_pair'] == 'Nanog<>Nanog') &
              (dfab['center_diff'] <= 100)]
In [36]:
# Histogram of pairwise motif distances (`center_diff`) for one motif pair,
# drawn as one facet row per strand combination.
motif_pair_name = 'Nanog<>Nanog'
plotnine.options.figure_size = get_figsize(2, aspect=2/10*4 / 2)
xmin = 5
xmax = 100
fig = (ggplot(data=df, mapping=aes(x='center_diff', fill='strand_combination')) +
       # plot: unit-wide bins; `xmax + 1` because np.arange excludes its stop value
       geom_histogram(breaks=np.arange(xmin, xmax + 1)) +
       facet_grid("strand_combination~.") +
       # Theme, labels, colors
       theme_bw(base_size=10, base_family='Arial') +
       theme(strip_text=element_text(rotation=0), legend_position='top') +
       xlab("Pairwise distance") +
       ggtitle(motif_pair_name) +
       # Axis ticks use the same inclusive upper bound as the histogram bins,
       # so the tick at `xmax` is not dropped.
       scale_x_continuous(breaks=np.arange(xmin, xmax + 1, step=5),
                          minor_breaks=np.arange(xmin, xmax + 1, step=1)) +
       scale_fill_brewer(type='qual', palette=3))
display(fig)
# fig.save(fdir_individual / f'{motif_pair_name}.large.pdf')
<ggplot: (8755103742370)>
In [15]:
# Preview the first rows of the filtered pair table.
df.head()
Out[15]:
example_chrom_x example_end_x example_idx example_interval_from_task_x example_start_x example_strand_x imp/Klf4_x imp/Nanog_x imp/Oct4_x imp/Sox2_x imp_max_x imp_max_task_x imp_weighted_x imp_weighted_cat_x imp_weighted_p_x match/Klf4_x match/Nanog_x match/Oct4_x match/Sox2_x match_max_x match_max_task_x match_weighted_x match_weighted_cat_x match_weighted_p_x pattern_x pattern_center_x pattern_end_x pattern_end_abs_x pattern_len_x pattern_name_x pattern_short_x pattern_start_x pattern_start_abs_x seq_match_x seq_match_cat_x seq_match_p_x strand_x tf_x pattern_center_aln_x pattern_strand_aln_x row_idx_x id_x Klf4/profile_match_x Klf4/profile_match_p_x Klf4/profile_counts_x Klf4/profile_counts_p_x Klf4/profile_max_x Klf4/profile_max_p_x Klf4/profile_counts_max_ref_x Klf4/profile_counts_max_ref_p_x Nanog/profile_match_x Nanog/profile_match_p_x Nanog/profile_counts_x Nanog/profile_counts_p_x Nanog/profile_max_x Nanog/profile_max_p_x Nanog/profile_counts_max_ref_x Nanog/profile_counts_max_ref_p_x Oct4/profile_match_x Oct4/profile_match_p_x Oct4/profile_counts_x Oct4/profile_counts_p_x Oct4/profile_max_x Oct4/profile_max_p_x Oct4/profile_counts_max_ref_x Oct4/profile_counts_max_ref_p_x Sox2/profile_match_x Sox2/profile_match_p_x Sox2/profile_counts_x Sox2/profile_counts_p_x Sox2/profile_max_x Sox2/profile_max_p_x Sox2/profile_counts_max_ref_x Sox2/profile_counts_max_ref_p_x example_chrom_y example_end_y example_interval_from_task_y example_start_y example_strand_y imp/Klf4_y imp/Nanog_y imp/Oct4_y imp/Sox2_y imp_max_y imp_max_task_y imp_weighted_y imp_weighted_cat_y imp_weighted_p_y match/Klf4_y match/Nanog_y match/Oct4_y match/Sox2_y match_max_y match_max_task_y match_weighted_y match_weighted_cat_y match_weighted_p_y pattern_y pattern_center_y pattern_end_y pattern_end_abs_y pattern_len_y pattern_name_y pattern_short_y pattern_start_y pattern_start_abs_y seq_match_y seq_match_cat_y seq_match_p_y strand_y tf_y pattern_center_aln_y pattern_strand_aln_y row_idx_y id_y 
Klf4/profile_match_y Klf4/profile_match_p_y Klf4/profile_counts_y Klf4/profile_counts_p_y Klf4/profile_max_y Klf4/profile_max_p_y Klf4/profile_counts_max_ref_y Klf4/profile_counts_max_ref_p_y Nanog/profile_match_y Nanog/profile_match_p_y Nanog/profile_counts_y Nanog/profile_counts_p_y Nanog/profile_max_y Nanog/profile_max_p_y Nanog/profile_counts_max_ref_y Nanog/profile_counts_max_ref_p_y Oct4/profile_match_y Oct4/profile_match_p_y Oct4/profile_counts_y Oct4/profile_counts_p_y Oct4/profile_max_y Oct4/profile_max_p_y Oct4/profile_counts_max_ref_y Oct4/profile_counts_max_ref_p_y Sox2/profile_match_y Sox2/profile_match_p_y Sox2/profile_counts_y Sox2/profile_counts_p_y Sox2/profile_max_y Sox2/profile_max_p_y Sox2/profile_counts_max_ref_y Sox2/profile_counts_max_ref_p_y center_diff center_diff_aln strand_combination motif_pair Nanog/profile_counts_max_ref_x_log Nanog/profile_counts_max_ref_y_log Sox2/profile_counts_max_ref_x_log
261310 chr3 1.2215e+08 1 Oct4 1.2215e+08 . NaN 1.1791 NaN NaN 1.1791 Nanog 1.1791 high 0.9987 NaN 0.5380 NaN NaN 0.5380 Nanog 0.5380 medium 0.4364 Nanog/metacluster_0/p... 464.0 468.0 1.2215e+08 9.0 Nanog Nanog/m0_p1 459.0 1.2215e+08 3.9726 low 0.1077 + Nanog 464.0 - 263219.0 263219.0 5.1041 0.5008 57.0001 0.5666 4.0 0.7477 2.0000e-06 0.7316 3.9462 0.3717 3579.0000 0.9963 281.0 0.9987 1.0 0.6347 3.5280 0.2090 2202.0000 0.9993 104.0 0.9987 2.0000e-06 0.6411 3.9084 0.0751 567.0001 0.9983 27.0 0.9950 3.0 0.9789 chr3 1.2215e+08 Oct4 1.2215e+08 . NaN 1.3014 NaN NaN 1.3014 Nanog 1.3014 high 0.9997 NaN 0.6316 NaN NaN 0.6316 Nanog 0.6316 high 0.7398 Nanog/metacluster_0/p... 485.0 489.0 1.2215e+08 9.0 Nanog Nanog/m0_p1 480.0 1.2215e+08 5.6011 medium 0.3318 + Nanog 485.0 - 263220.0 263220.0 4.7505 0.4059 64.0001 0.6206 4.0 0.7477 1.0000e+00 0.7316 2.3451 0.1969 4213.0 0.9990 281.0 0.9987 122.0 0.9973 2.0313 0.0644 2771.0000 0.9993 104.0 0.9987 17.0 0.9856 2.3545 0.0104 795.0001 0.9993 29.0 0.9960 10.0 0.9977 21.0 21.0 ++ Nanog<>Nanog 0.3010 2.0899 0.6021
261313 chr3 1.2215e+08 1 Oct4 1.2215e+08 . NaN 1.3014 NaN NaN 1.3014 Nanog 1.3014 high 0.9997 NaN 0.6316 NaN NaN 0.6316 Nanog 0.6316 high 0.7398 Nanog/metacluster_0/p... 485.0 489.0 1.2215e+08 9.0 Nanog Nanog/m0_p1 480.0 1.2215e+08 5.6011 medium 0.3318 + Nanog 485.0 - 263220.0 263220.0 4.7505 0.4059 64.0001 0.6206 4.0 0.7477 1.0000e+00 0.7316 2.3451 0.1969 4213.0000 0.9990 281.0 0.9987 122.0 0.9973 2.0313 0.0644 2771.0000 0.9993 104.0 0.9987 1.7000e+01 0.9856 2.3545 0.0104 795.0001 0.9993 29.0 0.9960 10.0 0.9977 chr3 1.2215e+08 Oct4 1.2215e+08 . NaN 1.1344 NaN NaN 1.1344 Nanog 1.1344 high 0.9966 NaN 0.5557 NaN NaN 0.5557 Nanog 0.5557 medium 0.4921 Nanog/metacluster_0/p... 527.0 531.0 1.2215e+08 9.0 Nanog Nanog/m0_p1 522.0 1.2215e+08 5.6011 medium 0.3318 + Nanog 527.0 - 263221.0 263221.0 3.9538 0.2452 90.0001 0.7444 4.0 0.7477 2.0000e-06 0.7316 0.3554 0.0081 5494.0 1.0000 248.0 0.9980 211.0 0.9997 0.3638 0.0034 3398.0000 1.0000 98.0 0.9987 39.0 0.9977 0.4669 0.0034 1036.0000 1.0000 33.0 0.9970 16.0 0.9990 42.0 42.0 ++ Nanog<>Nanog 2.0899 2.3263 1.0414
261315 chr3 1.2215e+08 1 Oct4 1.2215e+08 . NaN 1.1344 NaN NaN 1.1344 Nanog 1.1344 high 0.9966 NaN 0.5557 NaN NaN 0.5557 Nanog 0.5557 medium 0.4921 Nanog/metacluster_0/p... 527.0 531.0 1.2215e+08 9.0 Nanog Nanog/m0_p1 522.0 1.2215e+08 5.6011 medium 0.3318 + Nanog 527.0 - 263221.0 263221.0 3.9538 0.2452 90.0001 0.7444 4.0 0.7477 2.0000e-06 0.7316 0.3554 0.0081 5494.0000 1.0000 248.0 0.9980 211.0 0.9997 0.3638 0.0034 3398.0000 1.0000 98.0 0.9987 3.9000e+01 0.9977 0.4669 0.0034 1036.0000 1.0000 33.0 0.9970 16.0 0.9990 chr3 1.2215e+08 Oct4 1.2215e+08 . NaN 0.7304 NaN NaN 0.7304 Nanog 0.7304 high 0.9237 NaN 0.4998 NaN NaN 0.4998 Nanog 0.4998 low 0.3095 Nanog/metacluster_0/p... 548.0 552.0 1.2215e+08 9.0 Nanog Nanog/m0_p1 543.0 1.2215e+08 3.9726 low 0.1077 + Nanog 548.0 - 263222.0 263222.0 3.7226 0.2107 105.0001 0.7890 5.0 0.8316 1.0000e+00 0.7316 0.2655 0.0037 6653.0 1.0000 297.0 0.9993 222.0 1.0000 0.2509 0.0023 3309.0000 1.0000 98.0 0.9987 22.0 0.9909 0.4081 0.0034 1007.0001 1.0000 33.0 0.9970 11.0 0.9977 21.0 21.0 ++ Nanog<>Nanog 2.3263 2.3483 1.2304
261316 chr4 1.2554e+08 56 Oct4 1.2553e+08 . NaN 0.2922 NaN NaN 0.2922 Nanog 0.2922 low 0.2605 NaN 0.5820 NaN NaN 0.5820 Nanog 0.5820 medium 0.5842 Nanog/metacluster_0/p... 431.0 435.0 1.2553e+08 9.0 Nanog Nanog/m0_p1 426.0 1.2553e+08 5.6739 medium 0.3442 + Nanog 431.0 - 263240.0 263240.0 1.6106 0.0295 385.0001 0.9809 16.0 0.9842 2.0000e+00 0.8601 1.0755 0.0812 891.0001 0.9524 62.0 0.9701 1.0 0.6347 1.4816 0.0386 632.0001 0.9775 23.0 0.9705 8.0000e+00 0.9574 3.8865 0.0748 113.0001 0.9460 4.0 0.8729 6.0 0.9953 chr4 1.2554e+08 Oct4 1.2553e+08 . NaN 0.5308 NaN NaN 0.5308 Nanog 0.5308 high 0.7499 NaN 0.6137 NaN NaN 0.6137 Nanog 0.6137 high 0.6871 Nanog/metacluster_0/p... 470.0 474.0 1.2553e+08 9.0 Nanog Nanog/m0_p1 465.0 1.2553e+08 7.9618 high 0.7600 + Nanog 470.0 - 263241.0 263241.0 1.0167 0.0091 528.0001 0.9933 16.0 0.9842 4.0000e+00 0.9440 0.7461 0.0460 1611.0 0.9802 104.0 0.9839 72.0 0.9943 0.7070 0.0084 1242.0000 0.9960 71.0 0.9973 13.0 0.9769 2.3704 0.0104 259.0001 0.9893 18.0 0.9886 1.0 0.8994 39.0 39.0 ++ Nanog<>Nanog 0.3010 1.8633 0.8451
261317 chr2 1.7077e+08 76 Oct4 1.7077e+08 . NaN 0.2731 NaN NaN 0.2731 Nanog 0.2731 low 0.2096 NaN 0.6504 NaN NaN 0.6504 Nanog 0.6504 high 0.7995 Nanog/metacluster_0/p... 397.0 402.0 1.7077e+08 9.0 Nanog Nanog/m0_p1 393.0 1.7077e+08 5.6011 medium 0.3318 - Nanog 397.0 + 263244.0 263244.0 2.6274 0.0798 150.0001 0.8799 5.0 0.8316 2.0000e+00 0.8601 0.6339 0.0342 630.0001 0.9212 27.0 0.9212 16.0 0.9567 2.0737 0.0668 328.0001 0.9443 11.0 0.9255 5.0000e+00 0.9195 4.0915 0.0839 113.0001 0.9460 5.0 0.9054 2.0 0.9564 chr2 1.7077e+08 Oct4 1.7077e+08 . NaN 0.1738 NaN NaN 0.1738 Nanog 0.1738 low 0.0192 NaN 0.4776 NaN NaN 0.4776 Nanog 0.4776 low 0.2413 Nanog/metacluster_0/p... 446.0 451.0 1.7077e+08 9.0 Nanog Nanog/m0_p1 442.0 1.7077e+08 3.9044 low 0.1012 - Nanog 446.0 + 263245.0 263245.0 2.4882 0.0741 200.0001 0.9215 8.0 0.9279 5.0000e+00 0.9611 0.4569 0.0164 1512.0 0.9789 104.0 0.9839 31.0 0.9815 1.3107 0.0268 629.0001 0.9775 22.0 0.9691 7.0 0.9480 2.5963 0.0138 263.0001 0.9899 15.0 0.9826 5.0 0.9936 49.0 49.0 ++ Nanog<>Nanog 1.2304 1.5052 0.4771
In [17]:
# Show the `pattern_start_abs_x` column of the filtered pair table.
df.pattern_start_abs_x
Out[17]:
261310    1.2215e+08
261313    1.2215e+08
261315    1.2215e+08
             ...    
281036    3.4693e+07
281038    3.4693e+07
281039    1.2162e+08
Name: pattern_start_abs_x, Length: 10182, dtype: float64
In [37]:
# Base column names exported for each side of a pair; the export cell appends
# '_x' / '_y' to them before selecting columns from `df`.
features = ['example_chrom', 'pattern_start_abs', 'pattern_end_abs', 'strand']
In [38]:
def suffix(l, suffix):
    """Return a new list in which `suffix` is appended to every element of `l`.

    Args:
        l: iterable of items that support `item + suffix` (a list of column
            names where this notebook calls it).
        suffix: value appended to each item.

    Returns:
        List of `item + suffix`, in the order of `l`.
    """
    suffixed = []
    for item in l:
        suffixed.append(item + suffix)
    return suffixed
In [39]:
# Output directory for the exported pair tables.
# NOTE(review): absolute, user-specific path — consider deriving it from a
# configurable base directory so the notebook runs on other machines.
periodicity_dir = Path('/users/avsec/gdrive/projects/chipnexus/data/periodicity/')
In [43]:
# Columns written out: the distance, the strand combination, and the
# per-motif coordinate columns for both sides ('_x' and '_y') of each pair.
pair_columns = (['center_diff', 'strand_combination']
                + suffix(features, '_x')
                + suffix(features, '_y'))
df[pair_columns].to_csv(periodicity_dir / 'Nanog<>Nanog.pairs.csv', index=False)