In [242]:
from collections import OrderedDict
exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE'
imp_score = 'profile/wn'

motifs = OrderedDict([
    ("Oct4-Sox2", 'Oct4/m0_p0'),
    ("Oct4", 'Oct4/m0_p1'),
    # ("Strange-sym-motif", 'Oct4/m0_p5'),
    ("Sox2", 'Sox2/m0_p1'),
    ("Nanog", 'Nanog/m0_p1'),
    # ("Zic3", 'Nanog/m0_p2'),
    ("Nanog-partner", 'Nanog/m0_p4'),
    ("Klf4", 'Klf4/m0_p0'),
])

Goal

  • make sub-plots for Figure 5

Open questions

  • should we make a larger co-occurence plot?
    • plot more motifs (one from each cluster at least)
In [2]:
# Imports
from basepair.imports import *
from basepair.math import softmax
from basepair.plot.heatmaps import heatmap_stranded_profile, multiple_heatmap_stranded_profile
from basepair.plot.profiles import plot_stranded_profile, multiple_plot_stranded_profile, extract_signal
from basepair.plot.tracks import plot_tracks, filter_tracks
from basepair.modisco.results import MultipleModiscoResult, Seqlet, resize_seqlets
from basepair.modisco.pattern_instances import (multiple_load_instances, load_instances, filter_nonoverlapping_intervals, 
                                                plot_coocurence_matrix, align_instance_center, dfi2seqlets, annotate_profile)
from basepair.exp.chipnexus.perturb.vdom import vdom_motif_pair, plot_spacing_hist
from basepair.exp.chipnexus.spacing import remove_edge_instances, get_motif_pairs, motif_pair_dfi
from basepair.exp.chipnexus.simulate import (insert_motif, generate_sim, plot_sim, generate_seq, 
                                             model2tasks, motif_coords, interactive_tracks, plot_motif_table,
                                             plot_sim_motif_col)
from basepair.exp.paper.config import *
from basepair.cli.modisco import load_profiles
from basepair.cli.imp_score import ImpScoreFile
from basepair.preproc import rc_seq, dfint_no_intersection
from copy import deepcopy
from scipy.fftpack import fft, ifft
from plotnine import *
import plotnine
import warnings
warnings.filterwarnings("ignore")

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
paper_config()

# interval columns in dfi
interval_cols = ['example_chrom', 'pattern_start_abs', 'pattern_end_abs']
Using TensorFlow backend.
In [221]:
# figures dir
model_dir = models_dir / exp
fdir = Path(f'{ddir}/figures/modisco/{exp}/spacing-raw-data')
fdir.mkdir(exist_ok=True)
In [4]:
!mkdir -p {fdir_individual}
!mkdir -p {fdir_individual_sim}
!mkdir -p {fdir}
In [243]:
# Generate motif pairs
pairs = get_motif_pairs(motifs)

# ordered names
pair_names = ["<>".join(x) for x in pairs]
In [6]:
# define the global set of distances
dist_subsets = ['center_diff<=35',
               '(center_diff>35)&(center_diff<=70)', 
               '(center_diff>70)&(center_diff<=150)', 
               'center_diff>150']
dist_subset_labels = ['dist < 35',
                      '35 < dist <= 70',
                      '70 < dist <= 150',
                      '150 < dist',
                     ]
In [8]:
dfi_subset = pd.read_parquet(f"{model_dir}/deeplift/dfi_subset.parq", engine='fastparquet')
In [9]:
from basepair.exp.chipnexus.spacing import coocurrence_plot
from basepair.exp.chipnexus.spacing import co_occurence_matrix
In [10]:
from collections import OrderedDict
In [11]:
main_motifs = OrderedDict([(k,v)
                            for k,v in motifs.items()
                            if k in ['Oct4-Sox2', 'Sox2', 'Nanog', 'Klf4']
                           ])
In [12]:
# subsets = ['center_diff <= 35', 'center_diff > 35', 'center_diff > 70']
subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets), 
                        figsize=get_figsize(0.25*len(subsets), .8/len(subsets)), 
                        sharey=True)
# dfs = dfi_subset.query('match_weighted_p > .2').query('imp_weighted_p > .2')
dfs = dfi_subset
for i, (subset,ax, subset_label) in enumerate(zip(subsets, axes, dist_subset_labels)):
    if i == len(subsets) - 1:
        cbar = True
    else:
        cbar = False
    coocurrence_plot(dfs, list(main_motifs), query_string=f"({subset}) & (center_diff_aln > 5)" , ax=ax, cbar=cbar)
    if i == 0:
        ax.set_ylabel("Motif of interest")
        ax.set_xlabel("Motif partner");
    
    ax.set_title(subset_label)
# plt.tight_layout()
# fig.savefig(fdir / f'coocurrence.test.all-dist.main-motifs.pdf')

Pairwise spacing Nanog

In [9]:
from basepair.exp.chipnexus.spacing import remove_edge_instances, get_motif_pairs, motif_pair_dfi
from basepair.stats import smooth_gam, smooth_lowess
Using TensorFlow backend.
In [14]:
pairs = get_motif_pairs(motifs)
In [15]:
# create motif pairs
dfab = pd.concat([motif_pair_dfi(dfi_subset, pair).assign(motif_pair="<>".join(pair)) for pair in pairs], axis=0)
In [1]:
dfab = pd.read_csv("~/gdrive/projects/chipnexus/data/dfab.csv.gz")
In [26]:
dfabm = dfab[(dfab.motif_pair == 'Nanog<>Nanog') & (dfab.strand_combination == '++')]
In [34]:
paper_config()
In [216]:
def smooth_gam(x, y, n_splines=100, lam=None):
    from pygam import ExpectileGAM, LinearGAM, s, f
    gam = LinearGAM(s(0, n_splines=n_splines)).gridsearch(x, y, lam=lam)
    # gam = ExpectileGAM(s(0, n_splines=n_splines), expectile=0.5, lam=lam).gridsearch(x.values.reshape((-1,1)), y)
    XX = gam.generate_X_grid(term=0)
    confi = gam.confidence_intervals(XX)
    # confi = gam.prediction_intervals(XX)
    ym = gam.predict_mu(XX)
    return XX[:, 0], ym, confi
In [238]:
profile_mapping
Out[238]:
{'Oct4-Sox2': 'Oct4',
 'Oct4': 'Oct4',
 'Sox2': 'Sox2',
 'Nanog': 'Nanog',
 'Klf4': 'Klf4',
 'Essrb': 'Oct4'}
In [247]:
profile_mapping['Nanog-partner'] = 'Nanog'
In [248]:
for i, motif_pair in enumerate(pairs):
    same_motif = motif_pair[0] == motif_pair[1]
    if same_motif:
        fig, axes = plt.subplots(3, 1, figsize=get_figsize(.25, 2), 
                                 sharex=True, 
                                 sharey=True,
                                 gridspec_kw=dict(wspace=0, hspace=0))
    else:
        fig, axes = plt.subplots(4, 2, figsize=get_figsize(.5, 2/3*2), 
                                 sharex=True, 
                                 sharey=True,
                                 gridspec_kw=dict(wspace=0, hspace=0))

    motif_pair_name = '<>'.join(motif_pair)

    for j, strand_combination in enumerate(['++', '+-', '-+', '--']):
        dfabm = dfab[(dfab.motif_pair == motif_pair_name) & (dfab.strand_combination == strand_combination)]

        if len(dfabm) == 0:
            continue

        xmax = 100
        x = dfabm[dfabm.center_diff<xmax].center_diff.values
        for k in range(2):
            last_k = k == 1 or same_motif
            if same_motif and k == 1:
                continue
            if same_motif:
                ax = axes[j]
                tf = profile_mapping[motif_pair[0]]
                y = np.log(1+(dfabm[dfabm.center_diff<xmax][f'{tf}/profile_counts_max_ref_y'].values + 
                              dfabm[dfabm.center_diff<xmax][f'{tf}/profile_counts_max_ref_x'].values))
            else:
                ax = axes[j, k]
                tf = profile_mapping[motif_pair[k]]
                y = np.log(1+dfabm[dfabm.center_diff<xmax][f'{tf}/profile_counts_max_ref_x'].values)

            xm,ym, ci = smooth_gam(x[:, np.newaxis], y, n_splines=xmax//2, lam=np.logspace(1, 6, 11))

            ax.scatter(x, y, alpha=0.1, s=1);
            cmap = plt.get_cmap("tab10")
            color = cmap(0)
            ax.plot(xm, ym, color=color);
            if ci is not None:
                ax.fill_between(xm, ci[:, 0], ci[:, 1], alpha=0.4, color=color);
            # plt.xlim([5, 100])
            if last_k:
                ax.set_xlabel("Motif distance")
            if j == 1 and k == 0:
                ax.set_ylabel("Max profile value")
            if j == 0:
                ax.set_title(motif_pair[k])
            if last_k:
                ax.text(1, 1,strand_combination,
                     horizontalalignment='right',
                     verticalalignment='top',
                     transform = ax.transAxes)
            sns.despine(top=True, right=True)
        fig.savefig(fdir / f'{motif_pair_name}.pdf')
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:03 Time:  0:00:03
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/matplotlib/pyplot.py:514: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00
100% (11 of 11) |########################| Elapsed Time: 0:00:01 Time:  0:00:01