Pairwise perturbation

  • This notebooks shows

TODO

  • [x] take top 1500 values per counts, sort by pairwise distance
  • [x] try out different normalizers
  • [x] add also the sequence plot
  • [x] eyeball the plots
    • what would be a good way to quantify the effects? - importance scores
  • [x] implement different feature functions quantifying the interactions
    • [x] given a list of arrays (pred, imp, imp_count), quantify the metric
    • [x] combine differnet metrics with each other
    • [x] add also the max-count metric
      • positions with maximal number of counts (averaged per strands)
  • [x] add scatterplots quantifying the interactions
  • [ ] re-compute motif_centers and motif_diff using proper rounding schemes

TODO

  • [ ] generate all the pdf plots
  • [ ] for scatterplots, show only the corresponding TF in the main (triangular) figure
    • [ ] for the supplementary figures generate all 40 plots
    • [ ] use max count
  • [ ] add the heatmap of A->B type of analysis
    • [ ] show <35 and >35 distance versions
  • [ ] make sure colors look good and are consistent
  • [ ] switch colors so that red means the 'interesting' effect
  • [ ] simulation - generate the max score (same as in the perturbation analysis)
In [1]:
from basepair.utils import read_pkl
from pathlib import Path
from basepair.exp.paper.config import motifs, profile_mapping
from basepair.plot.config import paper_config, get_figsize
from basepair.exp.chipnexus.perturb.vdom import vdom_motif_pair, plot_spacing_hist
from basepair.exp.chipnexus.spacing import remove_edge_instances, get_motif_pairs
from basepair.plot.heatmaps import RowQuantileNormalizer, QuantileTruncateNormalizer
from tqdm import tqdm
from basepair.data import NumpyDataset
from basepair.exp.chipnexus.perturb.gen import *
from basepair.exp.chipnexus.perturb.scores import *
from copy import deepcopy
from plotnine import *
import plotnine
import warnings
warnings.filterwarnings("ignore")

from basepair.config import get_data_dir
paper_config()
ddir = get_data_dir()
Using TensorFlow backend.
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/concise/utils/plot.py:115: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  min_coords = np.vstack(data.min(0) for data in polygons_data).min(0)
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/concise/utils/plot.py:116: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  max_coords = np.vstack(data.max(0) for data in polygons_data).max(0)
In [2]:
# Common paths
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
modisco_dir = model_dir / f"modisco/all/deeplift/profile/"
output_dir = modisco_dir
dataset_dir = output_dir / 'perturbation-analysis'
In [3]:
pairs = get_motif_pairs(motifs)
In [4]:
# load the data
motif_pair_lpdata = read_pkl(dataset_dir / 'motif_pair_lpdata.incl-whole.pkl')
dfab = pd.read_csv(dataset_dir / 'dfab.csv.gz')
In [5]:
dfi_subset = pd.read_csv(dataset_dir / 'dfi_subset.csv.gz')
In [6]:
# write_pkl(motif_pair_lpdata, dataset_dir / 'motif_pair_lpdata.incl-whole.pkl')
In [7]:
# TODO - de-hardcode
tasks = ['Oct4', 'Sox2', 'Nanog', 'Klf4']


Basic stats

In [ ]:
%matplotlib inline
In [ ]:
paper_config()
In [26]:
fig = plt.figure(figsize=get_figsize(.5))
dfab.groupby("motif_pair").size().plot.barh()
plt.axvline(1500)  # displayed cutoff
plt.xlabel("Number of pairs");

Pairwise spacing

In [ ]:
dfab['motif_pair_cat'] = pd.Categorical(dfab.motif_pair, categories=dfab.groupby("motif_pair").size().sort_values(ascending=False).index)
dfab['strand_combination_cat'] = pd.Categorical(dfab['strand_combination'], )
In [28]:
plotnine.options.figure_size = get_figsize(.5, aspect=2)# (10, 10)
max_dist = 100
(ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist)]) + 
 geom_histogram(bins=max_dist) + 
 facet_grid("motif_pair_cat~ .") + 
 theme_classic() + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") + 
 scale_fill_brewer(type='qual', palette=3))
Out[28]:
<ggplot: (8729886685353)>

Pairwise spacing

In [29]:
plotnine.options.figure_size = get_figsize(.5, aspect=2/10*3)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) + 
 geom_vline(xintercept=10, alpha=0.1) + 
 geom_vline(xintercept=20, alpha=0.1) + 
 geom_vline(xintercept=30, alpha=0.1) + 
 geom_vline(xintercept=40, alpha=0.1) + 
 geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") + 
 theme_classic() + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") +
 ggtitle("Nanog<>Nanog") + 
 scale_fill_brewer(type='qual', palette=3))
fig
Out[29]:
<ggplot: (-9223363306968474266)>

TODO - export

Perturbation analysis

In [30]:
output_dir = Path('/srv/www/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/deeplift/profile/perturbation/figures.2')
figures_url = Path('/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/deeplift/profile/perturbation/figures.2')
In [31]:
!mkdir -p {output_dir}

RowQuantileNormalizer

In [323]:
vdom_motif_pair(motif_pair_lpdata, dfab, profile_mapping, figures_dir=str(output_dir /  'RowQuantileNormalizer'), figures_url=str(figures_url /  'RowQuantileNormalizer'), profile_width=200, cache=False, normalizer=RowQuantileNormalizer())
100%|██████████| 10/10 [14:13<00:00, 85.18s/it]
Out[323]:
  • Oct4-Sox2 (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Oct4-Sox2 (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Oct4-Sox2 (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Oct4-Sox2 (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)

QuantileTruncateNormalizer

In [324]:
vdom_motif_pair(motif_pair_lpdata, dfab, profile_mapping, figures_dir=str(output_dir /  'QuantileTruncateNormalizer'), figures_url=str(figures_url /  'QuantileTruncateNormalizer'), profile_width=200, cache=False, normalizer=QuantileTruncateNormalizer())
100%|██████████| 10/10 [13:34<00:00, 81.29s/it]
Out[324]:
  • Oct4-Sox2 (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Oct4-Sox2 (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Oct4-Sox2 (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Oct4-Sox2 (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)

log(1+x) normalization

In [325]:
vdom_motif_pair(motif_pair_lpdata, dfab, profile_mapping, figures_dir=str(output_dir /  'lognorm'), figures_url=str(figures_url /  'lognorm'), profile_width=200, cache=False, normalizer=lambda x: np.log(1+x))
100%|██████████| 10/10 [13:43<00:00, 82.69s/it]
Out[325]:
  • Oct4-Sox2 (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Oct4-Sox2 (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Oct4-Sox2 (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Oct4-Sox2 (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Oct4-Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Sox2 (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Sox2)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Nanog (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Nanog)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)
  • Klf4 (perturb Klf4)
    Distance
    Sequences
    Profile (task=Oct4)
    Importance profile (task=Oct4)
    Importance count (task=Oct4)
    Profile (task=Sox2)
    Importance profile (task=Sox2)
    Importance count (task=Sox2)
    Profile (task=Nanog)
    Importance profile (task=Nanog)
    Importance count (task=Nanog)
    Profile (task=Klf4)
    Importance profile (task=Klf4)
    Importance count (task=Klf4)

Summarize data

TODO

  • [x] implement different feature functions quantifying the interactions
    • [x] given a list of arrays (pred, imp, imp_count), quantify the metric
    • [X] combine differnet metrics with each other
    • [x] add also the max-count metric
      • positions with maximal number of counts (averaged per strands)
  • [x] add scatterplots quantifying the interactions
  • [ ] re-compute motif_centers and motif_diff using proper rounding schemes

Synergy scores

  • Wt = total counts in ref
  • dA = total counts after removing A
  • dB = total counts after removing B
  • dAB = total counts after removing both

Then TF-A depends on A with

(Wt-dA)/(Wt - dAB)

and on B with

(Wt-dB)/(Wt - dAB)

In [8]:
def ism_compute_features_tidy(motif_pair_lpdata, tasks,):
    out = []
    for motif_pair_name, lpdata in tqdm(motif_pair_lpdata.items()):
        motif_pair = list(motif_pair_name.split("<>"))
        for task in tasks:
            dfab_sm = lpdata['dfab'].copy()
            dfab_sm['task'] = task
            whole = {k: motif_pair_lpdata[motif_pair_name]['x'][k]['whole'][task]
                      for k in motif_pair_lpdata[motif_pair_name]['x']}
            dfab_sm['Wt_obs'] = whole['ref']['obs'].sum(axis=(1,2))
            dfab_sm['Wt'] = whole['ref']['pred'].sum(axis=(1,2))
            dfab_sm['dA'] = whole['dthis']['pred'].sum(axis=(1,2))
            dfab_sm['dB'] = whole['dother']['pred'].sum(axis=(1,2))
            dfab_sm['dAB'] = whole['dboth']['pred'].sum(axis=(1,2))
            out.append(dfab_sm)
    return pd.concat(out, axis=0)
In [9]:
dfabf_ism = ism_compute_features_tidy(motif_pair_lpdata, tasks)
100%|██████████| 10/10 [00:02<00:00,  3.22it/s]
In [ ]:
# dfs.to_csv(dataset_dir / 'dfs.csv.gz', compression='gzip', index=False)
In [10]:
dfabf_ism.head()
Out[10]:
Unnamed: 0 pattern_x example_idx pattern_start_x pattern_end_x strand_x pattern_len_x pattern_center_x match_weighted_x match_weighted_p_x match_weighted_cat_x match_max_x match_max_task_x imp_weighted_x imp_weighted_p_x imp_weighted_cat_x imp_max_x imp_max_task_x seq_match_x seq_match_p_x seq_match_cat_x match/Klf4_x match/Nanog_x match/Oct4_x match/Sox2_x imp/Klf4_x imp/Nanog_x imp/Oct4_x imp/Sox2_x example_chrom_x example_start_x example_end_x example_strand_x example_interval_from_task_x pattern_short_x pattern_name_x pattern_start_abs_x pattern_end_abs_x pattern_center_aln_x pattern_strand_aln_x row_idx_x pattern_y pattern_start_y pattern_end_y strand_y pattern_len_y pattern_center_y match_weighted_y match_weighted_p_y match_weighted_cat_y match_max_y match_max_task_y imp_weighted_y imp_weighted_p_y imp_weighted_cat_y imp_max_y imp_max_task_y seq_match_y seq_match_p_y seq_match_cat_y match/Klf4_y match/Nanog_y match/Oct4_y match/Sox2_y imp/Klf4_y imp/Nanog_y imp/Oct4_y imp/Sox2_y example_chrom_y example_start_y example_end_y example_strand_y example_interval_from_task_y pattern_short_y pattern_name_y pattern_start_abs_y pattern_end_abs_y pattern_center_aln_y pattern_strand_aln_y row_idx_y center_diff center_diff_aln strand_combination motif_pair motif_pair_idx task Wt_obs Wt dA dB dAB
0 1 metacluster_0/pattern_0 1 437.0 452.0 + 15.0 444.0 0.5391 0.5105 medium 0.5627 Sox2 0.9221 0.8271 low 1.0504 Oct4 9.6558 0.4369 medium 0.5452 0.4714 0.5524 0.5627 0.5605 1.0090 1.0504 0.9147 chr3 1.2215e+08 1.2215e+08 * Oct4 m0_p0 Oct4-Sox2 1.2215e+08 1.2215e+08 448.0 + 0.0 metacluster_0/pattern_0 458.0 473.0 + 15.0 465.0 0.5174 0.3763 medium 0.5613 Oct4 1.0389 0.9077 low 1.2170 Sox2 11.3139 0.7737 high 0.4683 0.4598 0.5613 0.5274 0.5500 1.1198 1.0884 1.2170 chr3 1.2215e+08 1.2215e+08 * Oct4 m0_p0 Oct4-Sox2 1.2215e+08 1.2215e+08 469.0 + 1.0 21.0 21.0 ++ Oct4-Sox2<>Oct4-Sox2 0 Oct4 16198.0 58234.4141 28042.2031 38972.9766 23831.6582
1 2 metacluster_0/pattern_0 1 437.0 452.0 + 15.0 444.0 0.5391 0.5105 medium 0.5627 Sox2 0.9221 0.8271 low 1.0504 Oct4 9.6558 0.4369 medium 0.5452 0.4714 0.5524 0.5627 0.5605 1.0090 1.0504 0.9147 chr3 1.2215e+08 1.2215e+08 * Oct4 m0_p0 Oct4-Sox2 1.2215e+08 1.2215e+08 448.0 + 0.0 metacluster_0/pattern_0 499.0 514.0 + 15.0 506.0 0.5154 0.3673 medium 0.5390 Oct4 1.3206 0.9862 low 1.5825 Oct4 9.8925 0.4740 medium 0.4916 0.4731 0.5390 0.5267 0.6803 1.0429 1.5825 1.5485 chr3 1.2215e+08 1.2215e+08 * Oct4 m0_p0 Oct4-Sox2 1.2215e+08 1.2215e+08 510.0 + 2.0 62.0 62.0 ++ Oct4-Sox2<>Oct4-Sox2 1 Oct4 16198.0 58234.4141 28042.2031 17461.5430 9037.3682
2 3 metacluster_0/pattern_0 1 437.0 452.0 + 15.0 444.0 0.5391 0.5105 medium 0.5627 Sox2 0.9221 0.8271 low 1.0504 Oct4 9.6558 0.4369 medium 0.5452 0.4714 0.5524 0.5627 0.5605 1.0090 1.0504 0.9147 chr3 1.2215e+08 1.2215e+08 * Oct4 m0_p0 Oct4-Sox2 1.2215e+08 1.2215e+08 448.0 + 0.0 metacluster_0/pattern_0 520.0 535.0 + 15.0 527.0 0.4897 0.2392 low 0.5008 Oct4 1.4250 0.9950 low 1.6570 Sox2 7.7218 0.1351 low 0.4555 0.4992 0.5008 0.4895 0.8025 1.2853 1.6062 1.6570 chr3 1.2215e+08 1.2215e+08 * Oct4 m0_p0 Oct4-Sox2 1.2215e+08 1.2215e+08 531.0 + 3.0 83.0 83.0 ++ Oct4-Sox2<>Oct4-Sox2 2 Oct4 16198.0 58234.4141 28042.2031 37178.5391 22600.3633
3 4 metacluster_0/pattern_0 1 437.0 452.0 + 15.0 444.0 0.5391 0.5105 medium 0.5627 Sox2 0.9221 0.8271 low 1.0504 Oct4 9.6558 0.4369 medium 0.5452 0.4714 0.5524 0.5627 0.5605 1.0090 1.0504 0.9147 chr3 1.2215e+08 1.2215e+08 * Oct4 m0_p0 Oct4-Sox2 1.2215e+08 1.2215e+08 448.0 + 0.0 metacluster_0/pattern_0 541.0 556.0 + 15.0 548.0 0.4843 0.2165 low 0.5198 Oct4 1.2319 0.9746 low 1.4439 Oct4 9.6558 0.4369 medium 0.4183 0.4421 0.5198 0.5058 0.6828 1.0318 1.4439 1.4188 chr3 1.2215e+08 1.2215e+08 * Oct4 m0_p0 Oct4-Sox2 1.2215e+08 1.2215e+08 552.0 + 4.0 104.0 104.0 ++ Oct4-Sox2<>Oct4-Sox2 3 Oct4 16198.0 58234.4141 28042.2031 23178.7891 13922.0508
4 7 metacluster_0/pattern_0 1 458.0 473.0 + 15.0 465.0 0.5174 0.3763 medium 0.5613 Oct4 1.0389 0.9077 low 1.2170 Sox2 11.3139 0.7737 high 0.4683 0.4598 0.5613 0.5274 0.5500 1.1198 1.0884 1.2170 chr3 1.2215e+08 1.2215e+08 * Oct4 m0_p0 Oct4-Sox2 1.2215e+08 1.2215e+08 469.0 + 1.0 metacluster_0/pattern_0 499.0 514.0 + 15.0 506.0 0.5154 0.3673 medium 0.5390 Oct4 1.3206 0.9862 low 1.5825 Oct4 9.8925 0.4740 medium 0.4916 0.4731 0.5390 0.5267 0.6803 1.0429 1.5825 1.5485 chr3 1.2215e+08 1.2215e+08 * Oct4 m0_p0 Oct4-Sox2 1.2215e+08 1.2215e+08 510.0 + 2.0 41.0 41.0 ++ Oct4-Sox2<>Oct4-Sox2 4 Oct4 16198.0 58234.4141 38972.9766 17461.5430 9234.9775
In [134]:
dfc.head()
Out[134]:
H3K27ac PolII example_idx
0 12794.0 2184.0 1
1 220274.0 94643.0 2
2 130652.0 86907.0 3
3 76793.0 60257.0 4
4 26238.0 13179.0 5
In [10]:
dfs = dfabf_ism[['Wt_obs', 'Wt', 'dA', 'dB', 'dAB', 'motif_pair', 'task', 'center_diff', 'strand_combination', 'example_idx']]
dfs.head()
Out[10]:
Wt_obs Wt dA dB dAB motif_pair task center_diff strand_combination example_idx
0 16198.0 58234.4141 28042.2031 38972.9766 23831.6582 Oct4-Sox2<>Oct4-Sox2 Oct4 21.0 ++ 1
1 16198.0 58234.4141 28042.2031 17461.5430 9037.3682 Oct4-Sox2<>Oct4-Sox2 Oct4 62.0 ++ 1
2 16198.0 58234.4141 28042.2031 37178.5391 22600.3633 Oct4-Sox2<>Oct4-Sox2 Oct4 83.0 ++ 1
3 16198.0 58234.4141 28042.2031 23178.7891 13922.0508 Oct4-Sox2<>Oct4-Sox2 Oct4 104.0 ++ 1
4 16198.0 58234.4141 38972.9766 17461.5430 9234.9775 Oct4-Sox2<>Oct4-Sox2 Oct4 41.0 ++ 1
In [421]:
plt.scatter(np.log10(1+dfs.Wt), np.log10(1+dfs.Wt_obs), s=1, alpha=0.1)
plt.xlabel("Predicted Wt (log10)")
plt.ylabel("Observed Wt (log10)");
In [422]:
np.log10(1+dfs.Wt_obs).plot.hist(300);
plt.xlabel("Observed Wt (log10)");
In [540]:
dfs.head()
Out[540]:
Wt_obs Wt dA dB dAB motif_pair task center_diff strand_combination
0 16198.0 58234.4141 28042.2031 38972.9766 23831.6582 Oct4-Sox2<>Oct4-Sox2 Oct4 21.0 ++
1 16198.0 58234.4141 28042.2031 17461.5430 9037.3682 Oct4-Sox2<>Oct4-Sox2 Oct4 62.0 ++
2 16198.0 58234.4141 28042.2031 37178.5391 22600.3633 Oct4-Sox2<>Oct4-Sox2 Oct4 83.0 ++
3 16198.0 58234.4141 28042.2031 23178.7891 13922.0508 Oct4-Sox2<>Oct4-Sox2 Oct4 104.0 ++
4 16198.0 58234.4141 38972.9766 17461.5430 9234.9775 Oct4-Sox2<>Oct4-Sox2 Oct4 41.0 ++
In [11]:
# load activity data
import pybedtools
from pybedtools import BedTool
from basepair.extractors import MultiAssayExtractor
from basepair.data import NumpyDataset

df = pd.read_csv(f"{ddir}/processed/chipnexus/external-data.tsv", sep='\t')
dfs = df[df.assay.isin(['PolII', 'H3K27ac'])]

df_regions = dfi_subset[['example_chrom', 'example_start', 'example_end', 'example_idx']].drop_duplicates()
bt = BedTool.from_dataframe(df_regions)

extractor = MultiAssayExtractor(dfs, None, use_strand=False, n_jobs=10)

regions = extractor.extract(list(bt))

r = NumpyDataset(regions)
dfc = pd.DataFrame(r.aggregate(np.sum, axis=1))
dfc['example_idx'] = df_regions['example_idx'].values
[########################################] | 100% Completed |  1min 41.0s
In [ ]:
ls {dataset_dir}
In [14]:
dfs = dfabf_ism[['Wt_obs', 'Wt', 'dA', 'dB', 'dAB', 'motif_pair', 'task', 'center_diff', 'strand_combination', 'example_idx']]
dfs.head()
Out[14]:
Wt_obs Wt dA dB dAB motif_pair task center_diff strand_combination example_idx
0 16198.0 58234.4141 28042.2031 38972.9766 23831.6582 Oct4-Sox2<>Oct4-Sox2 Oct4 21.0 ++ 1
1 16198.0 58234.4141 28042.2031 17461.5430 9037.3682 Oct4-Sox2<>Oct4-Sox2 Oct4 62.0 ++ 1
2 16198.0 58234.4141 28042.2031 37178.5391 22600.3633 Oct4-Sox2<>Oct4-Sox2 Oct4 83.0 ++ 1
3 16198.0 58234.4141 28042.2031 23178.7891 13922.0508 Oct4-Sox2<>Oct4-Sox2 Oct4 104.0 ++ 1
4 16198.0 58234.4141 38972.9766 17461.5430 9234.9775 Oct4-Sox2<>Oct4-Sox2 Oct4 41.0 ++ 1
In [16]:
dfs = pd.merge(dfs, dfc, on='example_idx', how='left')
In [ ]:
# TODO - merge the two and export
In [20]:
dataset_dir
Out[20]:
PosixPath('/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/deeplift/profile/perturbation-analysis')
In [17]:
dfs.to_csv(dataset_dir / 'pair.total_counts.csv.gz', index=False, compression='gzip')  # store the table for Julia
In [637]:
dfs = dfs[dfs.center_diff < 35]

Fraction of positive values

In [638]:
terms = ['Wt-dA>0', 'Wt-dB>0', 'Wt-dAB>0', 'dA-dAB>0', 'dB-dAB>0']
c = pd.concat([dfs.groupby(['motif_pair', 'task']).apply(lambda x: x.eval(term).mean()).reset_index().rename(columns={0:'fraction'}).assign(term=term)
               for term in terms], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)
In [639]:
plotnine.options.figure_size = (10, 4)
(ggplot(aes(y='motif_pair', x='task', fill='fraction'), c) + geom_tile() + theme_classic() +  facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=.5, limits=[0, 1]) 
)
Out[639]:
<ggplot: (8729441476784)>

TODO - export

Masked scores

In [640]:
mask = 'Wt-dAB>0'
pc = 100  # pseudo-counts
terms = ['(2*Wt-dA-dB)/(Wt-dAB)', f'({pc}+2*Wt-dA-dB)/({pc}+Wt-dAB)', f'(2*Wt-dA-dB)/(Wt-dAB)*({mask})']
c = pd.concat([dfs.groupby(['motif_pair', 'task']).apply(lambda x: x.eval(term).mean() if x.eval(mask).mean() > .9 else np.nan).reset_index().rename(columns={0:'average'}).assign(term=term)
               for term in terms], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)
In [551]:
plotnine.options.figure_size = (9, 4)
(ggplot(aes(y='motif_pair', x='task', fill='average')) + 
 geom_tile(data=c.dropna()) + 
 geom_text(label="/", data=c[c.average.isnull()]) + 
 theme_classic() +  
 facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=1, limits=[0, 2]) 
)
Out[551]:
<ggplot: (8729479120886)>

Alternative solutions

Median. Mask > .9

In [664]:
mask = 'Wt-dAB>0'
pc = 100  # pseudo-counts
terms = ['(2*Wt-dA-dB)/(Wt-dAB)']
c = pd.concat([dfs.groupby(['motif_pair', 'task']).apply(lambda x: x.eval(term).median() if x.eval(mask).mean() > .9 else np.nan).reset_index().rename(columns={0:'average'}).assign(term=term)
               for term in terms], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)
In [665]:
plotnine.options.figure_size = (3, 4)
(ggplot(aes(y='motif_pair', x='task', fill='average')) + 
 geom_tile(data=c.dropna()) + 
 geom_text(label="/", data=c[c.average.isnull()]) + 
 theme_classic() +  
 facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=1, limits=[0, 2]) 
)
Out[665]:
<ggplot: (8729495972549)>

TODO - export to PDF

mask > 0.2

In [666]:
mask = 'Wt-dAB>0'
pc = 100  # pseudo-counts
terms = ['(2*Wt-dA-dB)/(Wt-dAB)', f'({pc}+2*Wt-dA-dB)/({pc}+Wt-dAB)', f'(2*Wt-dA-dB)/(Wt-dAB)*({mask})']
c = pd.concat([dfs.groupby(['motif_pair', 'task']).apply(lambda x: x.eval(term).median()).reset_index().rename(columns={0:'average'}).assign(term=term)
               for term in terms], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)
In [667]:
plotnine.options.figure_size = (9, 4)
(ggplot(aes(y='motif_pair', x='task', fill='average')) + 
 geom_tile(data=c.dropna()) + 
 geom_text(label="/", data=c[c.average.isnull()]) + 
 theme_classic() +  
 facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=1, limits=[0, 2]) 
)
Out[667]:
<ggplot: (8729441468010)>

Sum up the reads in all regions

In [645]:
terms = ['(2*Wt-dA-dB)/(Wt-dAB)', '0.75 + 0.5 *(Wt-dAB > 0)',  '0.75 + 0.5 *(Wt-dA > 0)', '0.75 + 0.5 *(Wt-dB > 0)']
features = ['Wt', 'dA', 'dB', 'dAB']
c = pd.concat([dfs.groupby(['motif_pair', 'task'])[features].sum().eval(term).reset_index().rename(columns={0:'average'}).assign(term=term)
               for term in terms], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)
In [646]:
plotnine.options.figure_size = (9, 4)
(ggplot(aes(y='motif_pair', x='task', fill='average')) + 
 geom_tile(data=c) +
 theme_classic() +  
 facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=1, limits=[0, 2]) 
)
Out[646]:
<ggplot: (8729852120633)>
In [636]:
plotnine.options.figure_size = (9, 4)
(ggplot(aes(y='motif_pair', x='task', fill='average')) + 
 geom_tile(data=c) +
 theme_classic() +  
 facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=1, limits=[0, 2]) 
)
Out[636]:
<ggplot: (8729461551564)>

dB -> A effect

In [12]:
def profile_count(narrow, wide, profile_slice=None, **kwargs):
    if profile_slice is None:
        profile_slice = np.arange(wide['ref']['pred'].shape[1])
    return (wide['dother']['pred'][:,profile_slice].mean(axis=(1,2)),
            wide['ref']['pred'][:,profile_slice].mean(axis=(1,2)))

def log_profile_count(narrow, wide, profile_slice=None, **kwargs):
    if profile_slice is None:
        profile_slice = np.arange(wide['ref']['pred'].shape[1])
    return (np.log(1+wide['dother']['pred'][:,profile_slice].mean(axis=(1,2))),
            np.log(1+wide['ref']['pred'][:,profile_slice].mean(axis=(1,2))))


def profile_count_norm(narrow, wide, profile_slice=None, **kwargs):
    if profile_slice is None:
        profile_slice = np.arange(wide['ref']['pred'].shape[1])
        
    return (wide['dother']['pred'][:,profile_slice].mean(axis=(1,2)) - wide['dboth']['pred'][:,profile_slice].mean(axis=(1,2)),
            wide['ref']['pred'][:,profile_slice].mean(axis=(1,2)) - wide['dthis']['pred'][:,profile_slice].mean(axis=(1,2)))


def log_profile_count_norm(narrow, wide, profile_slice=None, **kwargs):
    if profile_slice is None:
        profile_slice = np.arange(wide['ref']['pred'].shape[1])
        
    return (np.log(1+wide['dother']['pred'][:,profile_slice].mean(axis=(1,2))) - np.log(1+wide['dboth']['pred'][:,profile_slice].mean(axis=(1,2))),
            np.log(1+wide['ref']['pred'][:,profile_slice].mean(axis=(1,2))) - np.log(1+wide['dthis']['pred'][:,profile_slice].mean(axis=(1,2))))


def max_profile_count(narrow, wide, max_position=None, **kwargs):
    if max_position is None:
        max_position = np.argmax(wide['ref']['pred'].mean(axis=0), axis=0)
        
    return (wide['dother']['pred'][:,max_position, [0,1]].mean(axis=-1),
            wide['ref']['pred'][:,max_position, [0,1]].mean(axis=-1))

def log_max_profile_count(narrow, wide, max_position=None, **kwargs):
    if max_position is None:
        max_position = np.argmax(wide['ref']['pred'].mean(axis=0), axis=0)
        
    return (np.log(1+wide['dother']['pred'][:,max_position, [0,1]].mean(axis=-1)),
            np.log(1+wide['ref']['pred'][:,max_position, [0,1]].mean(axis=-1)))

def max_profile_count_norm(narrow, wide, max_position=None, **kwargs):
    if max_position is None:
        max_position = np.argmax(wide['ref']['pred'].mean(axis=0), axis=0)

    return (wide['dother']['pred'][:,max_position, [0,1]].mean(axis=-1) - wide['dboth']['pred'][:,max_position, [0,1]].mean(axis=-1),
            wide['ref']['pred'][:,max_position, [0,1]].mean(axis=-1) - wide['dthis']['pred'][:,max_position, [0,1]].mean(axis=-1))

def log_max_profile_count_norm(narrow, wide, max_position=None, **kwargs):
    if max_position is None:
        max_position = np.argmax(wide['ref']['pred'].mean(axis=0), axis=0)

    return (np.log(1+wide['dother']['pred'][:,max_position, [0,1]].mean(axis=-1)) - np.log(1+wide['dboth']['pred'][:,max_position, [0,1]].mean(axis=-1)),
            np.log(1+wide['ref']['pred'][:,max_position, [0,1]].mean(axis=-1)) - np.log(1+wide['dthis']['pred'][:,max_position, [0,1]].mean(axis=-1)))


def imp_profile(narrow, wide, **kwargs):
    return (narrow['dother']['imp']['profile'].max(axis=-1).mean(axis=-1),
            narrow['ref']['imp']['profile'].max(axis=-1).mean(axis=-1))


def imp_count(narrow, wide, **kwargs):
    return (narrow['dother']['imp']['count'].max(axis=-1).mean(axis=-1),
            narrow['ref']['imp']['count'].max(axis=-1).mean(axis=-1))
                   
                   
SCORES = [profile_count,
          profile_count_norm,
          max_profile_count,
          max_profile_count_norm,
          imp_profile,
          imp_count]

LOGSCORES = [log_profile_count,
             log_profile_count_norm,
             log_max_profile_count,
             log_max_profile_count_norm,
             imp_profile,
             imp_count]


def compute_features(narrow, wide, SCORES, **kwargs):
    for score in SCORES:
        return {score.__name__: score(narrow, wide, **kwargs)}

def compute_features_tidy(motif_pair_lpdata, tasks, plot_features=SCORES, pseudo_count_quantile=0, profile_slice=slice(82, 118), variable=None, pval=False):
    from basepair.plot.config import get_figsize
    nf = len(plot_features)
    
    out = []
    for motif_pair_name, lpdata in tqdm(motif_pair_lpdata.items()):
        motif_pair = list(motif_pair_name.split("<>"))
        dfab_sma = lpdata['dfab'].copy()
        
        # TODO - loop through all possible combinations
        xvals = list(motif_pair_lpdata[motif_pair_name]['x'])
        
        for task in tasks:
            # compute features
            for score in plot_features:
                dfab_sm = dfab_sma.copy()
                dfab_sm['task'] = task
                dfab_sm['score'] = score.__name__
                for xy in ['x', 'y']:
                    wide = {k: motif_pair_lpdata[motif_pair_name][xy][k]['wide'][task]
                            for k in motif_pair_lpdata[motif_pair_name][xy]}
                    narrow = {k: motif_pair_lpdata[motif_pair_name][xy][k]['narrow'][task]
                              for k in motif_pair_lpdata[motif_pair_name][xy]}
                    dfab_sm[xy + '_alt'], dfab_sm[xy + '_ref'] = score(narrow, wide, profile_slice=profile_slice)
                    dfab_sm[xy + '_alt_ref'] = dfab_sm[xy + '_alt'] / dfab_sm[xy + '_ref']
                    pc = np.quantile(dfab_sm[xy + '_ref'], pseudo_count_quantile)
                    dfab_sm[xy + '_alt_pc'], dfab_sm[xy + '_ref_pc'] = dfab_sm[xy + '_alt'] + pc, dfab_sm[xy + '_ref'] + pc
                    dfab_sm[xy + '_alt_ref_pc'] = dfab_sm[xy + '_alt_pc'] / dfab_sm[xy + '_ref_pc']
                    
                out.append(dfab_sm)
    return pd.concat(out, axis=0)
In [18]:
dfabf = compute_features_tidy(motif_pair_lpdata, tasks, SCORES, pseudo_count_quantile=.2, profile_slice=slice(82, 118))
100%|██████████| 10/10 [00:03<00:00,  2.02it/s]
In [58]:
dfabf_log = compute_features_tidy(motif_pair_lpdata, tasks, LOGSCORES, pseudo_count_quantile=.2, profile_slice=slice(82, 118))
100%|██████████| 10/10 [00:06<00:00,  1.10it/s]

Pairwise scatterplots

In [60]:
dfabf['center_diff_cat'] = pd.Categorical(pd.cut(dfabf['center_diff'], [0, 35, 70, 150, 1000]))
dfabf_log['center_diff_cat'] = pd.Categorical(pd.cut(dfabf_log['center_diff'], [0, 35, 70, 150, 1000]))
In [44]:
pairs = list(motif_pair_lpdata)

Log scale

In [61]:
plotnine.options.figure_size = (15, 10)
# motif_pair_name = 'Sox2<>Nanog'
for motif_pair_name in pairs:
    motif_pair = list(motif_pair_name.split("<>"))
    fig = (ggplot(aes(x='x_alt_ref_pc', y='y_alt_ref_pc', color='center_diff_cat'), dfabf_log[dfabf_log.motif_pair == motif_pair_name]) + 
     geom_point(alpha=0.2, size=.05, shape='.', show_legend=False) + 
     geom_point(alpha=1, size=5, data=dfabf_log[:0], shape='.') +   # for the legend
     facet_grid("task~score") + 
     xlim([0, 2]) + 
     ylim([0, 2]) + 
     theme_bw() + 
     xlab(motif_pair[0] + f" (d{motif_pair[1]})") + 
     ylab(motif_pair[1] + f" (d{motif_pair[0]})") + 
     ggtitle(motif_pair_name) +
     scale_color_brewer(type='qual', palette=2)
    )
    display(fig)
<ggplot: (8790048458637)>
<ggplot: (8790191914048)>
<ggplot: (8790215341716)>
<ggplot: (-9223363246638733501)>
<ggplot: (-9223363246805251910)>
<ggplot: (-9223363246808598584)>
<ggplot: (8790048814635)>
<ggplot: (8790049303069)>
<ggplot: (8790191869367)>
<ggplot: (-9223363246639474863)>

Normal scale

In [46]:
plotnine.options.figure_size = (15, 10)
# motif_pair_name = 'Sox2<>Nanog'
for motif_pair_name in pairs:
    motif_pair = list(motif_pair_name.split("<>"))
    fig = (ggplot(aes(x='x_alt_ref_pc', y='y_alt_ref_pc', color='center_diff_cat'), dfabf[dfabf.motif_pair == motif_pair_name]) + 
     geom_point(alpha=0.2, size=.05, shape='.', show_legend=False) + 
     geom_point(alpha=1, size=5, data=dfabf[:0], shape='.') +   # for the legend
     facet_grid("task~score") + 
     xlim([0, 2]) + 
     ylim([0, 2]) + 
     theme_bw() + 
     xlab(motif_pair[0] + f" (d{motif_pair[1]})") + 
     ylab(motif_pair[1] + f" (d{motif_pair[0]})") + 
     ggtitle(motif_pair_name) +
     scale_color_brewer(type='qual', palette=2)
    )
    display(fig)
<ggplot: (-9223363246813295430)>
<ggplot: (8790029781224)>
<ggplot: (-9223363246824932818)>
<ggplot: (-9223363246596983734)>
<ggplot: (-9223363246805485771)>
<ggplot: (-9223363246596457724)>
<ggplot: (8790225274828)>
<ggplot: (-9223363246812469707)>
<ggplot: (-9223363246813002173)>
<ggplot: (8790240213494)>

Scratch space

In [32]:
from basepair.exp.chipnexus.perturb import plot_scatter
In [33]:
plot_scatter
Out[33]:
<function basepair.exp.chipnexus.perturb.plot_scatter(x_ref, x_alt, y_ref, y_alt, ax, alpha=0.2, s=1, label=None, xl=[0, 2], pval=True)>
In [46]:
# compute the max
In [75]:
from basepair.plot.tracks import plot_tracks
In [453]:
s = slice(82, 118)
In [454]:
plot_tracks( dict(a=motif_pair_lpdata['Oct4-Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, s].mean(axis=0)));
In [73]:
def plot_scatter(x_ref, x_alt, y_ref, y_alt, ax, alpha=.2, s=1, label=None, xl=[0, 2], pval=True):
    ax.scatter(x_alt / x_ref,
               y_alt / y_ref, alpha=alpha, s=s, label=label)
    if pval:
        xpval = wilcoxon(x_ref, x_alt).pvalue
        ypval = wilcoxon(y_ref, y_alt).pvalue
        kwargs = dict(size="small", horizontalalignment='center')
        ax.text(1.8, 1, f"{xpval:.2g}", **kwargs)
        ax.text(1, 1.8, f"{ypval:.2g}", **kwargs)
    alpha = .5
    ax.plot(xl, xl, c='grey', alpha=alpha)
    ax.axvline(1, c='grey', alpha=alpha)
    ax.axhline(1, c='grey', alpha=alpha)
    ax.set_xlim(xl)
    ax.set_ylim(xl)
In [76]:
from basepair.exp.chipnexus.perturb import plot_scatter
In [92]:
sox2_max_positions = np.argmax(motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'].mean(axis=0), axis=0)
nanog_max_positions = np.argmax(motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'].mean(axis=0), axis=0)
In [ ]:
x = ((np.log(1+motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2))) - 
      np.log(1+motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'].sum(axis=(1,2))))) 
# (motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'].sum(axis=(1,2))))
y = motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['narrow']['Nanog']['imp']['count'].sum(axis=(1,2))
regression_eval(y, x)
plt.xlabel("ISM (Wr - dNanog)")
plt.ylabel("DeepLIFT count importance");
In [138]:
fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Sox2<>Nanog']['x']['dthis']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1),
             x_alt=2+motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Sox2<>Nanog']['x']['dboth']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1), 
             y_ref=2+motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Sox2<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             y_alt=2+motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Sox2<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             ax=ax, xl=[0, 2])
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")
Out[138]:
Text(0, 0.5, 'Nanog (dSox2)')
In [118]:
fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1),
             x_alt=2+motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1), 
             y_ref=2+motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             y_alt=2+motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             ax=ax, xl=[0, 2])
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")
Out[118]:
Text(0, 0.5, 'Nanog (dSox2)')
In [117]:
fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=1+motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)),
             x_alt=1+motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)), 
             y_ref=1+motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             y_alt=1+motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             ax=ax, xl=[0, 2]
        )
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")
Out[117]:
Text(0, 0.5, 'Nanog (dSox2)')
In [134]:
s = slice(85, 118)
In [119]:
fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=1+motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dthis']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)),
             x_alt=1+motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dboth']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)), 
             y_ref=1+motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             y_alt=1+motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             ax=ax, xl=[0, 2]
        )
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")
Out[119]:
Text(0, 0.5, 'Nanog (dSox2)')
In [129]:
s = slice(65, 135)
In [130]:
fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dthis']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)),
             x_alt=motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dboth']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)), 
             y_ref=motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             y_alt=motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             ax=ax, xl=[-1, 3]
        )
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")
Out[130]:
Text(0, 0.5, 'Nanog (dSox2)')
In [122]:
fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=1+motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dthis']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)),
             x_alt=1+motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dboth']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)), 
             y_ref=1+motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             y_alt=1+motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             ax=ax, xl=[0, 2]
        )
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")
Out[122]:
Text(0, 0.5, 'Nanog (dSox2)')
In [148]:
fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             x_alt=2+motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)), 
             y_ref=2+motif_pair_lpdata['Nanog<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             y_alt=2+motif_pair_lpdata['Nanog<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             ax=ax, xl=[0, 2]
        )
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")
Out[148]:
Text(0, 0.5, 'Nanog (dSox2)')
In [137]:
fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1),
             x_alt=2+motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1), 
             y_ref=2+motif_pair_lpdata['Nanog<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             y_alt=2+motif_pair_lpdata['Nanog<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             ax=ax, xl=[0, 2])
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")
Out[137]:
Text(0, 0.5, 'Nanog (dSox2)')
In [145]:
fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             x_alt=2+motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1), 
             y_ref=2+motif_pair_lpdata['Nanog<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             y_alt=2+motif_pair_lpdata['Nanog<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             ax=ax, xl=[0, 2])
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")
Out[145]:
Text(0, 0.5, 'Nanog (dSox2)')
In [145]:
fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             x_alt=2+motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1), 
             y_ref=2+motif_pair_lpdata['Nanog<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             y_alt=2+motif_pair_lpdata['Nanog<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             ax=ax, xl=[0, 2])
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")
Out[145]:
Text(0, 0.5, 'Nanog (dSox2)')

TODO

  • correlate ISM scores and deeplift scores
In [193]:
np.mean(x > 1)
Out[193]:
0.576455615653834
In [194]:
np.mean(x)
Out[194]:
1.2576684
In [ ]:
plt.scatter(motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'].sum(axis=(1,2)),
            motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'].sum(axis=(1,2)))
In [192]:
x = ((2*motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2)) - 
 motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'].sum(axis=(1,2)) - 
 motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'].sum(axis=(1,2))) / 
(motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'].sum(axis=(1,2))))
plt.hist(x[(x>-1) & (x<10)], 50);
plt.xlabel("Synergy score");
In [195]:
y = motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['narrow']['Nanog']['imp']['profile']
In [240]:
motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['narrow']['Nanog']['imp']['count'][0]
Out[240]:
array([[ 0.    ,  0.0033,  0.    ,  0.    ],
       [ 0.    ,  0.    ,  0.0074,  0.    ],
       [ 0.0377, -0.    ,  0.    ,  0.    ],
       [ 0.0271,  0.    ,  0.    ,  0.    ],
       [ 0.    , -0.    , -0.0234,  0.    ],
       [ 0.    ,  0.    ,  0.0133,  0.    ],
       [ 0.    , -0.0121, -0.    , -0.    ],
       [-0.    , -0.    , -0.    ,  0.0255],
       [-0.    ,  0.0099, -0.    , -0.    ]], dtype=float32)
In [225]:
plt.hist(motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['narrow']['Nanog']['imp']['count'].sum(axis=(1,2)))
Out[225]:
(array([   2.,   18.,   37.,  167.,  723., 2762., 8756., 3175.,   69.,    6.]),
 array([-2.09  , -1.7737, -1.4574, -1.1411, -0.8248, -0.5085, -0.1922,  0.1241,  0.4404,  0.7567,
         1.073 ], dtype=float32),
 <a list of 10 Patch objects>)
In [313]:
from basepair.plot.evaluate import regression_eval
In [179]:
x = ((motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2)) - 
 motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'].sum(axis=(1,2))))
y = (motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'].sum(axis=(1,2)))
plt.scatter(x,y, alpha=0.1);
# plt.xlabel("Single TF affinity");
  File "<ipython-input-179-92dbdb5559a3>", line 3
    y = (motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2))) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'].sum(axis=(1,2)))
                                                                                                                                                                                           ^
SyntaxError: invalid syntax
In [178]:
x = ((motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2))) - motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'].sum(axis=(1,2)))
plt.hist(x[x< 100], 50);
plt.xlabel("Single TF affinity");
In [163]:
motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Klf4']['obs'].shape
Out[163]:
(15715, 200, 2)