Pairwise perturbation¶

This notebooks shows

TODO¶

[x] take top 1500 values per counts, sort by pairwise distance
[x] try out different normalizers
[x] add also the sequence plot
[x] eyeball the plots
- what would be a good way to quantify the effects? - importance scores
[x] implement different feature functions quantifying the interactions
- [x] given a list of arrays (pred, imp, imp_count), quantify the metric
- [x] combine differnet metrics with each other
- [x] add also the max-count metric
  - positions with maximal number of counts (averaged per strands)
[x] add scatterplots quantifying the interactions
[ ] re-compute motif_centers and motif_diff using proper rounding schemes

TODO¶

[ ] generate all the pdf plots
[ ] for scatterplots, show only the corresponding TF in the main (triangular) figure
- [ ] for the supplementary figures generate all 40 plots
- [ ] use max count
[ ] add the heatmap of A->B type of analysis
- [ ] show <35 and >35 distance versions
[ ] make sure colors look good and are consistent
[ ] switch colors so that red means the 'interesting' effect
[ ] simulation - generate the max score (same as in the perturbation analysis)

from basepair.utils import read_pkl
from pathlib import Path
from basepair.exp.paper.config import motifs, profile_mapping
from basepair.plot.config import paper_config, get_figsize
from basepair.exp.chipnexus.perturb.vdom import vdom_motif_pair, plot_spacing_hist
from basepair.exp.chipnexus.spacing import remove_edge_instances, get_motif_pairs
from basepair.plot.heatmaps import RowQuantileNormalizer, QuantileTruncateNormalizer
from tqdm import tqdm
from basepair.data import NumpyDataset
from basepair.exp.chipnexus.perturb.gen import *
from basepair.exp.chipnexus.perturb.scores import *
from copy import deepcopy
from plotnine import *
import plotnine
import warnings
warnings.filterwarnings("ignore")

from basepair.config import get_data_dir
paper_config()
ddir = get_data_dir()

Using TensorFlow backend.
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/concise/utils/plot.py:115: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  min_coords = np.vstack(data.min(0) for data in polygons_data).min(0)
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/concise/utils/plot.py:116: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  max_coords = np.vstack(data.max(0) for data in polygons_data).max(0)

# Common paths
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
modisco_dir = model_dir / f"modisco/all/deeplift/profile/"
output_dir = modisco_dir
dataset_dir = output_dir / 'perturbation-analysis'

pairs = get_motif_pairs(motifs)

# load the data
motif_pair_lpdata = read_pkl(dataset_dir / 'motif_pair_lpdata.incl-whole.pkl')
dfab = pd.read_csv(dataset_dir / 'dfab.csv.gz')

dfi_subset = pd.read_csv(dataset_dir / 'dfi_subset.csv.gz')

# write_pkl(motif_pair_lpdata, dataset_dir / 'motif_pair_lpdata.incl-whole.pkl')

# TODO - de-hardcode
tasks = ['Oct4', 'Sox2', 'Nanog', 'Klf4']

Basic stats¶

%matplotlib inline

paper_config()

fig = plt.figure(figsize=get_figsize(.5))
dfab.groupby("motif_pair").size().plot.barh()
plt.axvline(1500)  # displayed cutoff
plt.xlabel("Number of pairs");

Pairwise spacing¶

dfab['motif_pair_cat'] = pd.Categorical(dfab.motif_pair, categories=dfab.groupby("motif_pair").size().sort_values(ascending=False).index)
dfab['strand_combination_cat'] = pd.Categorical(dfab['strand_combination'], )

plotnine.options.figure_size = get_figsize(.5, aspect=2)# (10, 10)
max_dist = 100
(ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist)]) + 
 geom_histogram(bins=max_dist) + 
 facet_grid("motif_pair_cat~ .") + 
 theme_classic() + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") + 
 scale_fill_brewer(type='qual', palette=3))

<ggplot: (8729886685353)>

Pairwise spacing¶

plotnine.options.figure_size = get_figsize(.5, aspect=2/10*3)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) + 
 geom_vline(xintercept=10, alpha=0.1) + 
 geom_vline(xintercept=20, alpha=0.1) + 
 geom_vline(xintercept=30, alpha=0.1) + 
 geom_vline(xintercept=40, alpha=0.1) + 
 geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") + 
 theme_classic() + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") +
 ggtitle("Nanog<>Nanog") + 
 scale_fill_brewer(type='qual', palette=3))
fig

<ggplot: (-9223363306968474266)>

TODO - export¶

Perturbation analysis¶

output_dir = Path('/srv/www/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/deeplift/profile/perturbation/figures.2')
figures_url = Path('/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/deeplift/profile/perturbation/figures.2')

!mkdir -p {output_dir}

RowQuantileNormalizer¶

vdom_motif_pair(motif_pair_lpdata, dfab, profile_mapping, figures_dir=str(output_dir /  'RowQuantileNormalizer'), figures_url=str(figures_url /  'RowQuantileNormalizer'), profile_width=200, cache=False, normalizer=RowQuantileNormalizer())

100%|██████████| 10/10 [14:13<00:00, 85.18s/it]

QuantileTruncateNormalizer¶

vdom_motif_pair(motif_pair_lpdata, dfab, profile_mapping, figures_dir=str(output_dir /  'QuantileTruncateNormalizer'), figures_url=str(figures_url /  'QuantileTruncateNormalizer'), profile_width=200, cache=False, normalizer=QuantileTruncateNormalizer())

100%|██████████| 10/10 [13:34<00:00, 81.29s/it]

log(1+x) normalization¶

vdom_motif_pair(motif_pair_lpdata, dfab, profile_mapping, figures_dir=str(output_dir /  'lognorm'), figures_url=str(figures_url /  'lognorm'), profile_width=200, cache=False, normalizer=lambda x: np.log(1+x))

100%|██████████| 10/10 [13:43<00:00, 82.69s/it]

Summarize data¶

TODO¶

[x] implement different feature functions quantifying the interactions
- [x] given a list of arrays (pred, imp, imp_count), quantify the metric
- [X] combine differnet metrics with each other
- [x] add also the max-count metric
  - positions with maximal number of counts (averaged per strands)
[x] add scatterplots quantifying the interactions
[ ] re-compute motif_centers and motif_diff using proper rounding schemes

Synergy scores¶

Wt = total counts in ref
dA = total counts after removing A
dB = total counts after removing B
dAB = total counts after removing both

Then TF-A depends on A with

(Wt-dA)/(Wt - dAB)

and on B with

(Wt-dB)/(Wt - dAB)

def ism_compute_features_tidy(motif_pair_lpdata, tasks,):
    out = []
    for motif_pair_name, lpdata in tqdm(motif_pair_lpdata.items()):
        motif_pair = list(motif_pair_name.split("<>"))
        for task in tasks:
            dfab_sm = lpdata['dfab'].copy()
            dfab_sm['task'] = task
            whole = {k: motif_pair_lpdata[motif_pair_name]['x'][k]['whole'][task]
                      for k in motif_pair_lpdata[motif_pair_name]['x']}
            dfab_sm['Wt_obs'] = whole['ref']['obs'].sum(axis=(1,2))
            dfab_sm['Wt'] = whole['ref']['pred'].sum(axis=(1,2))
            dfab_sm['dA'] = whole['dthis']['pred'].sum(axis=(1,2))
            dfab_sm['dB'] = whole['dother']['pred'].sum(axis=(1,2))
            dfab_sm['dAB'] = whole['dboth']['pred'].sum(axis=(1,2))
            out.append(dfab_sm)
    return pd.concat(out, axis=0)

dfabf_ism = ism_compute_features_tidy(motif_pair_lpdata, tasks)

100%|██████████| 10/10 [00:02<00:00,  3.22it/s]

# dfs.to_csv(dataset_dir / 'dfs.csv.gz', compression='gzip', index=False)

dfabf_ism.head()

dfc.head()

dfs = dfabf_ism[['Wt_obs', 'Wt', 'dA', 'dB', 'dAB', 'motif_pair', 'task', 'center_diff', 'strand_combination', 'example_idx']]
dfs.head()

plt.scatter(np.log10(1+dfs.Wt), np.log10(1+dfs.Wt_obs), s=1, alpha=0.1)
plt.xlabel("Predicted Wt (log10)")
plt.ylabel("Observed Wt (log10)");

np.log10(1+dfs.Wt_obs).plot.hist(300);
plt.xlabel("Observed Wt (log10)");

dfs.head()

# load activity data
import pybedtools
from pybedtools import BedTool
from basepair.extractors import MultiAssayExtractor
from basepair.data import NumpyDataset

df = pd.read_csv(f"{ddir}/processed/chipnexus/external-data.tsv", sep='\t')
dfs = df[df.assay.isin(['PolII', 'H3K27ac'])]

df_regions = dfi_subset[['example_chrom', 'example_start', 'example_end', 'example_idx']].drop_duplicates()
bt = BedTool.from_dataframe(df_regions)

extractor = MultiAssayExtractor(dfs, None, use_strand=False, n_jobs=10)

regions = extractor.extract(list(bt))

r = NumpyDataset(regions)
dfc = pd.DataFrame(r.aggregate(np.sum, axis=1))
dfc['example_idx'] = df_regions['example_idx'].values

[########################################] | 100% Completed |  1min 41.0s

ls {dataset_dir}

dfs = dfabf_ism[['Wt_obs', 'Wt', 'dA', 'dB', 'dAB', 'motif_pair', 'task', 'center_diff', 'strand_combination', 'example_idx']]
dfs.head()

dfs = pd.merge(dfs, dfc, on='example_idx', how='left')

# TODO - merge the two and export

dataset_dir

PosixPath('/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/deeplift/profile/perturbation-analysis')

dfs.to_csv(dataset_dir / 'pair.total_counts.csv.gz', index=False, compression='gzip')  # store the table for Julia

dfs = dfs[dfs.center_diff < 35]

Fraction of positive values¶

terms = ['Wt-dA>0', 'Wt-dB>0', 'Wt-dAB>0', 'dA-dAB>0', 'dB-dAB>0']
c = pd.concat([dfs.groupby(['motif_pair', 'task']).apply(lambda x: x.eval(term).mean()).reset_index().rename(columns={0:'fraction'}).assign(term=term)
               for term in terms], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)

plotnine.options.figure_size = (10, 4)
(ggplot(aes(y='motif_pair', x='task', fill='fraction'), c) + geom_tile() + theme_classic() +  facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=.5, limits=[0, 1]) 
)

<ggplot: (8729441476784)>

TODO - export¶

Masked scores¶

mask = 'Wt-dAB>0'
pc = 100  # pseudo-counts
terms = ['(2*Wt-dA-dB)/(Wt-dAB)', f'({pc}+2*Wt-dA-dB)/({pc}+Wt-dAB)', f'(2*Wt-dA-dB)/(Wt-dAB)*({mask})']
c = pd.concat([dfs.groupby(['motif_pair', 'task']).apply(lambda x: x.eval(term).mean() if x.eval(mask).mean() > .9 else np.nan).reset_index().rename(columns={0:'average'}).assign(term=term)
               for term in terms], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)

plotnine.options.figure_size = (9, 4)
(ggplot(aes(y='motif_pair', x='task', fill='average')) + 
 geom_tile(data=c.dropna()) + 
 geom_text(label="/", data=c[c.average.isnull()]) + 
 theme_classic() +  
 facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=1, limits=[0, 2]) 
)

<ggplot: (8729479120886)>

Alternative solutions¶

Median. Mask > .9¶

mask = 'Wt-dAB>0'
pc = 100  # pseudo-counts
terms = ['(2*Wt-dA-dB)/(Wt-dAB)']
c = pd.concat([dfs.groupby(['motif_pair', 'task']).apply(lambda x: x.eval(term).median() if x.eval(mask).mean() > .9 else np.nan).reset_index().rename(columns={0:'average'}).assign(term=term)
               for term in terms], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)

plotnine.options.figure_size = (3, 4)
(ggplot(aes(y='motif_pair', x='task', fill='average')) + 
 geom_tile(data=c.dropna()) + 
 geom_text(label="/", data=c[c.average.isnull()]) + 
 theme_classic() +  
 facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=1, limits=[0, 2]) 
)

<ggplot: (8729495972549)>

TODO - export to PDF¶

mask > 0.2¶

mask = 'Wt-dAB>0'
pc = 100  # pseudo-counts
terms = ['(2*Wt-dA-dB)/(Wt-dAB)', f'({pc}+2*Wt-dA-dB)/({pc}+Wt-dAB)', f'(2*Wt-dA-dB)/(Wt-dAB)*({mask})']
c = pd.concat([dfs.groupby(['motif_pair', 'task']).apply(lambda x: x.eval(term).median()).reset_index().rename(columns={0:'average'}).assign(term=term)
               for term in terms], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)

plotnine.options.figure_size = (9, 4)
(ggplot(aes(y='motif_pair', x='task', fill='average')) + 
 geom_tile(data=c.dropna()) + 
 geom_text(label="/", data=c[c.average.isnull()]) + 
 theme_classic() +  
 facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=1, limits=[0, 2]) 
)

<ggplot: (8729441468010)>

Sum up the reads in all regions¶

terms = ['(2*Wt-dA-dB)/(Wt-dAB)', '0.75 + 0.5 *(Wt-dAB > 0)',  '0.75 + 0.5 *(Wt-dA > 0)', '0.75 + 0.5 *(Wt-dB > 0)']
features = ['Wt', 'dA', 'dB', 'dAB']
c = pd.concat([dfs.groupby(['motif_pair', 'task'])[features].sum().eval(term).reset_index().rename(columns={0:'average'}).assign(term=term)
               for term in terms], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)

plotnine.options.figure_size = (9, 4)
(ggplot(aes(y='motif_pair', x='task', fill='average')) + 
 geom_tile(data=c) +
 theme_classic() +  
 facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=1, limits=[0, 2]) 
)

<ggplot: (8729852120633)>

plotnine.options.figure_size = (9, 4)
(ggplot(aes(y='motif_pair', x='task', fill='average')) + 
 geom_tile(data=c) +
 theme_classic() +  
 facet_grid(". ~ term") + 
 theme(legend_position='top')  + 
 scale_fill_gradient2(low='blue', mid='white', high='red', midpoint=1, limits=[0, 2]) 
)

<ggplot: (8729461551564)>

dB -> A effect¶

def profile_count(narrow, wide, profile_slice=None, **kwargs):
    if profile_slice is None:
        profile_slice = np.arange(wide['ref']['pred'].shape[1])
    return (wide['dother']['pred'][:,profile_slice].mean(axis=(1,2)),
            wide['ref']['pred'][:,profile_slice].mean(axis=(1,2)))

def log_profile_count(narrow, wide, profile_slice=None, **kwargs):
    if profile_slice is None:
        profile_slice = np.arange(wide['ref']['pred'].shape[1])
    return (np.log(1+wide['dother']['pred'][:,profile_slice].mean(axis=(1,2))),
            np.log(1+wide['ref']['pred'][:,profile_slice].mean(axis=(1,2))))


def profile_count_norm(narrow, wide, profile_slice=None, **kwargs):
    if profile_slice is None:
        profile_slice = np.arange(wide['ref']['pred'].shape[1])
        
    return (wide['dother']['pred'][:,profile_slice].mean(axis=(1,2)) - wide['dboth']['pred'][:,profile_slice].mean(axis=(1,2)),
            wide['ref']['pred'][:,profile_slice].mean(axis=(1,2)) - wide['dthis']['pred'][:,profile_slice].mean(axis=(1,2)))


def log_profile_count_norm(narrow, wide, profile_slice=None, **kwargs):
    if profile_slice is None:
        profile_slice = np.arange(wide['ref']['pred'].shape[1])
        
    return (np.log(1+wide['dother']['pred'][:,profile_slice].mean(axis=(1,2))) - np.log(1+wide['dboth']['pred'][:,profile_slice].mean(axis=(1,2))),
            np.log(1+wide['ref']['pred'][:,profile_slice].mean(axis=(1,2))) - np.log(1+wide['dthis']['pred'][:,profile_slice].mean(axis=(1,2))))


def max_profile_count(narrow, wide, max_position=None, **kwargs):
    if max_position is None:
        max_position = np.argmax(wide['ref']['pred'].mean(axis=0), axis=0)
        
    return (wide['dother']['pred'][:,max_position, [0,1]].mean(axis=-1),
            wide['ref']['pred'][:,max_position, [0,1]].mean(axis=-1))

def log_max_profile_count(narrow, wide, max_position=None, **kwargs):
    if max_position is None:
        max_position = np.argmax(wide['ref']['pred'].mean(axis=0), axis=0)
        
    return (np.log(1+wide['dother']['pred'][:,max_position, [0,1]].mean(axis=-1)),
            np.log(1+wide['ref']['pred'][:,max_position, [0,1]].mean(axis=-1)))

def max_profile_count_norm(narrow, wide, max_position=None, **kwargs):
    if max_position is None:
        max_position = np.argmax(wide['ref']['pred'].mean(axis=0), axis=0)

    return (wide['dother']['pred'][:,max_position, [0,1]].mean(axis=-1) - wide['dboth']['pred'][:,max_position, [0,1]].mean(axis=-1),
            wide['ref']['pred'][:,max_position, [0,1]].mean(axis=-1) - wide['dthis']['pred'][:,max_position, [0,1]].mean(axis=-1))

def log_max_profile_count_norm(narrow, wide, max_position=None, **kwargs):
    if max_position is None:
        max_position = np.argmax(wide['ref']['pred'].mean(axis=0), axis=0)

    return (np.log(1+wide['dother']['pred'][:,max_position, [0,1]].mean(axis=-1)) - np.log(1+wide['dboth']['pred'][:,max_position, [0,1]].mean(axis=-1)),
            np.log(1+wide['ref']['pred'][:,max_position, [0,1]].mean(axis=-1)) - np.log(1+wide['dthis']['pred'][:,max_position, [0,1]].mean(axis=-1)))


def imp_profile(narrow, wide, **kwargs):
    return (narrow['dother']['imp']['profile'].max(axis=-1).mean(axis=-1),
            narrow['ref']['imp']['profile'].max(axis=-1).mean(axis=-1))


def imp_count(narrow, wide, **kwargs):
    return (narrow['dother']['imp']['count'].max(axis=-1).mean(axis=-1),
            narrow['ref']['imp']['count'].max(axis=-1).mean(axis=-1))
                   
                   
SCORES = [profile_count,
          profile_count_norm,
          max_profile_count,
          max_profile_count_norm,
          imp_profile,
          imp_count]

LOGSCORES = [log_profile_count,
             log_profile_count_norm,
             log_max_profile_count,
             log_max_profile_count_norm,
             imp_profile,
             imp_count]


def compute_features(narrow, wide, SCORES, **kwargs):
    for score in SCORES:
        return {score.__name__: score(narrow, wide, **kwargs)}

def compute_features_tidy(motif_pair_lpdata, tasks, plot_features=SCORES, pseudo_count_quantile=0, profile_slice=slice(82, 118), variable=None, pval=False):
    from basepair.plot.config import get_figsize
    nf = len(plot_features)
    
    out = []
    for motif_pair_name, lpdata in tqdm(motif_pair_lpdata.items()):
        motif_pair = list(motif_pair_name.split("<>"))
        dfab_sma = lpdata['dfab'].copy()
        
        # TODO - loop through all possible combinations
        xvals = list(motif_pair_lpdata[motif_pair_name]['x'])
        
        for task in tasks:
            # compute features
            for score in plot_features:
                dfab_sm = dfab_sma.copy()
                dfab_sm['task'] = task
                dfab_sm['score'] = score.__name__
                for xy in ['x', 'y']:
                    wide = {k: motif_pair_lpdata[motif_pair_name][xy][k]['wide'][task]
                            for k in motif_pair_lpdata[motif_pair_name][xy]}
                    narrow = {k: motif_pair_lpdata[motif_pair_name][xy][k]['narrow'][task]
                              for k in motif_pair_lpdata[motif_pair_name][xy]}
                    dfab_sm[xy + '_alt'], dfab_sm[xy + '_ref'] = score(narrow, wide, profile_slice=profile_slice)
                    dfab_sm[xy + '_alt_ref'] = dfab_sm[xy + '_alt'] / dfab_sm[xy + '_ref']
                    pc = np.quantile(dfab_sm[xy + '_ref'], pseudo_count_quantile)
                    dfab_sm[xy + '_alt_pc'], dfab_sm[xy + '_ref_pc'] = dfab_sm[xy + '_alt'] + pc, dfab_sm[xy + '_ref'] + pc
                    dfab_sm[xy + '_alt_ref_pc'] = dfab_sm[xy + '_alt_pc'] / dfab_sm[xy + '_ref_pc']
                    
                out.append(dfab_sm)
    return pd.concat(out, axis=0)

dfabf = compute_features_tidy(motif_pair_lpdata, tasks, SCORES, pseudo_count_quantile=.2, profile_slice=slice(82, 118))

100%|██████████| 10/10 [00:03<00:00,  2.02it/s]

dfabf_log = compute_features_tidy(motif_pair_lpdata, tasks, LOGSCORES, pseudo_count_quantile=.2, profile_slice=slice(82, 118))

100%|██████████| 10/10 [00:06<00:00,  1.10it/s]

Pairwise scatterplots¶

dfabf['center_diff_cat'] = pd.Categorical(pd.cut(dfabf['center_diff'], [0, 35, 70, 150, 1000]))
dfabf_log['center_diff_cat'] = pd.Categorical(pd.cut(dfabf_log['center_diff'], [0, 35, 70, 150, 1000]))

pairs = list(motif_pair_lpdata)

Log scale¶

plotnine.options.figure_size = (15, 10)
# motif_pair_name = 'Sox2<>Nanog'
for motif_pair_name in pairs:
    motif_pair = list(motif_pair_name.split("<>"))
    fig = (ggplot(aes(x='x_alt_ref_pc', y='y_alt_ref_pc', color='center_diff_cat'), dfabf_log[dfabf_log.motif_pair == motif_pair_name]) + 
     geom_point(alpha=0.2, size=.05, shape='.', show_legend=False) + 
     geom_point(alpha=1, size=5, data=dfabf_log[:0], shape='.') +   # for the legend
     facet_grid("task~score") + 
     xlim([0, 2]) + 
     ylim([0, 2]) + 
     theme_bw() + 
     xlab(motif_pair[0] + f" (d{motif_pair[1]})") + 
     ylab(motif_pair[1] + f" (d{motif_pair[0]})") + 
     ggtitle(motif_pair_name) +
     scale_color_brewer(type='qual', palette=2)
    )
    display(fig)

<ggplot: (8790048458637)>

<ggplot: (8790191914048)>

<ggplot: (8790215341716)>

<ggplot: (-9223363246638733501)>

<ggplot: (-9223363246805251910)>

<ggplot: (-9223363246808598584)>

<ggplot: (8790048814635)>

<ggplot: (8790049303069)>

<ggplot: (8790191869367)>

<ggplot: (-9223363246639474863)>

Normal scale¶

plotnine.options.figure_size = (15, 10)
# motif_pair_name = 'Sox2<>Nanog'
for motif_pair_name in pairs:
    motif_pair = list(motif_pair_name.split("<>"))
    fig = (ggplot(aes(x='x_alt_ref_pc', y='y_alt_ref_pc', color='center_diff_cat'), dfabf[dfabf.motif_pair == motif_pair_name]) + 
     geom_point(alpha=0.2, size=.05, shape='.', show_legend=False) + 
     geom_point(alpha=1, size=5, data=dfabf[:0], shape='.') +   # for the legend
     facet_grid("task~score") + 
     xlim([0, 2]) + 
     ylim([0, 2]) + 
     theme_bw() + 
     xlab(motif_pair[0] + f" (d{motif_pair[1]})") + 
     ylab(motif_pair[1] + f" (d{motif_pair[0]})") + 
     ggtitle(motif_pair_name) +
     scale_color_brewer(type='qual', palette=2)
    )
    display(fig)

<ggplot: (-9223363246813295430)>

<ggplot: (8790029781224)>

<ggplot: (-9223363246824932818)>

<ggplot: (-9223363246596983734)>

<ggplot: (-9223363246805485771)>

<ggplot: (-9223363246596457724)>

<ggplot: (8790225274828)>

<ggplot: (-9223363246812469707)>

<ggplot: (-9223363246813002173)>

<ggplot: (8790240213494)>

Scratch space¶

from basepair.exp.chipnexus.perturb import plot_scatter

plot_scatter

<function basepair.exp.chipnexus.perturb.plot_scatter(x_ref, x_alt, y_ref, y_alt, ax, alpha=0.2, s=1, label=None, xl=[0, 2], pval=True)>

# compute the max

from basepair.plot.tracks import plot_tracks

s = slice(82, 118)

plot_tracks( dict(a=motif_pair_lpdata['Oct4-Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, s].mean(axis=0)));

def plot_scatter(x_ref, x_alt, y_ref, y_alt, ax, alpha=.2, s=1, label=None, xl=[0, 2], pval=True):
    ax.scatter(x_alt / x_ref,
               y_alt / y_ref, alpha=alpha, s=s, label=label)
    if pval:
        xpval = wilcoxon(x_ref, x_alt).pvalue
        ypval = wilcoxon(y_ref, y_alt).pvalue
        kwargs = dict(size="small", horizontalalignment='center')
        ax.text(1.8, 1, f"{xpval:.2g}", **kwargs)
        ax.text(1, 1.8, f"{ypval:.2g}", **kwargs)
    alpha = .5
    ax.plot(xl, xl, c='grey', alpha=alpha)
    ax.axvline(1, c='grey', alpha=alpha)
    ax.axhline(1, c='grey', alpha=alpha)
    ax.set_xlim(xl)
    ax.set_ylim(xl)

from basepair.exp.chipnexus.perturb import plot_scatter

sox2_max_positions = np.argmax(motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'].mean(axis=0), axis=0)
nanog_max_positions = np.argmax(motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'].mean(axis=0), axis=0)

x = ((np.log(1+motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2))) - 
      np.log(1+motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'].sum(axis=(1,2))))) 
# (motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'].sum(axis=(1,2))))
y = motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['narrow']['Nanog']['imp']['count'].sum(axis=(1,2))
regression_eval(y, x)
plt.xlabel("ISM (Wr - dNanog)")
plt.ylabel("DeepLIFT count importance");

fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Sox2<>Nanog']['x']['dthis']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1),
             x_alt=2+motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Sox2<>Nanog']['x']['dboth']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1), 
             y_ref=2+motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Sox2<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             y_alt=2+motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Sox2<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             ax=ax, xl=[0, 2])
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")

Text(0, 0.5, 'Nanog (dSox2)')

fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1),
             x_alt=2+motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1), 
             y_ref=2+motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             y_alt=2+motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             ax=ax, xl=[0, 2])
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")

Text(0, 0.5, 'Nanog (dSox2)')

fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=1+motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)),
             x_alt=1+motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)), 
             y_ref=1+motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             y_alt=1+motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             ax=ax, xl=[0, 2]
        )
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")

Text(0, 0.5, 'Nanog (dSox2)')

s = slice(85, 118)

fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=1+motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dthis']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)),
             x_alt=1+motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dboth']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)), 
             y_ref=1+motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             y_alt=1+motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             ax=ax, xl=[0, 2]
        )
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")

Text(0, 0.5, 'Nanog (dSox2)')

s = slice(65, 135)

fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dthis']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)),
             x_alt=motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dboth']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)), 
             y_ref=motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             y_alt=motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             ax=ax, xl=[-1, 3]
        )
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")

Text(0, 0.5, 'Nanog (dSox2)')

fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=1+motif_pair_lpdata['Sox2<>Nanog']['x']['ref']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dthis']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)),
             x_alt=1+motif_pair_lpdata['Sox2<>Nanog']['x']['dother']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['x']['dboth']['wide']['Sox2']['pred'][:, s].mean(axis=(1,2)), 
             y_ref=1+motif_pair_lpdata['Sox2<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             y_alt=1+motif_pair_lpdata['Sox2<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Sox2<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             ax=ax, xl=[0, 2]
        )
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")

Text(0, 0.5, 'Nanog (dSox2)')

fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             x_alt=2+motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)), 
             y_ref=2+motif_pair_lpdata['Nanog<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             y_alt=2+motif_pair_lpdata['Nanog<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, s].mean(axis=(1,2)),
             ax=ax, xl=[0, 2]
        )
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")

Text(0, 0.5, 'Nanog (dSox2)')

fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1),
             x_alt=2+motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'][:, sox2_max_positions,[0, 1]].mean(axis=-1), 
             y_ref=2+motif_pair_lpdata['Nanog<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             y_alt=2+motif_pair_lpdata['Nanog<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             ax=ax, xl=[0, 2])
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")

Text(0, 0.5, 'Nanog (dSox2)')

fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             x_alt=2+motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1), 
             y_ref=2+motif_pair_lpdata['Nanog<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             y_alt=2+motif_pair_lpdata['Nanog<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             ax=ax, xl=[0, 2])
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")

Text(0, 0.5, 'Nanog (dSox2)')

fig,ax = plt.subplots(figsize=get_figsize(.5, 1))
plot_scatter(x_ref=2+motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             x_alt=2+motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1), 
             y_ref=2+motif_pair_lpdata['Nanog<>Nanog']['y']['ref']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['y']['dthis']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             y_alt=2+motif_pair_lpdata['Nanog<>Nanog']['y']['dother']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1) - motif_pair_lpdata['Nanog<>Nanog']['y']['dboth']['wide']['Nanog']['pred'][:, nanog_max_positions,[0, 1]].mean(axis=-1),
             ax=ax, xl=[0, 2])
plt.xlabel("Sox2 (dNanog)")
plt.ylabel("Nanog (dSox2)")

Text(0, 0.5, 'Nanog (dSox2)')

TODO¶

correlate ISM scores and deeplift scores

np.mean(x > 1)

0.576455615653834

np.mean(x)

1.2576684

plt.scatter(motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'].sum(axis=(1,2)),
            motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'].sum(axis=(1,2)))

x = ((2*motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2)) - 
 motif_pair_lpdata['Nanog<>Nanog']['x']['dother']['wide']['Nanog']['pred'].sum(axis=(1,2)) - 
 motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'].sum(axis=(1,2))) / 
(motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'].sum(axis=(1,2))))
plt.hist(x[(x>-1) & (x<10)], 50);
plt.xlabel("Synergy score");

y = motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['narrow']['Nanog']['imp']['profile']

motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['narrow']['Nanog']['imp']['count'][0]

array([[ 0.    ,  0.0033,  0.    ,  0.    ],
       [ 0.    ,  0.    ,  0.0074,  0.    ],
       [ 0.0377, -0.    ,  0.    ,  0.    ],
       [ 0.0271,  0.    ,  0.    ,  0.    ],
       [ 0.    , -0.    , -0.0234,  0.    ],
       [ 0.    ,  0.    ,  0.0133,  0.    ],
       [ 0.    , -0.0121, -0.    , -0.    ],
       [-0.    , -0.    , -0.    ,  0.0255],
       [-0.    ,  0.0099, -0.    , -0.    ]], dtype=float32)

plt.hist(motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['narrow']['Nanog']['imp']['count'].sum(axis=(1,2)))

(array([   2.,   18.,   37.,  167.,  723., 2762., 8756., 3175.,   69.,    6.]),
 array([-2.09  , -1.7737, -1.4574, -1.1411, -0.8248, -0.5085, -0.1922,  0.1241,  0.4404,  0.7567,
         1.073 ], dtype=float32),
 <a list of 10 Patch objects>)

from basepair.plot.evaluate import regression_eval

x = ((motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2)) - 
 motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'].sum(axis=(1,2))))
y = (motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2)) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'].sum(axis=(1,2)))
plt.scatter(x,y, alpha=0.1);
# plt.xlabel("Single TF affinity");

  File "<ipython-input-179-92dbdb5559a3>", line 3
    y = (motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2))) - motif_pair_lpdata['Nanog<>Nanog']['x']['dboth']['wide']['Nanog']['pred'].sum(axis=(1,2)))
                                                                                                                                                                                           ^
SyntaxError: invalid syntax

x = ((motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Nanog']['pred'].sum(axis=(1,2))) - motif_pair_lpdata['Nanog<>Nanog']['x']['dthis']['wide']['Nanog']['pred'].sum(axis=(1,2)))
plt.hist(x[x< 100], 50);
plt.xlabel("Single TF affinity");

motif_pair_lpdata['Nanog<>Nanog']['x']['ref']['wide']['Klf4']['obs'].shape

(15715, 200, 2)

	Unnamed: 0	pattern_x	example_idx	pattern_start_x	pattern_end_x	strand_x	pattern_len_x	pattern_center_x	match_weighted_x	match_weighted_p_x	match_weighted_cat_x	match_max_x	match_max_task_x	imp_weighted_x	imp_weighted_p_x	imp_weighted_cat_x	imp_max_x	imp_max_task_x	seq_match_x	seq_match_p_x	seq_match_cat_x	match/Klf4_x	match/Nanog_x	match/Oct4_x	match/Sox2_x	imp/Klf4_x	imp/Nanog_x	imp/Oct4_x	imp/Sox2_x	example_chrom_x	example_start_x	example_end_x	example_strand_x	example_interval_from_task_x	pattern_short_x	pattern_name_x	pattern_start_abs_x	pattern_end_abs_x	pattern_center_aln_x	pattern_strand_aln_x	row_idx_x	pattern_y	pattern_start_y	pattern_end_y	strand_y	pattern_len_y	pattern_center_y	match_weighted_y	match_weighted_p_y	match_weighted_cat_y	match_max_y	match_max_task_y	imp_weighted_y	imp_weighted_p_y	imp_weighted_cat_y	imp_max_y	imp_max_task_y	seq_match_y	seq_match_p_y	seq_match_cat_y	match/Klf4_y	match/Nanog_y	match/Oct4_y	match/Sox2_y	imp/Klf4_y	imp/Nanog_y	imp/Oct4_y	imp/Sox2_y	example_chrom_y	example_start_y	example_end_y	example_strand_y	example_interval_from_task_y	pattern_short_y	pattern_name_y	pattern_start_abs_y	pattern_end_abs_y	pattern_center_aln_y	pattern_strand_aln_y	row_idx_y	center_diff	center_diff_aln	strand_combination	motif_pair	motif_pair_idx	task	Wt_obs	Wt	dA	dB	dAB
0	1	metacluster_0/pattern_0	1	437.0	452.0	+	15.0	444.0	0.5391	0.5105	medium	0.5627	Sox2	0.9221	0.8271	low	1.0504	Oct4	9.6558	0.4369	medium	0.5452	0.4714	0.5524	0.5627	0.5605	1.0090	1.0504	0.9147	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	448.0	+	0.0	metacluster_0/pattern_0	458.0	473.0	+	15.0	465.0	0.5174	0.3763	medium	0.5613	Oct4	1.0389	0.9077	low	1.2170	Sox2	11.3139	0.7737	high	0.4683	0.4598	0.5613	0.5274	0.5500	1.1198	1.0884	1.2170	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	469.0	+	1.0	21.0	21.0	++	Oct4-Sox2<>Oct4-Sox2	0	Oct4	16198.0	58234.4141	28042.2031	38972.9766	23831.6582
1	2	metacluster_0/pattern_0	1	437.0	452.0	+	15.0	444.0	0.5391	0.5105	medium	0.5627	Sox2	0.9221	0.8271	low	1.0504	Oct4	9.6558	0.4369	medium	0.5452	0.4714	0.5524	0.5627	0.5605	1.0090	1.0504	0.9147	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	448.0	+	0.0	metacluster_0/pattern_0	499.0	514.0	+	15.0	506.0	0.5154	0.3673	medium	0.5390	Oct4	1.3206	0.9862	low	1.5825	Oct4	9.8925	0.4740	medium	0.4916	0.4731	0.5390	0.5267	0.6803	1.0429	1.5825	1.5485	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	510.0	+	2.0	62.0	62.0	++	Oct4-Sox2<>Oct4-Sox2	1	Oct4	16198.0	58234.4141	28042.2031	17461.5430	9037.3682
2	3	metacluster_0/pattern_0	1	437.0	452.0	+	15.0	444.0	0.5391	0.5105	medium	0.5627	Sox2	0.9221	0.8271	low	1.0504	Oct4	9.6558	0.4369	medium	0.5452	0.4714	0.5524	0.5627	0.5605	1.0090	1.0504	0.9147	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	448.0	+	0.0	metacluster_0/pattern_0	520.0	535.0	+	15.0	527.0	0.4897	0.2392	low	0.5008	Oct4	1.4250	0.9950	low	1.6570	Sox2	7.7218	0.1351	low	0.4555	0.4992	0.5008	0.4895	0.8025	1.2853	1.6062	1.6570	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	531.0	+	3.0	83.0	83.0	++	Oct4-Sox2<>Oct4-Sox2	2	Oct4	16198.0	58234.4141	28042.2031	37178.5391	22600.3633
3	4	metacluster_0/pattern_0	1	437.0	452.0	+	15.0	444.0	0.5391	0.5105	medium	0.5627	Sox2	0.9221	0.8271	low	1.0504	Oct4	9.6558	0.4369	medium	0.5452	0.4714	0.5524	0.5627	0.5605	1.0090	1.0504	0.9147	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	448.0	+	0.0	metacluster_0/pattern_0	541.0	556.0	+	15.0	548.0	0.4843	0.2165	low	0.5198	Oct4	1.2319	0.9746	low	1.4439	Oct4	9.6558	0.4369	medium	0.4183	0.4421	0.5198	0.5058	0.6828	1.0318	1.4439	1.4188	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	552.0	+	4.0	104.0	104.0	++	Oct4-Sox2<>Oct4-Sox2	3	Oct4	16198.0	58234.4141	28042.2031	23178.7891	13922.0508
4	7	metacluster_0/pattern_0	1	458.0	473.0	+	15.0	465.0	0.5174	0.3763	medium	0.5613	Oct4	1.0389	0.9077	low	1.2170	Sox2	11.3139	0.7737	high	0.4683	0.4598	0.5613	0.5274	0.5500	1.1198	1.0884	1.2170	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	469.0	+	1.0	metacluster_0/pattern_0	499.0	514.0	+	15.0	506.0	0.5154	0.3673	medium	0.5390	Oct4	1.3206	0.9862	low	1.5825	Oct4	9.8925	0.4740	medium	0.4916	0.4731	0.5390	0.5267	0.6803	1.0429	1.5825	1.5485	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	510.0	+	2.0	41.0	41.0	++	Oct4-Sox2<>Oct4-Sox2	4	Oct4	16198.0	58234.4141	38972.9766	17461.5430	9234.9775

	H3K27ac	PolII	example_idx
0	12794.0	2184.0	1
1	220274.0	94643.0	2
2	130652.0	86907.0	3
3	76793.0	60257.0	4
4	26238.0	13179.0	5

	Wt_obs	Wt	dA	dB	dAB	motif_pair	task	center_diff	strand_combination	example_idx
0	16198.0	58234.4141	28042.2031	38972.9766	23831.6582	Oct4-Sox2<>Oct4-Sox2	Oct4	21.0	++	1
1	16198.0	58234.4141	28042.2031	17461.5430	9037.3682	Oct4-Sox2<>Oct4-Sox2	Oct4	62.0	++	1
2	16198.0	58234.4141	28042.2031	37178.5391	22600.3633	Oct4-Sox2<>Oct4-Sox2	Oct4	83.0	++	1
3	16198.0	58234.4141	28042.2031	23178.7891	13922.0508	Oct4-Sox2<>Oct4-Sox2	Oct4	104.0	++	1
4	16198.0	58234.4141	38972.9766	17461.5430	9234.9775	Oct4-Sox2<>Oct4-Sox2	Oct4	41.0	++	1

	Wt_obs	Wt	dA	dB	dAB	motif_pair	task	center_diff	strand_combination
0	16198.0	58234.4141	28042.2031	38972.9766	23831.6582	Oct4-Sox2<>Oct4-Sox2	Oct4	21.0	++
1	16198.0	58234.4141	28042.2031	17461.5430	9037.3682	Oct4-Sox2<>Oct4-Sox2	Oct4	62.0	++
2	16198.0	58234.4141	28042.2031	37178.5391	22600.3633	Oct4-Sox2<>Oct4-Sox2	Oct4	83.0	++
3	16198.0	58234.4141	28042.2031	23178.7891	13922.0508	Oct4-Sox2<>Oct4-Sox2	Oct4	104.0	++
4	16198.0	58234.4141	38972.9766	17461.5430	9234.9775	Oct4-Sox2<>Oct4-Sox2	Oct4	41.0	++

	Wt_obs	Wt	dA	dB	dAB	motif_pair	task	center_diff	strand_combination	example_idx
0	16198.0	58234.4141	28042.2031	38972.9766	23831.6582	Oct4-Sox2<>Oct4-Sox2	Oct4	21.0	++	1
1	16198.0	58234.4141	28042.2031	17461.5430	9037.3682	Oct4-Sox2<>Oct4-Sox2	Oct4	62.0	++	1
2	16198.0	58234.4141	28042.2031	37178.5391	22600.3633	Oct4-Sox2<>Oct4-Sox2	Oct4	83.0	++	1
3	16198.0	58234.4141	28042.2031	23178.7891	13922.0508	Oct4-Sox2<>Oct4-Sox2	Oct4	104.0	++	1
4	16198.0	58234.4141	38972.9766	17461.5430	9234.9775	Oct4-Sox2<>Oct4-Sox2	Oct4	41.0	++	1