Figure 5,6¶

TODO¶

[x] generate all the pdf plots
[x] for scatterplots, show only the corresponding TF in the main (triangular) figure
- [x] for the supplementary figures generate all 40 plots
- [x] use max count
[x] add the heatmap of A->B type of analysis
- [x] show <35 and >35 distance versions
[x] make sure colors look good and are consistent
[x] switch colors so that red means the 'interesting' effect
[x] add the co-occurence heatmap
[ ] simulation - generate the max score (same as in the perturbation analysis)
[ ] re-compute motif_centers and motif_diff using proper rounding schemes

# Imports
from basepair.imports import *
from basepair.exp.paper.config import motifs, profile_mapping
from basepair.exp.chipnexus.perturb.vdom import vdom_motif_pair, plot_spacing_hist
from basepair.exp.chipnexus.spacing import remove_edge_instances, get_motif_pairs
from basepair.exp.paper.config import tasks
from plotnine import *
import plotnine
import warnings
warnings.filterwarnings("ignore")

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
paper_config()

Using TensorFlow backend.

# Common paths
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
modisco_dir = model_dir / f"modisco/all/deeplift/profile/"
output_dir = modisco_dir
dataset_dir = output_dir / 'perturbation-analysis'

pairs = get_motif_pairs(motifs)

# ordered names
pair_names = ["<>".join(x) for x in pairs]

!mkdir -p {ddir}/figures/modisco/spacing/perturb

figures_dir = Path(f"{ddir}/figures/modisco/spacing/perturb")

Load the data¶

# load the data
dataset_dir = output_dir / 'perturbation-analysis'

from basepair.exp.chipnexus.perturb.scores import ism_compute_features_tidy, compute_features_tidy, SCORES, max_profile_count

# motif_pair_lpdata = read_pkl(dataset_dir / 'motif_pair_lpdata.incl-whole.pkl')

# dfabf_ism = ism_compute_features_tidy(motif_pair_lpdata, tasks)
# dfs = dfabf_ism[['Wt_obs', 'Wt', 'dA', 'dB', 'dAB', 'motif_pair', 'task', 'center_diff', 'strand_combination']]

# dfabf = compute_features_tidy(motif_pair_lpdata, tasks, SCORES, 
#                               pseudo_count_quantile=.2, 
#                               profile_slice=slice(82, 118))

# dfabf.to_parquet(dataset_dir / 'dfabf.parq')

# dfabf.to_csv(dataset_dir / 'dfabf.csv.gz', compression='gzip')
# dfs.to_csv(dataset_dir / 'dfs.csv.gz', compression='gzip')

ls {dataset_dir}

dfab.csv.gz     dfi_subset.csv.gz                 pair.total_counts.csv
dfabf.csv.gz    dfs.csv.gz                        pair.total_counts.csv.gz
dfabf.feather   double_mut.h5                     ref.h5
dfabf_ism.parq  motif_pair_lpdata.incl-whole.pkl  single_mut.h5
dfabf.parq      motif_pair_lpdata.pkl

ls {dataset_dir}

dfab.csv.gz     dfi_subset.csv.gz                 pair.total_counts.csv
dfabf.csv.gz    dfs.csv.gz                        pair.total_counts.csv.gz
dfabf.feather   double_mut.h5                     ref.h5
dfabf_ism.parq  motif_pair_lpdata.incl-whole.pkl  single_mut.h5
dfabf.parq      motif_pair_lpdata.pkl

!cp {dataset_dir}/dfabf.csv.gz /srv/www//kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/deeplift/profile/perturbation-analysis/

dfabf.head()

from IPython.display import FileLink
FileLink(dataset_dir / 'dfs.csv.gz')

dfabf = pd.read_csv(dataset_dir / 'dfabf.csv.gz')

# dfabf = pd.read_parquet(dataset_dir / 'dfabf.parq')
dfabf['center_diff_cat'] = pd.Categorical(pd.cut(dfabf['center_diff'], [0, 35, 70, 150, 1000]))
dfs = pd.read_csv(dataset_dir / 'dfs.csv.gz')

dfab = pd.read_csv(dataset_dir / 'dfab.csv.gz')
dfab['motif_pair_cat'] = pd.Categorical(dfab.motif_pair, categories=dfab.groupby("motif_pair").size().sort_values(ascending=False).index)
dfab['strand_combination_cat'] = pd.Categorical(dfab['strand_combination'], )

dfi_subset = pd.read_csv(dataset_dir / 'dfi_subset.csv.gz')

dfs.motif_pair.value_counts()

Nanog<>Klf4             72768
Nanog<>Nanog            62860
Oct4-Sox2<>Nanog        36956
                        ...  
Oct4-Sox2<>Sox2          9320
Oct4-Sox2<>Oct4-Sox2     5176
Sox2<>Sox2               4904
Name: motif_pair, Length: 10, dtype: int64

dfs.head()

TODO¶

have uniform distance filters

# define the global set of distances
dist_subsets = ['center_diff<=35',
               '(center_diff>35)&(center_diff<=70)', 
               '(center_diff>70)&(center_diff<=150)', 
               'center_diff>150']
dist_subset_labels = ['dist < 35',
                      '35 < dist <= 70',
                      '70 < dist <= 150',
                      '150 < dist',
                     ]

Co-occurence test¶

<150bp¶

from basepair.exp.chipnexus.spacing import coocurrence_plot

# co-occurence
fig, ax = plt.subplots(figsize=get_figsize(.25, aspect=1))
dist = 150
coocurrence_plot(dfi_subset, list(motifs),
                 query_string=f"(abs(pattern_center_aln_x- pattern_center_aln_y) <= {dist})")
ax.set_ylabel("Motif of interest")
ax.set_xlabel("Motif partner");
fig.savefig(figures_dir / f'coocurrence-test.center_diff<{dist}.pdf')

dfi_subset.head()

<35bp¶

from basepair.exp.chipnexus.spacing import co_occurence_matrix

# co-occurence
fig, ax = plt.subplots(figsize=get_figsize(.25, aspect=1))
dist = 35
coocurrence_plot(dfi_subset, list(motifs),
                 query_string=f"center_diff <= {dist}")
ax.set_ylabel("Motif of interest")
ax.set_xlabel("Motif partner");
fig.savefig(figures_dir / f'coocurrence-test.center_diff<{dist}.pdf')

All distance ranges¶

# subsets = ['center_diff <= 35', 'center_diff > 35', 'center_diff > 70']
subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets), 
                        figsize=get_figsize(0.25*len(subsets), .8/len(subsets)), 
                        sharey=True)
for i, (subset,ax, subset_label) in enumerate(zip(subsets, axes, dist_subset_labels)):
    if i == len(subsets) - 1:
        cbar = True
    else:
        cbar = False
    coocurrence_plot(dfi_subset, list(motifs), query_string=subset, ax=ax, cbar=cbar)
    if i == 0:
        ax.set_ylabel("Motif of interest")
        ax.set_xlabel("Motif partner");
    
    ax.set_title(subset_label)
# plt.tight_layout()
plt.savefig(figures_dir / 'coocurrence-test.all-dist.pdf')

Pairwise distance distribution¶

plotnine.options.figure_size = get_figsize(.5, aspect=2)# (10, 10)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist)]) + 
 geom_histogram(bins=max_dist) + 
 facet_grid("motif_pair_cat~ .") + 
 theme_classic(base_size=10, base_family='Arial') + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") + 
 scale_fill_brewer(type='qual', palette=3))
fig.save(figures_dir / 'histogram.center_diff.all.pdf')
fig

<ggplot: (-9223363310486774314)>

Nanog¶

plotnine.options.figure_size = get_figsize(.3, aspect=2/10*4)
max_dist = 100
fig = (ggplot(aes(x='center_diff', fill='strand_combination'), dfab[(dfab.center_diff <= max_dist) & (dfab.motif_pair.isin(["Nanog<>Nanog"]))]) + 
 geom_vline(xintercept=10, alpha=0.1) + 
 geom_vline(xintercept=20, alpha=0.1) + 
 geom_vline(xintercept=30, alpha=0.1) + 
 geom_vline(xintercept=40, alpha=0.1) + 
 geom_histogram(bins=max_dist) + facet_grid("strand_combination~.") + 
 theme_classic(base_size=10, base_family='Arial') + 
 theme(strip_text = element_text(rotation=0), legend_position='top') + 
 xlim([0, max_dist]) + 
 xlab("Pairwise distance") +
 ggtitle("Nanog<>Nanog") + 
 scale_fill_brewer(type='qual', palette=3))
fig.save(figures_dir / 'nanog.spacing.pdf')
fig

<ggplot: (-9223363310659608965)>

Synergy scores¶

Wt = total counts in ref
dA = total counts after removing A
dB = total counts after removing B
dAB = total counts after removing both

Then TF-A depends on A with

(Wt-dA)/(Wt - dAB)

and on B with

(Wt-dB)/(Wt - dAB)

# dfs = dfabf_ism[['Wt_obs', 'Wt', 'dA', 'dB', 'dAB', 'motif_pair', 'task', 'center_diff', 'strand_combination']]
dfs_prox = dfs[dfs.center_diff <= 35]
dfs_dist = dfs[dfs.center_diff > 35]
dfs.head()

Fraction of positive values¶

# choose the right color
colors = plt.get_cmap("RdBu")(np.linspace(0, 1, 9))
hex_colors = [plt.cm.colors.to_hex(a) for a in colors]
sns.palplot(colors)

cb_red = hex_colors[2]
cb_blue = hex_colors[-2 - 2]

hex_colors

['#67001f',
 '#bb2a34',
 '#e58368',
 '#fbceb7',
 '#f6f7f7',
 '#c0dceb',
 '#68abd0',
 '#2870b1',
 '#053061']

from basepair.plot.utils import PlotninePalette

terms = ['Wt-dA>0', 'Wt-dB>0', 'Wt-dAB>0', 'dA-dAB>0', 'dB-dAB>0']
filter_terms = ['center_diff <= 35', 'center_diff > 35']
c = pd.concat([(dfs.query(ft).groupby(['motif_pair', 'task']).apply(lambda x: x.eval(term).mean()).
                reset_index().
                rename(columns={0:'fraction'}).
                assign(term=term, subset=ft))
               for term in terms for ft in filter_terms], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)
c['subset'] = pd.Categorical(c['subset'], categories=filter_terms)
c['motif_pair'] = pd.Categorical(c['motif_pair'], pair_names[::-1])

plotnine.options.figure_size = get_figsize(1, 0.6)
fig = (ggplot(aes(y='motif_pair', x='task', fill='fraction'), c) + 
       geom_tile() + 
       theme_classic(base_size=10, base_family='Arial') +  
       facet_grid("subset ~ term") + 
       theme(legend_position='top', axis_text_x=element_text(rotation=30, hjust=1))  + 
       scale_fill_gradient2(low=cb_red, mid='white', high=cb_blue, midpoint=.5, limits=[0, 1], 
                            palette=PlotninePalette("RdBu", [.1, .9])) +
       ggtitle("Fraction of positive terms")
)
fig.save(figures_dir / 'heatmap.positive_fraction.pdf')
fig

<ggplot: (8726136487751)>

Synergy scores: `(2*Wt-dA-dB)/(Wt-dAB)`¶

Require: 90% Wt-dAB>0¶

mask = 'Wt-dAB>0'
terms = ['(2*Wt-dA-dB)/(Wt-dAB)']
filter_terms = dist_subsets
filter_names = dist_subset_labels
# filter_terms = ['center_diff <= 35', 'center_diff > 35']
c = pd.concat([(dfs.query(ft).groupby(['motif_pair', 'task']).
                apply(lambda x: x.eval(term).median() if x.eval(mask).mean() > .9 else np.nan).
                reset_index().
                rename(columns={0:'average'}).
                assign(term=term, subset=ft, subset_name=filter_names[i]))
               for term in terms for i, ft in enumerate(filter_terms)], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)
c['subset'] = pd.Categorical(c['subset'], categories=filter_terms)
c['subset_name'] = pd.Categorical(c['subset_name'], categories=filter_names)
c['motif_pair'] = pd.Categorical(c['motif_pair'], pair_names[::-1])

plotnine.options.figure_size = get_figsize(1, 0.6)
assert len(terms) == 1
fig = (ggplot(aes(y='motif_pair', x='task', fill='average')) + 
       geom_tile(data=c.dropna()) + 
       # geom_tile(fill='#d9d9d9', data=c[c.average.isnull()]) + 
       geom_text(label='/', data=c[c.average.isnull()]) + 
       theme_classic(base_size=10, base_family='Arial') + 
       facet_grid(". ~ subset_name") + 
       theme(legend_position='top', axis_text_x=element_text(rotation=30, hjust=1))  + 
       scale_fill_gradient2(low=cb_red, mid='white', high=cb_blue, midpoint=1, limits=[.6, 1.4], 
                            palette=PlotninePalette("RdBu_r", [0, 1])) +
       # scale_fill_gradient2(low=cb_blue, mid='white', high=cb_red, midpoint=1, limits=[0, 2]) +
       ggtitle(terms[0] + ", 90% Wt-dAB>0")
)
fig.save(figures_dir / 'heatmap.synergy-score.90perc.Wt-dAB>0.pdf')
fig

<ggplot: (8759288233094)>

Setup table showing only the major TF's¶

Row -> target motif of interest (and the task)
column -> other motif

def get_syn_effect(c, pairs, motifs, tasks):
    motifxtask = [f"{motif} (task={profile_mapping[motif]})" for motif in motifs]
    dfp = pd.DataFrame(index=motifxtask, columns=motifs, dtype=float)
    k = 0
    for i in range(len(motifs)):
        for j in range(len(motifs)):
            if i>j:
                continue
            idx = 'motif_pair_idx'
            cat = 'center_diff_cat'
            motif_pair = pairs[k]
            motif_pair_name = "<>".join(motif_pair)
            k+=1
            t1 = profile_mapping[motif_pair[0]]
            t2 = profile_mapping[motif_pair[1]]
            
            dfp.loc[motif_pair[0] + f" (task={t1})"][motif_pair[1]] = c[(c.motif_pair == motif_pair_name) & (c.task == t1)]['average']
            dfp.loc[motif_pair[1] + f" (task={t2})"][motif_pair[0]] = c[(c.motif_pair == motif_pair_name) & (c.task == t2)]['average']
    # dfp.columns = [r"$\Delta$"+c for c in dfp.columns]
    return dfp

Major TF per motif¶

# subsets = ['center_diff <= 35', 'center_diff > 35', 'center_diff > 70']
subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets), 
                        figsize=get_figsize(0.25*len(subsets), .8/len(subsets)), 
                        sharey=True)
for i, (subset,ax, subset_title) in enumerate(zip(subsets, axes, dist_subset_labels)):
    sns.heatmap(get_syn_effect(c[c.subset == subset], pairs, motifs, tasks), cmap='RdBu_r', center=1, ax=ax, 
                cbar=i==len(subsets)-1,
                vmin=.7, vmax=1.3)
    ax.set_title(subset_title)
plt.savefig(figures_dir / 'heatmap.synergy-score.90perc.Wt-dAB>0.main_tasks.pdf')

Require: 20% Wt-dAB>0¶

mask = 'Wt-dAB>0'
terms = ['(2*Wt-dA-dB)/(Wt-dAB)']
filter_terms = dist_subsets
filter_names = dist_subset_labels
# filter_terms = ['center_diff <= 35', 'center_diff > 35']
c = pd.concat([(dfs.query(ft).groupby(['motif_pair', 'task']).
                apply(lambda x: x.eval(term).median() if x.eval(mask).mean() > .2 else np.nan).
                reset_index().
                rename(columns={0:'average'}).
                assign(term=term, subset=ft, subset_name=filter_names[i]))
               for term in terms for i, ft in enumerate(filter_terms)], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)
c['subset'] = pd.Categorical(c['subset'], categories=filter_terms)
c['subset_name'] = pd.Categorical(c['subset_name'], categories=filter_names)
c['motif_pair'] = pd.Categorical(c['motif_pair'], pair_names[::-1])

plotnine.options.figure_size = get_figsize(1, 0.6)
assert len(terms) == 1
fig = (ggplot(aes(y='motif_pair', x='task', fill='average')) + 
       geom_tile(data=c.dropna()) + 
       # geom_tile(fill='#d9d9d9', data=c[c.average.isnull()]) + 
       geom_text(label='/', data=c[c.average.isnull()]) + 
       theme_classic(base_size=10, base_family='Arial') + 
       facet_grid(". ~ subset_name") + 
       theme(legend_position='top', axis_text_x=element_text(rotation=30, hjust=1))  + 
       scale_fill_gradient2(low=cb_red, mid='white', high=cb_blue, midpoint=1, limits=[.6, 1.4], 
                            palette=PlotninePalette("RdBu_r", [0, 1])) +
       # scale_fill_gradient2(low=cb_blue, mid='white', high=cb_red, midpoint=1, limits=[0, 2]) +
       ggtitle(terms[0] + ", 90% Wt-dAB>0")
)
fig.save(figures_dir / 'heatmap.synergy-score.20perc.Wt-dAB>0.pdf')
fig

<ggplot: (-9223363277898274777)>

Major TF per motif¶

subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets), 
                        figsize=get_figsize(0.25*len(subsets), .8/len(subsets)), 
                        sharey=True)
for i, (subset,ax, subset_title) in enumerate(zip(subsets, axes, dist_subset_labels)):
    sns.heatmap(get_syn_effect(c[c.subset == subset], pairs, motifs, tasks), cmap='RdBu_r', center=1, ax=ax, 
                cbar=i==len(subsets)-1,
                vmin=.7, vmax=1.3)
    ax.set_title(subset_title)
plt.savefig(figures_dir / 'heatmap.synergy-score.20perc.Wt-dAB>0.main_tasks.pdf')

Show all¶

mask = 'Wt-dAB>0'
terms = ['(2*Wt-dA-dB)/(Wt-dAB)']
filter_terms = dist_subsets
filter_names = dist_subset_labels
# filter_terms = ['center_diff <= 35', 'center_diff > 35']
c = pd.concat([(dfs.query(ft).groupby(['motif_pair', 'task']).
                apply(lambda x: x.eval(term).median()).
                reset_index().
                rename(columns={0:'average'}).
                assign(term=term, subset=ft, subset_name=filter_names[i]))
               for term in terms for i, ft in enumerate(filter_terms)], axis=0)
c['term'] = pd.Categorical(c['term'], categories=terms)
c['subset'] = pd.Categorical(c['subset'], categories=filter_terms)
c['subset_name'] = pd.Categorical(c['subset_name'], categories=filter_names)
c['motif_pair'] = pd.Categorical(c['motif_pair'], pair_names[::-1])

plotnine.options.figure_size = get_figsize(1, 0.6)
assert len(terms) == 1
fig = (ggplot(aes(y='motif_pair', x='task', fill='average')) + 
       geom_tile(data=c.dropna()) + 
       # geom_tile(fill='#d9d9d9', data=c[c.average.isnull()]) + 
       geom_text(label='/', data=c[c.average.isnull()]) + 
       theme_classic(base_size=10, base_family='Arial') + 
       facet_grid(". ~ subset_name") + 
       theme(legend_position='top', axis_text_x=element_text(rotation=30, hjust=1))  + 
       scale_fill_gradient2(low=cb_red, mid='white', high=cb_blue, midpoint=1, limits=[.6, 1.4], 
                            palette=PlotninePalette("RdBu_r", [0, 1])) +
       # scale_fill_gradient2(low=cb_blue, mid='white', high=cb_red, midpoint=1, limits=[0, 2]) +
       ggtitle(terms[0] + ", 90% Wt-dAB>0")
)
fig.save(figures_dir / 'heatmap.synergy-score.all.pdf')
fig

<ggplot: (-9223363277898619062)>

Major TF per motif¶

subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets), 
                        figsize=get_figsize(0.25*len(subsets), .8/len(subsets)), 
                        sharey=True)
for i, (subset,ax, subset_title) in enumerate(zip(subsets, axes, dist_subset_labels)):
    sns.heatmap(get_syn_effect(c[c.subset == subset], pairs, motifs, tasks), cmap='RdBu_r', center=1, ax=ax, 
                cbar=i==len(subsets)-1,
                vmin=.7, vmax=1.3)
    ax.set_title(subset_title)
plt.savefig(figures_dir / 'heatmap.synergy-score.all.main_tasks.pdf')

Directional effect¶

Scatterplot showing the major TF per motif¶

from basepair.exp.paper.config import profile_mapping

fig, axes = plt.subplots(len(motifs), len(motifs), 
                         figsize=get_figsize(.65, 1), 
                         gridspec_kw=dict(wspace=0, hspace=0),
                         sharex=True, sharey=True)
colors = plt.get_cmap("viridis")(np.linspace(0, .95, len(motifs)))
k = 0
for i in range(len(motifs)):
    for j in range(len(motifs)):
        ax = axes[i,j]
        if i>j:
            ax.axison = False
            continue
        idx = 'motif_pair_idx'
        cat = 'center_diff_cat'
        motif_pair = pairs[k]
        motif_pair_name = "<>".join(motif_pair)
        k+=1
        t1 = profile_mapping[motif_pair[0]]
        t2 = profile_mapping[motif_pair[1]]
        # t1 = 'Nanog'
        # t2 = 'Nanog'
        # motif mask
        mmask = (dfabf.motif_pair == motif_pair_name) & (dfabf.score == 'max_profile_count')
        ab = pd.merge(dfabf[mmask & (dfabf.task == t1)][[idx, 'x_alt_ref_pc', cat]],
                      dfabf[mmask & (dfabf.task == t2)][[idx, 'y_alt_ref_pc']],
                      on=idx)
        # make a scatterplot
        for ci, (label,df) in enumerate(list(ab.groupby(cat))):
            ax.scatter(df.y_alt_ref_pc,
                       df.x_alt_ref_pc,   # first one is on the y axis
                       alpha=0.2, s=1, label=label, color=colors[ci])
        
        # ax.text(0, 0,t1+"-"+t2)
        alpha = .5
        xl = [0, 2]
        ax.plot(xl, xl, c='grey', alpha=alpha)
        ax.axvline(1, c='grey', alpha=alpha)
        ax.axhline(1, c='grey', alpha=alpha)
        ax.set_xlim(xl)
        ax.set_ylim(xl)
        if i == j:
            ax.set_ylabel(motif_pair[1])
            ax.set_xlabel(motif_pair[0])
            
        # Use grey spines
        plt.setp(ax.spines.values(), color='grey')
        plt.setp([ax.get_xticklines(), ax.get_yticklines()], color='grey')
plt.legend(loc="bottom right", bbox_to_anchor=(-.2,.7),
           scatterpoints=1, markerscale=10, columnspacing=0,
           handletextpad=0, borderpad=0, frameon=False, title="Pairwise distance")
fig.savefig(figures_dir / "max_profile_count.upper-triag.width=0.65.pdf")

No pseudo-counts¶

fig, axes = plt.subplots(len(motifs), len(motifs), 
                         figsize=get_figsize(.65, 1),
                         gridspec_kw=dict(wspace=0, hspace=0),
                         sharex=True, sharey=True)
k = 0
for i in range(len(motifs)):
    for j in range(len(motifs)):
        ax = axes[i,j]
        if i>j:
            ax.axison = False
            continue
        idx = 'motif_pair_idx'
        cat = 'center_diff_cat'
        motif_pair = pairs[k]
        motif_pair_name = "<>".join(motif_pair)
        k+=1
        t1 = profile_mapping[motif_pair[0]]
        t2 = profile_mapping[motif_pair[1]]
        # t1 = 'Nanog'
        # t2 = 'Nanog'
        # motif mask
        mmask = (dfabf.motif_pair == motif_pair_name) & (dfabf.score == 'max_profile_count')
        ab = pd.merge(dfabf[mmask & (dfabf.task == t1)][[idx, 'x_alt_ref', cat]],
                      dfabf[mmask & (dfabf.task == t2)][[idx, 'y_alt_ref']],
                      on=idx)
        # make a scatterplot
        for ci, (label,df) in enumerate(list(ab.groupby(cat))):
            ax.scatter(df.y_alt_ref,
                       df.x_alt_ref,   # first one is on the y axis
                       alpha=0.2, s=1, label=label, color=colors[ci])
        # ax.text(0, 0,t1+"-"+t2)
        alpha = .5
        xl = [0, 2]
        ax.plot(xl, xl, c='grey', alpha=alpha)
        ax.axvline(1, c='grey', alpha=alpha)
        ax.axhline(1, c='grey', alpha=alpha)
        ax.set_xlim(xl)
        ax.set_ylim(xl)
        if i == j:
            ax.set_ylabel(motif_pair[1])
            ax.set_xlabel(motif_pair[0])
            
        # Use grey spines
        plt.setp(ax.spines.values(), color='grey')
        plt.setp([ax.get_xticklines(), ax.get_yticklines()], color='grey')
plt.legend(loc="bottom right", bbox_to_anchor=(-.2,.7),
           scatterpoints=1, markerscale=10, columnspacing=0,
           handletextpad=0, borderpad=0, frameon=False, title="Pairwise distance")
fig.savefig(figures_dir / "max_profile_count.upper-triag.no-pseudocounts.pdf")

All TF's per motif¶

import matplotlib.cm as cm
colors = plt.get_cmap("viridis")(np.linspace(0, .95, len(motifs)))
hex_colors = [plt.cm.colors.to_hex(a) for a in colors]

# use pair names
dfabf['motif_pair'] = pd.Categorical(dfabf['motif_pair'], pair_names)

from plotnine import *

plotnine.options.figure_size = get_figsize(1, 10/4)
fig = (ggplot(aes(x='x_alt_ref_pc', y='y_alt_ref_pc', color='center_diff_cat'), 
              dfabf[dfabf.score == 'max_profile_count']) + 
 geom_point(alpha=0.2, size=.05, shape='.', show_legend=False) + 
 scale_color_manual(hex_colors)+
 geom_point(alpha=1, size=5, data=dfabf[:0], shape='.') +   # for the legend
 # add vertical lines
 geom_abline(intercept=0, slope=1, color='grey', alpha=0.5, show_legend=False) + 
 geom_vline(xintercept=1, color='grey', alpha=0.5) + 
 geom_hline(yintercept=1, color='grey', alpha=0.5) + 
 facet_grid("motif_pair ~ task") + 
 xlim([0, 2]) + 
 ylim([0, 2]) + 
 theme_classic(base_size=10, base_family='Arial') + 
 xlab("B") + 
 ylab("A") + 
 ggtitle('max_profile_count')
)
fig.save(figures_dir / 'max_profile_count.all.pdf', raster=True)
display(fig)

<ggplot: (8769465047062)>

Heatmap version¶

def get_effect(dfabf, pairs, agg_fn=np.mean):
    motifxtask = [f"{motif} (task={profile_mapping[motif]})" for motif in motifs]
    dfp = pd.DataFrame(index=motifxtask, columns=motifs, dtype=float)
    k = 0
    for i in range(len(motifs)):
        for j in range(len(motifs)):
            if i>j:
                continue
            idx = 'motif_pair_idx'
            cat = 'center_diff_cat'
            motif_pair = pairs[k]
            motif_pair_name = "<>".join(motif_pair)
            k+=1
            t1 = profile_mapping[motif_pair[0]]
            t2 = profile_mapping[motif_pair[1]]
            # t1 = 'Nanog'
            # t2 = 'Nanog'
            # motif mask
            mmask = (dfabf.motif_pair == motif_pair_name) & (dfabf.score == 'max_profile_count')
            ab = pd.merge(dfabf[mmask & (dfabf.task == t1)][[idx, 'x_alt_ref_pc', cat]],
                          dfabf[mmask & (dfabf.task == t2)][[idx, 'y_alt_ref_pc']],
                          on=idx)
            dfp.loc[motif_pair[0] + f" (task={t1})"][motif_pair[1]] = agg_fn(ab.x_alt_ref_pc)
            dfp.loc[motif_pair[1] + f" (task={t2})"][motif_pair[0]] = agg_fn(ab.y_alt_ref_pc)
    dfp.columns = [r"$\Delta$"+c for c in dfp.columns]
    return dfp

Mean value¶

subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets), 
                        figsize=get_figsize(0.25*len(subsets), .8/len(subsets)), 
                        sharey=True)
for i, (subset,ax, subset_label) in enumerate(zip(subsets, axes, dist_subset_labels)):
    sns.heatmap(get_effect(dfabf.query(subset), pairs, agg_fn=np.mean), cmap='RdBu', center=1, ax=ax, 
                cbar=i==len(subsets)-1,
                vmin=.7, vmax=1.3)
    ax.set_title(subset_label)
plt.savefig(figures_dir / 'heatmap.mean_max_profile_count.directional_effect.pdf')

Median value¶

subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets), 
                        figsize=get_figsize(0.25*len(subsets), .8/len(subsets)), 
                        sharey=True)
for i, (subset,ax, subset_label) in enumerate(zip(subsets, axes, dist_subset_labels)):
    sns.heatmap(get_effect(dfabf.query(subset), pairs, agg_fn=np.median), cmap='RdBu', center=1, ax=ax, 
                cbar=i==len(subsets)-1,
                vmin=.7, vmax=1.3)
    ax.set_title(subset_label)
plt.savefig(figures_dir / 'heatmap.median_max_profile_count.directional_effect.pdf')

Values for all the tasks¶

motifxtask = [f"{motif} (task={task})" for motif in motifs for task in tasks]

def get_effect_all_tasks(dfabf, pairs, tasks, agg_fn=np.mean):
    motifxtask = [f"{motif} (task={task})" for motif in motifs for task in tasks]
    dfp = pd.DataFrame(index=motifxtask, columns=motifs, dtype=float)
    for task in tasks:
        k = 0
        for i in range(len(motifs)):
            for j in range(len(motifs)):
                if i>j:
                    continue
                idx = 'motif_pair_idx'
                cat = 'center_diff_cat'
                motif_pair = pairs[k]
                motif_pair_name = "<>".join(motif_pair)
                k+=1
                t1 = profile_mapping[motif_pair[0]]
                t2 = profile_mapping[motif_pair[1]]
                ab = dfabf[(dfabf.motif_pair == motif_pair_name) & (dfabf.score == 'max_profile_count') & (dfabf.task == task)]
                dfp.loc[motif_pair[0] + f" (task={task})"][motif_pair[1]] = agg_fn(ab.x_alt_ref_pc)
                dfp.loc[motif_pair[1] + f" (task={task})"][motif_pair[0]] = agg_fn(ab.y_alt_ref_pc)
    dfp.columns = [r"$\Delta$"+c for c in dfp.columns]
    return dfp

Median¶

subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets), 
                        figsize=get_figsize(0.25*len(subsets), .8/len(subsets)*4), 
                        sharey=True)
for i, (subset,ax, subset_label) in enumerate(zip(subsets, axes, dist_subset_labels)):
    sns.heatmap(get_effect_all_tasks(dfabf.query(subset), pairs, tasks=tasks, agg_fn=np.median), cmap='RdBu', center=1, ax=ax, 
                cbar=i==len(subsets)-1,
                vmin=.7, vmax=1.3)
    ax.set_title(subset_label)
plt.savefig(figures_dir / 'heatmap.median_max_profile_count.directional_effect.all-tasks.pdf')

Mean¶

subsets = dist_subsets
fig,axes = plt.subplots(1, len(subsets), 
                        figsize=get_figsize(0.25*len(subsets), .8/len(subsets)*4), 
                        sharey=True)
for i, (subset,ax, subset_label) in enumerate(zip(subsets, axes, dist_subset_labels)):
    sns.heatmap(get_effect_all_tasks(dfabf.query(subset), pairs, tasks=tasks, agg_fn=np.mean), cmap='RdBu', center=1, ax=ax, 
                cbar=i==len(subsets)-1,
                vmin=.7, vmax=1.3)
    ax.set_title(subset_label)
plt.savefig(figures_dir / 'heatmap.mean_max_profile_count.directional_effect.all-tasks.pdf')

Directness score¶

dfp = pd.read_csv("http://mitra.stanford.edu/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/deeplift/profile/pattern_table.csv")

dfp.head()

dfp = dfp[dfp.pattern.isin(list(motifs.values()))]
m_hash = {v:k for k,v in motifs.items()}
dfp['pattern_name'] = dfp.pattern.map(m_hash)
dfp = dfp.set_index('pattern_name')
dfp = dfp.loc[list(motifs)]

dfp_max = dfp[[c for c in dfp.columns if c.endswith(' footprint max')]]
dfp_max.columns = [c.split(" ")[0] for c in dfp_max.columns]
dfp_max = dfp_max[tasks]
dfp_max

dfp_counts = dfp[[c for c in dfp.columns if c.endswith(' footprint counts')]]
dfp_counts.columns = [c.split(" ")[0] for c in dfp_max.columns]
dfp_counts = dfp_counts[tasks]
dfp_counts

fig, axes = plt.subplots(1, 2, figsize=get_figsize(1, .35), sharey=True)
ax = axes[0]
sns.heatmap(dfp_max, cmap='Reds', ax=ax)
ax.set_ylabel("Motif")
ax.set_xlabel("Task");
ax.set_title("Avg. max profile counts");
ax = axes[1]
sns.heatmap(dfp_counts, cmap='Reds', ax=ax)
ax.set_ylabel("")
ax.set_xlabel("Task");
ax.set_title("Avg. total profile counts");
plt.savefig(figures_dir / 'heatmap.total-profile-counts.pdf')

Scratch space¶

Extra plots¶

Generate individual plots¶

For the scatter plots (directional interactions), generate png and pdf files for each interaction so I can use them directly.

The combined pdfs are too big and also too confusing to show on a slide or figure.

odir = figures_dir / "individual/max_profile_count"
odir.mkdir(parents=True, exist_ok=True)
colors = plt.get_cmap("viridis")(np.linspace(0, .95, len(motifs)))
k = 0
for i in range(len(motifs)):
    for j in range(len(motifs)):
        if i>j:
            continue
        fig, ax = plt.subplots(1, 1, figsize=get_figsize(.65 / 3, 1))
        idx = 'motif_pair_idx'
        cat = 'center_diff_cat'
        motif_pair = pairs[k]
        motif_pair_name = "<>".join(motif_pair)
        k+=1
        t1 = profile_mapping[motif_pair[0]]
        t2 = profile_mapping[motif_pair[1]]
        # t1 = 'Nanog'
        # t2 = 'Nanog'
        # motif mask
        mmask = (dfabf.motif_pair == motif_pair_name) & (dfabf.score == 'max_profile_count')
        ab = pd.merge(dfabf[mmask & (dfabf.task == t1)][[idx, 'x_alt_ref_pc', cat]],
                      dfabf[mmask & (dfabf.task == t2)][[idx, 'y_alt_ref_pc']],
                      on=idx)
        # make a scatterplot
        for ci, (label,df) in enumerate(list(ab.groupby(cat))):
            ax.scatter(df.y_alt_ref_pc,
                       df.x_alt_ref_pc,   # first one is on the y axis
                       alpha=0.2, s=1, label=label, color=colors[ci])
        
        # ax.text(0, 0,t1+"-"+t2)
        alpha = .5
        xl = [0, 2]
        ax.plot(xl, xl, c='grey', alpha=alpha)
        ax.axvline(1, c='grey', alpha=alpha)
        ax.axhline(1, c='grey', alpha=alpha)
        ax.set_xlim(xl)
        ax.set_ylim(xl)
        ax.set_xlabel(r"{} ($\Delta$ {})".format(motif_pair[1], motif_pair[0]))
        ax.set_ylabel(r"{} ($\Delta$ {})".format(motif_pair[0], motif_pair[1]))
            
        # Use grey spines
        plt.setp(ax.spines.values(), color='grey')
        plt.setp([ax.get_xticklines(), ax.get_yticklines()], color='grey')
        fig.savefig(odir / f'{motif_pair_name}.png')
        fig.savefig(odir / f'{motif_pair_name}.pdf')

odir = figures_dir / "individual/max_profile_count/all_tasks"
odir.mkdir(parents=True, exist_ok=True)
colors = plt.get_cmap("viridis")(np.linspace(0, .95, len(motifs)))

for task in tasks:
    k = 0
    for i in range(len(motifs)):
        for j in range(len(motifs)):
            if i>j:
                continue
            fig, ax = plt.subplots(1, 1, figsize=get_figsize(.65 / 3, 1))
            idx = 'motif_pair_idx'
            cat = 'center_diff_cat'
            motif_pair = pairs[k]
            motif_pair_name = "<>".join(motif_pair)
            k+=1
            t1 = profile_mapping[motif_pair[0]]
            t2 = profile_mapping[motif_pair[1]]
            
            ab = dfabf[(dfabf.motif_pair == motif_pair_name) & (dfabf.score == 'max_profile_count') & (dfabf.task == task)]
            # make a scatterplot
            for ci, (label,df) in enumerate(list(ab.groupby(cat))):
                ax.scatter(df.y_alt_ref_pc,
                           df.x_alt_ref_pc,   # first one is on the y axis
                           alpha=0.2, s=1, label=label, color=colors[ci])

            # ax.text(0, 0,t1+"-"+t2)
            alpha = .5
            xl = [0, 2]
            ax.plot(xl, xl, c='grey', alpha=alpha)
            ax.axvline(1, c='grey', alpha=alpha)
            ax.axhline(1, c='grey', alpha=alpha)
            ax.set_xlim(xl)
            ax.set_ylim(xl)
            ax.set_xlabel(r"{} ($\Delta$ {})".format(motif_pair[1], motif_pair[0]))
            ax.set_ylabel(r"{} ($\Delta$ {})".format(motif_pair[0], motif_pair[1]))
            ax.set_title(task)

            # Use grey spines
            plt.setp(ax.spines.values(), color='grey')
            plt.setp([ax.get_xticklines(), ax.get_yticklines()], color='grey')
            fig.savefig(odir / f'{motif_pair_name},task={task}.png')
            fig.savefig(odir / f'{motif_pair_name},task={task}.pdf')

	Unnamed: 0	Unnamed: 0.1	pattern_x	example_idx	pattern_start_x	pattern_end_x	strand_x	pattern_len_x	pattern_center_x	match_weighted_x	match_weighted_p_x	match_weighted_cat_x	match_max_x	match_max_task_x	imp_weighted_x	imp_weighted_p_x	imp_weighted_cat_x	imp_max_x	imp_max_task_x	seq_match_x	seq_match_p_x	seq_match_cat_x	match/Klf4_x	match/Nanog_x	match/Oct4_x	match/Sox2_x	imp/Klf4_x	imp/Nanog_x	imp/Oct4_x	imp/Sox2_x	example_chrom_x	example_start_x	example_end_x	example_strand_x	example_interval_from_task_x	pattern_short_x	pattern_name_x	pattern_start_abs_x	pattern_end_abs_x	pattern_center_aln_x	pattern_strand_aln_x	row_idx_x	pattern_y	pattern_start_y	pattern_end_y	strand_y	pattern_len_y	pattern_center_y	match_weighted_y	match_weighted_p_y	match_weighted_cat_y	match_max_y	match_max_task_y	imp_weighted_y	imp_weighted_p_y	imp_weighted_cat_y	imp_max_y	imp_max_task_y	seq_match_y	seq_match_p_y	seq_match_cat_y	match/Klf4_y	match/Nanog_y	match/Oct4_y	match/Sox2_y	imp/Klf4_y	imp/Nanog_y	imp/Oct4_y	imp/Sox2_y	example_chrom_y	example_start_y	example_end_y	example_strand_y	example_interval_from_task_y	pattern_short_y	pattern_name_y	pattern_start_abs_y	pattern_end_abs_y	pattern_center_aln_y	pattern_strand_aln_y	row_idx_y	center_diff	center_diff_aln	strand_combination	motif_pair	motif_pair_idx	task	score	x_alt	x_ref	x_alt_ref	x_alt_pc	x_ref_pc	x_alt_ref_pc	y_alt	y_ref	y_alt_ref	y_alt_pc	y_ref_pc	y_alt_ref_pc	center_diff_cat
0	0	1	metacluster_0/pattern_0	1	437.0	452.0	+	15.0	444.0	0.5391	0.5105	medium	0.5627	Sox2	0.9221	0.8271	low	1.0504	Oct4	9.6558	0.4369	medium	0.5452	0.4714	0.5524	0.5627	0.5605	1.0090	1.0504	0.9147	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	448.0	+	0.0	metacluster_0/pattern_0	458.0	473.0	+	15.0	465.0	0.5174	0.3763	medium	0.5613	Oct4	1.0389	0.9077	low	1.2170	Sox2	11.3139	0.7737	high	0.4683	0.4598	0.5613	0.5274	0.5500	1.1198	1.0884	1.2170	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	469.0	+	1.0	21.0	21.0	++	Oct4-Sox2<>Oct4-Sox2	0	Oct4	max_profile_count	97.6125	162.8281	0.5995	98.3200	163.5357	0.6012	138.5507	488.4471	0.2837	139.3577	489.2541	0.2848	(0, 35]
1	1	2	metacluster_0/pattern_0	1	437.0	452.0	+	15.0	444.0	0.5391	0.5105	medium	0.5627	Sox2	0.9221	0.8271	low	1.0504	Oct4	9.6558	0.4369	medium	0.5452	0.4714	0.5524	0.5627	0.5605	1.0090	1.0504	0.9147	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	448.0	+	0.0	metacluster_0/pattern_0	499.0	514.0	+	15.0	506.0	0.5154	0.3673	medium	0.5390	Oct4	1.3206	0.9862	low	1.5825	Oct4	9.8925	0.4740	medium	0.4916	0.4731	0.5390	0.5267	0.6803	1.0429	1.5825	1.5485	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	510.0	+	2.0	62.0	62.0	++	Oct4-Sox2<>Oct4-Sox2	1	Oct4	max_profile_count	71.4936	162.8281	0.4391	72.2012	163.5357	0.4415	403.7719	852.8242	0.4735	404.5789	853.6312	0.4740	(35, 70]
2	2	3	metacluster_0/pattern_0	1	437.0	452.0	+	15.0	444.0	0.5391	0.5105	medium	0.5627	Sox2	0.9221	0.8271	low	1.0504	Oct4	9.6558	0.4369	medium	0.5452	0.4714	0.5524	0.5627	0.5605	1.0090	1.0504	0.9147	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	448.0	+	0.0	metacluster_0/pattern_0	520.0	535.0	+	15.0	527.0	0.4897	0.2392	low	0.5008	Oct4	1.4250	0.9950	low	1.6570	Sox2	7.7218	0.1351	low	0.4555	0.4992	0.5008	0.4895	0.8025	1.2853	1.6062	1.6570	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	531.0	+	3.0	83.0	83.0	++	Oct4-Sox2<>Oct4-Sox2	2	Oct4	max_profile_count	131.1055	162.8281	0.8052	131.8130	163.5357	0.8060	113.7232	261.8408	0.4343	114.5302	262.6478	0.4361	(70, 150]
3	3	4	metacluster_0/pattern_0	1	437.0	452.0	+	15.0	444.0	0.5391	0.5105	medium	0.5627	Sox2	0.9221	0.8271	low	1.0504	Oct4	9.6558	0.4369	medium	0.5452	0.4714	0.5524	0.5627	0.5605	1.0090	1.0504	0.9147	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	448.0	+	0.0	metacluster_0/pattern_0	541.0	556.0	+	15.0	548.0	0.4843	0.2165	low	0.5198	Oct4	1.2319	0.9746	low	1.4439	Oct4	9.6558	0.4369	medium	0.4183	0.4421	0.5198	0.5058	0.6828	1.0318	1.4439	1.4188	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	552.0	+	4.0	104.0	104.0	++	Oct4-Sox2<>Oct4-Sox2	3	Oct4	max_profile_count	105.7563	162.8281	0.6495	106.4639	163.5357	0.6510	340.2409	652.8372	0.5212	341.0479	653.6442	0.5218	(70, 150]
4	4	7	metacluster_0/pattern_0	1	458.0	473.0	+	15.0	465.0	0.5174	0.3763	medium	0.5613	Oct4	1.0389	0.9077	low	1.2170	Sox2	11.3139	0.7737	high	0.4683	0.4598	0.5613	0.5274	0.5500	1.1198	1.0884	1.2170	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	469.0	+	1.0	metacluster_0/pattern_0	499.0	514.0	+	15.0	506.0	0.5154	0.3673	medium	0.5390	Oct4	1.3206	0.9862	low	1.5825	Oct4	9.8925	0.4740	medium	0.4916	0.4731	0.5390	0.5267	0.6803	1.0429	1.5825	1.5485	chr3	1.2215e+08	1.2215e+08	*	Oct4	m0_p0	Oct4-Sox2	1.2215e+08	1.2215e+08	510.0	+	2.0	41.0	41.0	++	Oct4-Sox2<>Oct4-Sox2	4	Oct4	max_profile_count	148.8586	488.4471	0.3048	149.5661	489.1546	0.3058	655.5236	852.8242	0.7687	656.3306	853.6312	0.7689	(35, 70]

	Unnamed: 0	Wt_obs	Wt	dA	dB	dAB	motif_pair	task	center_diff	strand_combination
0	0	16198.0	58234.414	28042.203	38972.977	23831.658	Oct4-Sox2<>Oct4-Sox2	Oct4	21.0	++
1	1	16198.0	58234.414	28042.203	17461.543	9037.368	Oct4-Sox2<>Oct4-Sox2	Oct4	62.0	++
2	2	16198.0	58234.414	28042.203	37178.540	22600.363	Oct4-Sox2<>Oct4-Sox2	Oct4	83.0	++
3	3	16198.0	58234.414	28042.203	23178.790	13922.051	Oct4-Sox2<>Oct4-Sox2	Oct4	104.0	++
4	4	16198.0	58234.414	38972.977	17461.543	9234.978	Oct4-Sox2<>Oct4-Sox2	Oct4	41.0	++

	Unnamed: 0	pattern	example_idx	pattern_start	pattern_end	strand	pattern_len	pattern_center	match_weighted	match_weighted_p	match_weighted_cat	match_max	match_max_task	imp_weighted	imp_weighted_p	imp_weighted_cat	imp_max	imp_max_task	seq_match	seq_match_p	seq_match_cat	match/Klf4	match/Nanog	match/Oct4	match/Sox2	imp/Klf4	imp/Nanog	imp/Oct4	imp/Sox2	example_chrom	example_start	example_end	example_strand	example_interval_from_task	pattern_short	pattern_name	pattern_start_abs	pattern_end_abs	pattern_center_aln	pattern_strand_aln	row_idx
0	10	metacluster_0/pattern_0	1	437	452	+	15	444	0.5391	0.5105	medium	0.5627	Sox2	0.9221	0.8271	low	1.0504	Oct4	9.6558	0.4369	medium	0.5452	0.4714	0.5524	0.5627	0.5605	1.0090	1.0504	0.9147	chr3	122145063	122146063	*	Oct4	m0_p0	Oct4-Sox2	122145500	122145515	448	+	0
1	11	metacluster_0/pattern_0	1	458	473	+	15	465	0.5174	0.3763	medium	0.5613	Oct4	1.0389	0.9077	low	1.2170	Sox2	11.3139	0.7737	high	0.4683	0.4598	0.5613	0.5274	0.5500	1.1198	1.0884	1.2170	chr3	122145063	122146063	*	Oct4	m0_p0	Oct4-Sox2	122145521	122145536	469	+	1
2	14	metacluster_0/pattern_0	1	499	514	+	15	506	0.5154	0.3673	medium	0.5390	Oct4	1.3206	0.9862	low	1.5825	Oct4	9.8925	0.4740	medium	0.4916	0.4731	0.5390	0.5267	0.6803	1.0429	1.5825	1.5485	chr3	122145063	122146063	*	Oct4	m0_p0	Oct4-Sox2	122145562	122145577	510	+	2
3	16	metacluster_0/pattern_0	1	520	535	+	15	527	0.4897	0.2392	low	0.5008	Oct4	1.4250	0.9950	low	1.6570	Sox2	7.7218	0.1351	low	0.4555	0.4992	0.5008	0.4895	0.8025	1.2853	1.6062	1.6570	chr3	122145063	122146063	*	Oct4	m0_p0	Oct4-Sox2	122145583	122145598	531	+	3
4	18	metacluster_0/pattern_0	1	541	556	+	15	548	0.4843	0.2165	low	0.5198	Oct4	1.2319	0.9746	low	1.4439	Oct4	9.6558	0.4369	medium	0.4183	0.4421	0.5198	0.5058	0.6828	1.0318	1.4439	1.4188	chr3	122145063	122146063	*	Oct4	m0_p0	Oct4-Sox2	122145604	122145619	552	+	4

	Unnamed: 0	Wt_obs	Wt	dA	dB	dAB	motif_pair	task	center_diff	strand_combination
0	0	16198.0	58234.414	28042.203	38972.977	23831.658	Oct4-Sox2<>Oct4-Sox2	Oct4	21.0	++
1	1	16198.0	58234.414	28042.203	17461.543	9037.368	Oct4-Sox2<>Oct4-Sox2	Oct4	62.0	++
2	2	16198.0	58234.414	28042.203	37178.540	22600.363	Oct4-Sox2<>Oct4-Sox2	Oct4	83.0	++
3	3	16198.0	58234.414	28042.203	23178.790	13922.051	Oct4-Sox2<>Oct4-Sox2	Oct4	104.0	++
4	4	16198.0	58234.414	38972.977	17461.543	9234.978	Oct4-Sox2<>Oct4-Sox2	Oct4	41.0	++

	i	pattern	cluster	n seqlets	Klf4/d_p	Nanog/d_p	Oct4/d_p	Sox2/d_p	Klf4 footprint counts	Klf4 footprint entropydiff	Klf4 footprint max	Klf4 footprint strandcor	Klf4 imp counts	Klf4 imp profile	Klf4 periodicity 10bp	Klf4 pos meandiff	Klf4 pos std	Klf4 pos unimodal	Klf4 region counts	Klf4/d	Nanog footprint counts	Nanog footprint entropydiff	Nanog footprint max	Nanog footprint strandcor	Nanog imp counts	Nanog imp profile	Nanog periodicity 10bp	Nanog pos meandiff	Nanog pos std	Nanog pos unimodal	Nanog region counts	Nanog/d	Oct4 footprint counts	Oct4 footprint entropydiff	Oct4 footprint max	Oct4 footprint strandcor	Oct4 imp counts	Oct4 imp profile	Oct4 periodicity 10bp	Oct4 pos meandiff	Oct4 pos std	Oct4 pos unimodal	Oct4 region counts	Oct4/d	Sox2 footprint counts	Sox2 footprint entropydiff	Sox2 footprint max	Sox2 footprint strandcor	Sox2 imp counts	Sox2 imp profile	Sox2 periodicity 10bp	Sox2 pos meandiff	Sox2 pos std	Sox2 pos unimodal	Sox2 region counts	Sox2/d	consensus	ic pwm mean	idx
0	0	m1_p7	8	157.0	95.0820	11.4754	0.8197	0.8197	183.4872	0.2647	1.5417	0.0059	0.0168	0.0285	0.0077	2.0164	95.7851	True	387.5287	0.0401	133.1731	0.1340	0.8974	0.0051	-0.0103	-9.0438e-04	0.0009	1.8610	185.0268	True	367.1019	0.0085	149.9744	0.1425	0.9423	0.0050	-4.8469e-04	-8.0106e-04	0.0019	1.9562	127.6654	True	422.9363	-0.0011	60.1218	0.2124	0.5737	0.0048	-0.0016	-2.6529e-03	0.0016	1.8105	121.0656	True	199.4968	-0.0038	GCCCCGCCCCACCCT	1.3623	37
1	1	m1_p8	8	162.0	90.9836	13.1148	8.1967	4.9180	154.7716	0.2412	1.2716	0.0059	0.0106	0.0198	0.0572	2.0037	61.0182	True	302.7778	0.0291	74.3827	0.1929	0.5123	0.0049	-0.0038	2.4813e-03	0.0169	2.3406	104.3285	True	212.5494	0.0088	85.1667	0.1113	0.4969	0.0049	5.1559e-04	1.1783e-03	0.0248	2.2167	96.4021	True	276.4321	0.0018	37.7284	0.1553	0.2593	0.0049	0.0005	-4.5393e-05	0.0183	2.0258	93.9946	True	146.6173	-0.0006	GGGAAGGGTGGGGCAGTAGA	0.9815	38
2	2	m6_p2	8	355.0	80.3279	24.5902	40.9836	27.8689	220.2955	0.2445	1.9148	0.0061	0.0053	0.0123	0.0073	2.0044	87.2336	True	421.7324	0.0193	288.6136	0.1758	1.9446	0.0052	0.0027	9.0652e-03	0.0038	1.9253	75.9526	True	537.9211	0.0154	167.0597	0.1038	1.1847	0.0050	2.4447e-03	3.4294e-03	0.0055	1.9122	139.0525	True	430.0113	0.0044	68.9744	0.1055	0.4290	0.0051	0.0027	3.2214e-03	0.0027	1.7937	138.7371	True	202.9465	0.0037	CAGAGTGCTGGGATTAAAGGC...	0.3143	109
3	3	m1_p0	8	12735.0	100.0000	25.4098	4.9180	3.2787	182.8839	0.2046	1.5890	0.0061	0.0263	0.0500	0.0159	1.9985	70.1436	True	355.6721	0.0736	115.8733	0.0249	0.5408	0.0051	-0.0140	7.4599e-04	0.0065	2.0307	146.5852	True	307.2621	0.0155	102.3714	0.0258	0.5333	0.0050	-8.0866e-04	9.4931e-05	0.0132	2.0174	155.3548	True	339.2477	0.0010	48.2119	0.0220	0.2416	0.0050	-0.0007	-1.2597e-03	0.0053	2.0489	165.0139	True	173.3736	-0.0018	TGGGTGTGGC	1.2984	30
4	4	m1_p1	8	2064.0	95.9016	16.3934	2.4590	2.4590	164.3267	0.1382	1.0644	0.0056	0.0204	0.0303	0.0954	1.9978	59.0731	True	349.1899	0.0402	38.5148	0.0186	0.1699	0.0050	-0.0103	4.1849e-06	0.1099	2.0319	154.9271	True	139.5499	0.0103	77.7647	0.0188	0.3607	0.0050	-9.5728e-06	-1.6580e-04	0.0898	1.9667	109.5886	True	286.3324	-0.0003	28.1585	0.0213	0.1181	0.0050	-0.0002	-1.5025e-03	0.0829	1.9582	172.8593	True	135.0795	-0.0028	GGCCCCGCCCT	1.1518	31

	Oct4	Sox2	Nanog	Klf4
pattern_name
Oct4-Sox2	5.1700	1.9023	1.4743	0.5510
Sox2	1.0024	3.0287	4.7277	1.2223
Nanog	0.7602	0.2945	2.9368	0.4001
Klf4	0.5333	0.2416	0.5408	1.5890

	Oct4	Sox2	Nanog	Klf4
pattern_name
Oct4-Sox2	116.8492	289.9074	401.0843	135.1212
Sox2	200.3641	516.2857	216.5016	188.5086
Nanog	96.6386	274.3910	154.1314	63.4457
Klf4	182.8839	115.8733	102.3714	48.2119

Figure 5,6¶

TODO¶

Load the data¶

TODO¶

Co-occurence test¶

<150bp¶

<35bp¶

All distance ranges¶

Pairwise distance distribution¶

Nanog¶

Synergy scores¶

Fraction of positive values¶

Synergy scores: (2*Wt-dA-dB)/(Wt-dAB)¶

Require: 90% Wt-dAB>0¶

Setup table showing only the major TF's¶

Major TF per motif¶

Require: 20% Wt-dAB>0¶

Major TF per motif¶

Show all¶

Major TF per motif¶

Directional effect¶

Scatterplot showing the major TF per motif¶

No pseudo-counts¶

All TF's per motif¶

Heatmap version¶

Mean value¶

Median value¶

Values for all the tasks¶

Median¶

Mean¶

Directness score¶

Scratch space¶

Extra plots¶

Generate individual plots¶

Synergy scores: `(2*Wt-dA-dB)/(Wt-dAB)`¶