Goal

  • make the paper figures for the hyper-parameters

Tasks

  • [x] gather the experiment table

TODO

  • [x] Put the weighting into context
  • [~] Make all the plots and assemble them together in Illustrator
  • [ ] Use the same y and x axis span for all the hyper-parameter plots (except the multi-TF plots)
In [1]:
# Imports
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from basepair.imports import *
from basepair.exp.paper.config import tf_colors
from basepair.functions import mean
from basepair.cli.imp_score import ImpScoreFile
Using TensorFlow backend.
In [2]:
paper_config()
In [3]:
fig_nexus_hp = Path(f"{ddir}/figures/model-evaluation/ChIP-nexus/hyper-parameters")
fig_seq_hp = Path(f"{ddir}/figures/model-evaluation/ChIP-seq/hyper-parameters")
In [4]:
osn_tfs = ['Oct4', 'Sox2', 'Nanog']
osnk_tfs = ['Oct4', 'Sox2', 'Nanog', 'Klf4']
In [5]:
!mkdir -p {fig_seq_hp}
!mkdir -p {fig_nexus_hp}
In [6]:
df = pd.read_csv("output/model.results.finished.csv")
In [7]:
df.set_index('exp', inplace=True)
In [8]:
# Aggregate the per-TF validation profile losses into a single column.
# Single-task runs report NaN for the TFs they were not trained on, so treat
# missing values as 0 and sum whatever tasks each experiment actually has.
# (Replaces the previous chained `df[col][mask] += ...` assignment, which
# triggers SettingWithCopyWarning and is not guaranteed to write back.)
df['best-epoch/val_profile_loss'] = sum(
    df[f'best-epoch/val_{tf}/profile_loss'].fillna(0) for tf in osnk_tfs
)
In [9]:
# Aggregate the per-TF validation counts losses into a single column
# (comment previously said "profile loss" — copy-paste leftover).
# NaN for absent TFs is treated as 0, mirroring the profile-loss aggregation.
# (Replaces the previous chained `df[col][mask] += ...` assignment, which
# triggers SettingWithCopyWarning and is not guaranteed to write back.)
df['best-epoch/val_counts_loss'] = sum(
    df[f'best-epoch/val_{tf}/counts_loss'].fillna(0) for tf in osnk_tfs
)
In [10]:
len(df)
Out[10]:
14
In [11]:
nexus_metric_profile = 'valid-peaks/avg/profile/binsize=1/auprc' 
# nexus_metric_profile = 'best-epoch/val_loss' 
nexus_metric_counts = 'valid-peaks/avg/counts/spearmanr'
nexus_metric = 'best-epoch/val_loss'
seq_metric = 'best-epoch/val_loss'
seq_metric_profile = 'best-epoch/val_profile_loss'
seq_metric_profile2 = 'valid-peaks/avg/profile/binsize=1/auprc' 
seq_metric_counts = 'valid-peaks/avg/counts/spearmanr'
In [12]:
# Plot params
profile_auprc_name = 'Profile auPRC'
counts_spearman_name = r"Total counts $R_{s}$"

s_default = 20
In [13]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'nexus-single-task')])
In [14]:
exps
Out[14]:
['nexus,peaks,O,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE',
 'nexus,peaks,S,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE',
 'nexus,peaks,N,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE',
 'nexus,peaks,K,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE']
In [21]:
nexus_metric_profile
Out[21]:
'valid-peaks/avg/profile/binsize=1/auprc'
In [25]:
{tf: df.loc[nexus_default_exp][f'valid-peaks/{tf}/profile/binsize=1/auprc'] for tf in osnk_tfs}
Out[25]:
{'Oct4': 0.1833659351513462,
 'Sox2': 0.3802538585433689,
 'Nanog': 0.4486560937694963,
 'Klf4': 0.15679826648753378}

Multi-task

In [59]:
# Multi-task
v = {tf: df.loc[nexus_default_exp][f'valid-peaks/{tf}/profile/binsize=1/auprc'] for tf in osnk_tfs}
v
Out[59]:
{'Oct4': 0.1833659351513462,
 'Sox2': 0.3802538585433689,
 'Nanog': 0.4486560937694963,
 'Klf4': 0.15679826648753378}
In [60]:
mean(list(v.values()))
Out[60]:
0.2922685384879363
In [61]:
# Single task
v= {tf: dict(df.loc[exps][['tfs', nexus_metric_profile]].set_index("tfs").iloc[:,0])[tf[0]] for tf in osnk_tfs}
v
Out[61]:
{'Oct4': 0.20845759163615404,
 'Sox2': 0.4250493674411673,
 'Nanog': 0.4710824673004411,
 'Klf4': 0.17793632002159224}
In [62]:
mean(list(v.values()))
Out[62]:
0.32063143659983867

Single-task

In [63]:
# Multi-task
v = {tf: df.loc[nexus_default_exp][f'valid-peaks/{tf}/counts/spearmanr'] for tf in osnk_tfs}
v
Out[63]:
{'Oct4': 0.479076032353049,
 'Sox2': 0.44157320459105576,
 'Nanog': 0.6012117950669067,
 'Klf4': 0.5773766993655419}
In [64]:
mean(list(v.values()))
Out[64]:
0.5248094328441384
In [65]:
# Single task
v = {tf: dict(df.loc[exps][['tfs', f'valid-peaks/{tf}/counts/spearmanr']].set_index("tfs").iloc[:,0])[tf[0]] for tf in osnk_tfs}
v
Out[65]:
{'Oct4': 0.4984590360702481,
 'Sox2': 0.4642387531706982,
 'Nanog': 0.585168601772147,
 'Klf4': 0.5841399987556414}
In [66]:
mean(list(v.values()))
Out[66]:
0.5330015974421837

ChIP-nexus hyper-parameters

In [20]:
nexus_default_exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE-2'

Learning rate

In [356]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'lr-nexus')]) + [nexus_default_exp]
In [357]:
list(df.loc[exps].sort_values('lr')['lr'])
Out[357]:
[0.0005, 0.001, 0.002, 0.004, 0.005, 0.01, 0.02, 0.04, 0.05]
In [358]:
x_var = 'lr'
In [359]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[nexus_metric_profile], s=s_default)
ax.set_ylabel(profile_auprc_name);
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate");
fig.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')
In [360]:
# Total-counts Spearman correlation as a function of the learning rate
# (ChIP-nexus models), saved alongside the profile-auPRC panel.
x_var = 'lr'
df_sorted = df.loc[exps].sort_values(x_var)

fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(df_sorted[x_var], df_sorted['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate");
ax.set_ylabel(counts_spearman_name);
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

De-conv size

In [334]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'deconv')]) + [nexus_default_exp]
In [335]:
x_var = 'tconv_kernel_size'
In [336]:
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
# ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(25, color='grey', alpha=0.2)

ax.scatter(dfs[x_var], dfs[nexus_metric_profile], s=s_default)
ax.set_ylabel(profile_auprc_name);
plt.xticks([1, 10, 25, 35])

ax.set_xlabel("De-convolution size");
fig.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')
In [337]:
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
ax.axvline(25, color='grey', alpha=0.2)
plt.xticks([1, 10, 25, 35])
ax.scatter(dfs[x_var], dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_ylabel(counts_spearman_name);

ax.set_xlabel("De-convolution size");
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Number of layers (same padding)

Compute the receptive field:

In [338]:
from basepair.models import seq_bpnet_cropped_extra_seqlen
In [348]:
# Receptive field as a function of the total number of conv layers:
# first conv (kernel 25) + (nl - 1) dilated layers, deconv kernel fixed at 1.
n_layers = np.arange(1, 14)
receptive_field = [seq_bpnet_cropped_extra_seqlen(conv1_kernel_size=25,
                               n_dil_layers=nl-1,
                               tconv_kernel_size=1,
                               target_seqlen=0) + 1
                   for nl in n_layers]
print(pd.DataFrame({"receptive_field": receptive_field, "n_layers": n_layers}).to_string())
    receptive_field  n_layers
0                25         1
1                29         2
2                37         3
3                53         4
4                85         5
5               149         6
6               277         7
7               533         8
8              1045         9
9              2069        10
10             4117        11
11             8213        12
12            16405        13
In [340]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'n layers')& (df.padding == 'same')]) + [nexus_default_exp]
In [341]:
x_var = 'n_dil_layers'
In [346]:
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    dfs = df.loc[exps].sort_values(x_var)
    plt.scatter(dfs[x_var]+1, dfs[f'valid-peaks/{tf}/profile/binsize=1/auprc'], 
                label=tf,
                color=tf_colors[tf],
                s=15,
               )
plt.xlim([0, 14])
plt.legend(loc="upper left", bbox_to_anchor=(1,1))
plt.xticks([1, 5, 10])
plt.ylabel(profile_auprc_name);
plt.xlabel("Number of Layers");
plt.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')
In [347]:
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    dfs = df.loc[exps].sort_values(x_var)
    plt.scatter(dfs[x_var]+1, dfs[f'valid-peaks/{tf}/counts/spearmanr'], 
                label=tf,
                color=tf_colors[tf],
                s=15,
               )
plt.xlim([0, 14])
plt.xticks([1, 5, 10])
plt.legend(loc="upper left", bbox_to_anchor=(1,1))
plt.xlabel("Number of Layers");
plt.ylabel(counts_spearman_name);
ax.set_xlabel("Number of layers");
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Profile vs regression weight

In [138]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'regression_weight')]) + [nexus_default_exp]
In [139]:
x_var = 'regression_weight'
In [178]:
## Compute the median value of the total counts for each task
profiles = ImpScoreFile(f"output/{nexus_default_exp}/deeplift.imp_score.h5").get_profiles()

nexus_median_N = np.median(mean([p.sum(axis=-2).mean(axis=-1) for t,p in profiles.items()]))
print(nexus_median_N)
130.5
In [179]:
nexus_natural_weight = nexus_median_N // 2
In [141]:
midpoint = 1
dfs = df.loc[exps].sort_values(x_var)
fig, axes = plt.subplots(2, 1, figsize=get_figsize(.35, 1), sharex=True, gridspec_kw=dict(hspace=0))
ax = axes[0]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / nexus_natural_weight, dfs[nexus_metric_profile], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Profile\nauPRC');
ax.set_xlim([5e-3, 5e2])
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);

ax = axes[1]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var]  / nexus_natural_weight, dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Tot. counts $R_{s}$');
ax.set_xlabel("Relative total count weight");
ax.set_xlim([5e-3, 5e2]);
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);
fig.savefig(fig_nexus_hp / f'{x_var}.both-auprc-spearman.pdf', bbox_inches='tight')

ChIP-seq hyper-parameters

In [66]:
seq_default_exp = 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE'

Learning rate

In [67]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'lr-nexus')]) + [seq_default_exp]
In [68]:
list(df.loc[exps].sort_values('lr')['lr'])
Out[68]:
[0.0005, 0.001, 0.002, 0.004, 0.005, 0.01, 0.02, 0.04, 0.05]
In [69]:
seq_metric_name = 'Profile LL'
In [70]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], -dfs[seq_metric_profile], s=s_default)
ax.set_ylabel(seq_metric_name);
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate");
fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [56]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[seq_metric_counts], s=s_default)
ax.set_ylabel(counts_spearman_name);
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate");
fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

De-conv size

In [71]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'deconv')]) + [seq_default_exp]
In [72]:
x_var = 'tconv_kernel_size'
In [75]:
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
# ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(25, color='grey', alpha=0.2)

ax.scatter(dfs[x_var], -dfs[seq_metric_profile], s=s_default)
ax.set_ylabel(seq_metric_name);
plt.xticks([1, 20, 50, 100])

ax.set_xlabel("De-convolution size");
fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [76]:
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(50, color='grey', alpha=0.2)
plt.xticks([1, 20, 50, 100])
ax.scatter(dfs[x_var], dfs[seq_metric_counts], s=s_default)
ax.set_ylabel(counts_spearman_name);

ax.set_xlabel("De-convolution size");
fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Number of layers (same padding)

In [81]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'n layers')& (df.padding == 'same')]) + [seq_default_exp]
In [82]:
x_var = 'n_dil_layers'
In [84]:
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    dfs = df.loc[exps].sort_values(x_var)
    plt.scatter(dfs[x_var]+1, -dfs[f'best-epoch/val_{tf}/profile_loss'], 
                label=tf,
                color=tf_colors[tf],
                s=15,
               )
plt.xlim([0, 14])
plt.legend(loc="upper left", bbox_to_anchor=(1,1))
plt.xticks([1, 5, 10])
plt.ylabel(seq_metric_name);
plt.xlabel("Number of Layers");
plt.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [85]:
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    dfs = df.loc[exps].sort_values(x_var)
    plt.scatter(dfs[x_var]+1, dfs[f'valid-peaks/{tf}/counts/spearmanr'], 
                label=tf,
                color=tf_colors[tf],
                s=15,
               )
plt.xlim([0, 14])
plt.legend(loc="upper left", bbox_to_anchor=(1,1))
plt.xticks([1, 5, 10])
plt.ylabel(counts_spearman_name);
plt.xlabel("Number of Layers");
plt.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Profile vs regression weight

In [132]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'regression_weight')]) + [seq_default_exp]
In [133]:
x_var = 'regression_weight'
In [177]:
## Compute the median value of the total counts for each task
profiles = ImpScoreFile(f"output/{seq_default_exp}/deeplift.imp_score.h5").get_profiles()

seq_median_N = np.median(mean([p.sum(axis=-2).mean(axis=-1) for t,p in profiles.items()]))
print(seq_median_N)
49.0
In [134]:
seq_natural_weight = seq_median_N // 2
In [136]:
# Two-panel figure: profile LL and total-count Spearman vs. the relative
# total-count loss weight (ChIP-seq models). `midpoint` marks the "natural"
# weight (relative weight == 1).
midpoint = 1
dfs = df.loc[exps].sort_values(x_var)
fig, axes = plt.subplots(2, 1, figsize=get_figsize(.35, 1), sharex=True, gridspec_kw=dict(hspace=0))

# Top panel: profile log-likelihood (negated summed loss across all tasks).
# BUGFIX: previously plotted f'best-epoch/val_{tf}/profile_loss', where `tf`
# leaked from an earlier plotting loop — i.e. a single arbitrary task rather
# than the aggregate. Use the summed profile loss (seq_metric_profile),
# matching the ChIP-nexus version of this figure.
ax = axes[0]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / seq_natural_weight, -dfs[seq_metric_profile], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Profile LL');
ax.set_xlim([5e-3, 5e2])

# Bottom panel: average total-count Spearman correlation.
ax = axes[1]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / seq_natural_weight, dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Tot. counts $R_{s}$');
ax.set_xlabel("Relative total count weight");
ax.set_xlim([5e-3, 5e2]);
fig.savefig(fig_seq_hp / f'{x_var}.both-profileLL-spearman.pdf', bbox_inches='tight')

Genome-wide models

Chip-nexus

Learning rate

In [151]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'binary - lr')])
In [152]:
list(df.loc[exps].sort_values('lr')['lr'])
Out[152]:
[0.0005, 0.001, 0.002, 0.004, 0.005, 0.01, 0.02, 0.04, 0.05]
In [153]:
gw_binary_metric = 'valid-genome-wide/avg/class/auPR'
In [154]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
ax.set_ylabel('auPR');
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate");
fig.savefig(fig_nexus_hp / f'{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')
In [155]:
features = [f'valid-genome-wide/{tf}/class/{feature}'
           for tf in osnk_tfs for feature in ['frac_positive', 'n_negative', 'n_positive']]
dict(dfs[features].drop_duplicates().dropna().iloc[0])
Out[155]:
{'valid-genome-wide/Oct4/class/frac_positive': 0.005918009992432528,
 'valid-genome-wide/Oct4/class/n_negative': 9913927.0,
 'valid-genome-wide/Oct4/class/n_positive': 59020.0,
 'valid-genome-wide/Sox2/class/frac_positive': 0.001896630955724521,
 'valid-genome-wide/Sox2/class/n_negative': 9954032.0,
 'valid-genome-wide/Sox2/class/n_positive': 18915.0,
 'valid-genome-wide/Nanog/class/frac_positive': 0.008883432349535198,
 'valid-genome-wide/Nanog/class/n_negative': 9884353.0,
 'valid-genome-wide/Nanog/class/n_positive': 88594.0,
 'valid-genome-wide/Klf4/class/frac_positive': 0.012622447507241341,
 'valid-genome-wide/Klf4/class/n_negative': 9847064.0,
 'valid-genome-wide/Klf4/class/n_positive': 125883.0}

Profile importance

In [156]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'binary + profile')])
default = 'nexus,gw,OSNK,1,0,0,FALSE,valid,0.5,64,25,0.001,9,FALSE'
In [157]:
x_var = 'profile_weight'
In [158]:
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter((dfs[x_var]), dfs[f'valid-genome-wide/avg/class/auPR'], s=s_default)
ax.axhline(df.loc[default][f'valid-genome-wide/avg/class/auPR'])
# ax.set_ylim([0.07, 0.15])
ax.set_xlim([5e-3, 2])
ax.set_ylabel("auPR");
ax.set_xscale('log')
ax.set_xlabel("Profile importance");
fig.savefig(fig_nexus_hp / f'{x_var}.gw-binary.auprc.pdf', bbox_inches='tight')

Chip-seq

Learning rate

In [159]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'binary - lr')])
In [160]:
list(df.loc[exps].sort_values('lr')['lr'])
Out[160]:
[0.0005, 0.001, 0.002, 0.004, 0.005, 0.01, 0.02, 0.04, 0.05]
In [161]:
gw_binary_metric = 'valid-genome-wide/avg/class/auPR'
In [37]:
x_var = 'lr'
In [162]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
ax.set_ylabel('auPR');
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate");
fig.savefig(fig_seq_hp / f'{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')
In [163]:
features = [f'valid-genome-wide/{tf}/class/{feature}'
           for tf in osn_tfs for feature in ['frac_positive', 'n_negative', 'n_positive']]
dict(dfs[features].drop_duplicates().dropna().iloc[0])
Out[163]:
{'valid-genome-wide/Oct4/class/frac_positive': 0.001396404224027518,
 'valid-genome-wide/Oct4/class/n_negative': 9958831.0,
 'valid-genome-wide/Oct4/class/n_positive': 13926.0,
 'valid-genome-wide/Sox2/class/frac_positive': 0.0005940182840111315,
 'valid-genome-wide/Sox2/class/n_negative': 9966833.0,
 'valid-genome-wide/Sox2/class/n_positive': 5924.0,
 'valid-genome-wide/Nanog/class/frac_positive': 0.0033060065536541197,
 'valid-genome-wide/Nanog/class/n_negative': 9939787.0,
 'valid-genome-wide/Nanog/class/n_positive': 32970.0}

Note

ChIP-nexus has roughly 3x more peaks hence auPR is higher.

Adding profile weight

In [164]:
default = 'seq,gw,OSN,1,0,0,FALSE,valid,0.5,64,50,0.001,9,FALSE'
exps = list(df.index[(df.assay == 'seq') & (df.note == 'binary + profile')])
In [165]:
x_var = 'profile_weight'
In [166]:
dfs = df.loc[exps].sort_values(x_var)


# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter((dfs[x_var]), dfs[f'valid-genome-wide/avg/class/auPR'], s=s_default)
ax.axhline(df.loc[default][f'valid-genome-wide/avg/class/auPR'])
ax.set_ylim([0.07, 0.15])
ax.set_xlim([5e-3, 2])
ax.set_ylabel("auPR");
ax.set_xscale('log')
ax.set_xlabel("Profile importance");
fig.savefig(fig_seq_hp / f'{x_var}.gw-binary.auprc.pdf', bbox_inches='tight')

Extra

ChIP-nexus

Number of layers (valid padding)

In [167]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'n layers')& (df.padding == 'valid')])
In [168]:
x_var = 'n_dil_layers'
In [169]:
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    dfs = df.loc[exps].sort_values(x_var)
    plt.scatter(dfs[x_var]+1, dfs[f'valid-peaks/{tf}/profile/binsize=1/auprc'], 
                label=tf,
                color=tf_colors[tf],
                s=15,
               )
plt.xlim([0, 14])
plt.legend(loc="upper left", bbox_to_anchor=(1,1))
plt.xticks([1, 5, 10])
plt.ylabel(profile_auprc_name);
plt.xlabel("Number of Layers");
plt.savefig(fig_nexus_hp / f'{x_var}.valid-padding.profile-auprc.pdf', bbox_inches='tight')
In [170]:
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    dfs = df.loc[exps].sort_values(x_var)
    plt.scatter(dfs[x_var]+1, dfs[f'valid-peaks/{tf}/counts/spearmanr'], 
                label=tf,
                color=tf_colors[tf],
                s=15,
               )
plt.xlim([0, 14])
plt.xticks([1, 5, 10])
plt.legend(loc="upper left", bbox_to_anchor=(1,1))
plt.xlabel("Number of Layers");
plt.ylabel(counts_spearman_name);
ax.set_xlabel("Number of layers");
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.counts-spearman.pdf', bbox_inches='tight')
In [171]:
seq_metric_profile
Out[171]:
'best-epoch/val_profile_loss'

ChIP-seq

Number of layers (valid padding)

In [173]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'n layers')& (df.padding == 'valid')])
In [174]:
x_var = 'n_dil_layers'
In [175]:
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    dfs = df.loc[exps].sort_values(x_var)
    plt.scatter(dfs[x_var]+1, -dfs[f'best-epoch/val_{tf}/profile_loss'], 
                label=tf,
                color=tf_colors[tf],
                s=15,
               )
plt.xlim([0, 14])
plt.legend(loc="upper left", bbox_to_anchor=(1,1))
plt.xticks([1, 5, 10])
plt.ylabel(seq_metric_name);
plt.xlabel("Number of Layers");
plt.savefig(fig_seq_hp / f'{x_var}.valid-padding.profile-ll.pdf', bbox_inches='tight')
In [176]:
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    dfs = df.loc[exps].sort_values(x_var)
    plt.scatter(dfs[x_var]+1, dfs[f'valid-peaks/{tf}/counts/spearmanr'], 
                label=tf,
                color=tf_colors[tf],
                s=15,
               )
plt.xlim([0, 14])
plt.legend(loc="upper left", bbox_to_anchor=(1,1))
plt.xticks([1, 5, 10])
plt.ylabel(counts_spearman_name);
plt.xlabel("Number of Layers");
plt.savefig(fig_seq_hp / f'{x_var}.valid-padding.counts-spearman.pdf', bbox_inches='tight')