Goal

  • make the paper figures for the hyper-parameters

Tasks

  • [x] gather the experiment table

TODO

  • [x] Put the weighting into context
  • [~] Make all the plots and assemble them together in Illustrator
  • [ ] Use the same y and x axis span for all the hyper-parameter plots (except the multi-TF plots)
In [23]:
# Imports
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from basepair.imports import *
from basepair.exp.paper.config import tf_colors
from basepair.functions import mean
from basepair.cli.imp_score import ImpScoreFile
[autoreload of basepair.preproc failed: Traceback (most recent call last):
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 244, in check
    superreload(m, reload, self.old_objects)
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 376, in superreload
    module = reload(module)
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/imp.py", line 315, in reload
    return importlib.reload(module)
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/importlib/__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 618, in _exec
  File "<frozen importlib._bootstrap_external>", line 678, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/users/avsec/workspace/basepair/basepair/preproc.py", line 301, in <module>
    @gin.configurable
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/gin/config.py", line 1129, in configurable
    return perform_decoration(decoration_target)
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/gin/config.py", line 1126, in perform_decoration
    return _make_configurable(fn_or_cls, name, module, whitelist, blacklist)
ValueError: A configurable matching 'basepair.preproc.IntervalAugmentor' already exists.
]
In [24]:
# Apply the paper-wide plotting configuration (presumably styles matplotlib;
# provided by the star import from basepair.imports — verify).
paper_config()
In [25]:
# Output directories for the hyper-parameter figures (v2), one per assay.
# `ddir` is presumably the project data dir from the star import above — TODO confirm.
fig_nexus_hp = Path(f"{ddir}/figures/model-evaluation/ChIP-nexus/hyper-parameters-v2")
fig_seq_hp = Path(f"{ddir}/figures/model-evaluation/ChIP-seq/hyper-parameters-v2")
In [26]:
# Transcription-factor task lists: the OSN trio and the full OSNK set.
osn_tfs = ['Oct4', 'Sox2', 'Nanog']
osnk_tfs = osn_tfs + ['Klf4']
In [27]:
# Create the figure output directories (IPython shell escape;
# -p creates parents and is a no-op if the directory already exists).
!mkdir -p {fig_seq_hp}
!mkdir -p {fig_nexus_hp}
In [28]:
# Experiment summary table: one row per finished model run.
df = pd.read_csv("output/model.results.finished.csv")
In [29]:
# Index the table by experiment id so runs can be looked up with df.loc below.
# Re-assignment instead of inplace=True: same result, avoids the in-place
# mutation anti-pattern.
df = df.set_index('exp')
In [30]:
# Setup the profile loss: total validation profile loss, summed over the TF
# heads present in each run. NaN entries (TFs the model was not trained on)
# are skipped by sum(), which reproduces the original NaN-masked accumulation
# while avoiding the chained assignment df[col][mask] += ... (which pandas
# may apply to a temporary copy, silently dropping the update).
profile_loss_cols = [f'best-epoch/val_{tf}/profile_loss' for tf in osnk_tfs]
df['best-epoch/val_profile_loss'] = df[profile_loss_cols].sum(axis=1)
In [31]:
# Setup the counts loss (original comment said "profile" — copy-paste slip):
# total validation counts loss, summed over the TF heads present in each run.
# NaNs are skipped by sum(), replacing the fragile chained-assignment
# accumulation (df[col][mask] += ...) with an equivalent vectorized sum.
counts_loss_cols = [f'best-epoch/val_{tf}/counts_loss' for tf in osnk_tfs]
df['best-epoch/val_counts_loss'] = df[counts_loss_cols].sum(axis=1)
In [32]:
# Number of experiments in the table
len(df)
Out[32]:
251
In [33]:
# Validation-metric column names used throughout the figures.
# "profile" metrics score the predicted profile shape, "counts" the total counts.
nexus_metric_profile = 'valid-peaks/avg/profile/binsize=1/auprc' 
nexus_metric_profile2 = 'best-epoch/val_profile_loss'
# nexus_metric_profile = 'best-epoch/val_loss' 
nexus_metric_counts = 'valid-peaks/avg/counts/spearmanr'
nexus_metric = 'best-epoch/val_loss'
seq_metric = 'best-epoch/val_loss'
seq_metric_profile = 'best-epoch/val_profile_loss'
seq_metric_profile2 = 'valid-peaks/avg/profile/binsize=1/auprc' 
seq_metric_counts = 'valid-peaks/avg/counts/spearmanr'
In [34]:
# Plot params
profile_auprc_name = 'Profile auPRC'
counts_spearman_name = r"Total counts $R_{s}$"

s_default = 20
In [35]:
# Single-task ChIP-nexus runs (one model per TF).
exps = list(df.query("assay == 'nexus' and note == 'nexus-single-task'").index)
In [36]:
# Show the selected single-task experiment ids
exps
Out[36]:
['nexus,peaks,O,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE',
 'nexus,peaks,S,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE',
 'nexus,peaks,N,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE',
 'nexus,peaks,K,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE']
In [37]:
# Confirm which profile metric column is in use
nexus_metric_profile
Out[37]:
'valid-peaks/avg/profile/binsize=1/auprc'
In [38]:
# Default multi-task ChIP-nexus experiment (all four TFs).
# NOTE: the same value is re-assigned again further down in the notebook.
nexus_default_exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE-2'

Multi-task

In [269]:
# Multi-task model: per-TF profile auPRC of the default OSNK run.
default_row = df.loc[nexus_default_exp]
v = {tf: default_row[f'valid-peaks/{tf}/profile/binsize=1/auprc'] for tf in osnk_tfs}
v
Out[269]:
{'Oct4': 0.1833659351513462,
 'Sox2': 0.3802538585433689,
 'Nanog': 0.4486560937694963,
 'Klf4': 0.15679826648753378}
In [270]:
# Average across the four TFs
mean(list(v.values()))
Out[270]:
0.2922685384879363
In [271]:
# Single task
v= {tf: dict(df.loc[exps][['tfs', nexus_metric_profile]].set_index("tfs").iloc[:,0])[tf[0]] for tf in osnk_tfs}
v
Out[271]:
{'Oct4': 0.20845759163615404,
 'Sox2': 0.4250493674411673,
 'Nanog': 0.4710824673004411,
 'Klf4': 0.17793632002159224}
In [272]:
# Average across the four TFs
mean(list(v.values()))
Out[272]:
0.32063143659983867

Single-task

In [273]:
# Multi-task model: per-TF total-count Spearman of the default OSNK run.
default_row = df.loc[nexus_default_exp]
v = {tf: default_row[f'valid-peaks/{tf}/counts/spearmanr'] for tf in osnk_tfs}
v
Out[273]:
{'Oct4': 0.479076032353049,
 'Sox2': 0.44157320459105576,
 'Nanog': 0.6012117950669067,
 'Klf4': 0.5773766993655419}
In [274]:
# Average across the four TFs
mean(list(v.values()))
Out[274]:
0.5248094328441384
In [275]:
# Single task
# Same lookup pattern as for the profile metric: map the single-letter 'tfs'
# key of each single-task run to its counts Spearman, index by tf[0].
v = {tf: dict(df.loc[exps][['tfs', f'valid-peaks/{tf}/counts/spearmanr']].set_index("tfs").iloc[:,0])[tf[0]] for tf in osnk_tfs}
v
Out[275]:
{'Oct4': 0.4984590360702481,
 'Sox2': 0.4642387531706982,
 'Nanog': 0.585168601772147,
 'Klf4': 0.5841399987556414}
In [276]:
# Average across the four TFs
mean(list(v.values()))
Out[276]:
0.5330015974421837

ChIP-nexus hyper-parameters

In [277]:
# Default multi-task experiment id (re-assignment; same value as defined earlier)
nexus_default_exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE-2'

Learning rate

In [278]:
# Inspect all runs ranked by profile auPRC (best first).
# NOTE(review): this displays the full ~250-column frame; consider selecting
# the relevant columns and/or .head() to keep the notebook readable.
df.sort_values(nexus_metric_profile, ascending=False)
Out[278]:
assay augment_interval batch_size best-epoch/Klf4/class_loss best-epoch/Klf4/counts_loss best-epoch/Klf4/profile_loss best-epoch/Nanog/class_loss best-epoch/Nanog/counts_loss best-epoch/Nanog/profile_loss best-epoch/Oct4/class_loss best-epoch/Oct4/counts_loss best-epoch/Oct4/profile_loss best-epoch/Sox2/class_loss best-epoch/Sox2/counts_loss best-epoch/Sox2/profile_loss best-epoch/epoch best-epoch/loss best-epoch/val_Klf4/class_loss best-epoch/val_Klf4/counts_loss best-epoch/val_Klf4/profile_loss best-epoch/val_Nanog/class_loss best-epoch/val_Nanog/counts_loss best-epoch/val_Nanog/profile_loss best-epoch/val_Oct4/class_loss best-epoch/val_Oct4/counts_loss best-epoch/val_Oct4/profile_loss best-epoch/val_Sox2/class_loss best-epoch/val_Sox2/counts_loss best-epoch/val_Sox2/profile_loss best-epoch/val_loss bias_pool binary_weight bn dataspec filters gin_bindings gin_files imp_score lr merge_profile_reg modisco_tasks n_dil_layers note p_peak padding peak_width profile_weight r_modisco r_wandb region regression_weight run_modisco seed seq_width stats/train/h:m:s stats/train/io_in stats/train/io_out stats/train/max_pss stats/train/max_rss stats/train/max_uss stats/train/max_vms stats/train/mean_load stats/train/s tconv_kernel_size tfs train train-peaks/Klf4/counts/mad train-peaks/Klf4/counts/mse train-peaks/Klf4/counts/pearsonr train-peaks/Klf4/counts/spearmanr train-peaks/Klf4/counts/var_explained train-peaks/Klf4/profile/binsize=1/auprc train-peaks/Klf4/profile/binsize=1/frac_ambigous train-peaks/Klf4/profile/binsize=1/imbalance train-peaks/Klf4/profile/binsize=1/n_positives train-peaks/Klf4/profile/binsize=1/random_auprc train-peaks/Klf4/profile/binsize=10/auprc train-peaks/Klf4/profile/binsize=10/frac_ambigous train-peaks/Klf4/profile/binsize=10/imbalance train-peaks/Klf4/profile/binsize=10/n_positives train-peaks/Klf4/profile/binsize=10/random_auprc train-peaks/Nanog/counts/mad train-peaks/Nanog/counts/mse train-peaks/Nanog/counts/pearsonr 
train-peaks/Nanog/counts/spearmanr train-peaks/Nanog/counts/var_explained train-peaks/Nanog/profile/binsize=1/auprc train-peaks/Nanog/profile/binsize=1/frac_ambigous train-peaks/Nanog/profile/binsize=1/imbalance train-peaks/Nanog/profile/binsize=1/n_positives train-peaks/Nanog/profile/binsize=1/random_auprc train-peaks/Nanog/profile/binsize=10/auprc train-peaks/Nanog/profile/binsize=10/frac_ambigous train-peaks/Nanog/profile/binsize=10/imbalance train-peaks/Nanog/profile/binsize=10/n_positives train-peaks/Nanog/profile/binsize=10/random_auprc train-peaks/Oct4/counts/mad train-peaks/Oct4/counts/mse train-peaks/Oct4/counts/pearsonr train-peaks/Oct4/counts/spearmanr train-peaks/Oct4/counts/var_explained train-peaks/Oct4/profile/binsize=1/auprc train-peaks/Oct4/profile/binsize=1/frac_ambigous train-peaks/Oct4/profile/binsize=1/imbalance train-peaks/Oct4/profile/binsize=1/n_positives train-peaks/Oct4/profile/binsize=1/random_auprc train-peaks/Oct4/profile/binsize=10/auprc train-peaks/Oct4/profile/binsize=10/frac_ambigous train-peaks/Oct4/profile/binsize=10/imbalance train-peaks/Oct4/profile/binsize=10/n_positives train-peaks/Oct4/profile/binsize=10/random_auprc train-peaks/Sox2/counts/mad train-peaks/Sox2/counts/mse train-peaks/Sox2/counts/pearsonr train-peaks/Sox2/counts/spearmanr train-peaks/Sox2/counts/var_explained train-peaks/Sox2/profile/binsize=1/auprc train-peaks/Sox2/profile/binsize=1/frac_ambigous train-peaks/Sox2/profile/binsize=1/imbalance train-peaks/Sox2/profile/binsize=1/n_positives train-peaks/Sox2/profile/binsize=1/random_auprc train-peaks/Sox2/profile/binsize=10/auprc train-peaks/Sox2/profile/binsize=10/frac_ambigous train-peaks/Sox2/profile/binsize=10/imbalance train-peaks/Sox2/profile/binsize=10/n_positives train-peaks/Sox2/profile/binsize=10/random_auprc train-peaks/avg/counts/mad train-peaks/avg/counts/mse train-peaks/avg/counts/pearsonr train-peaks/avg/counts/spearmanr train-peaks/avg/counts/var_explained train-peaks/avg/profile/binsize=1/auprc 
train-peaks/avg/profile/binsize=1/frac_ambigous train-peaks/avg/profile/binsize=1/imbalance train-peaks/avg/profile/binsize=1/n_positives train-peaks/avg/profile/binsize=1/random_auprc train-peaks/avg/profile/binsize=10/auprc train-peaks/avg/profile/binsize=10/frac_ambigous train-peaks/avg/profile/binsize=10/imbalance train-peaks/avg/profile/binsize=10/n_positives train-peaks/avg/profile/binsize=10/random_auprc use_bias valid-genome-wide/Klf4/class/accuracy valid-genome-wide/Klf4/class/auPR valid-genome-wide/Klf4/class/auROC valid-genome-wide/Klf4/class/frac_positive valid-genome-wide/Klf4/class/n_negative valid-genome-wide/Klf4/class/n_positive valid-genome-wide/Nanog/class/accuracy valid-genome-wide/Nanog/class/auPR valid-genome-wide/Nanog/class/auROC valid-genome-wide/Nanog/class/frac_positive valid-genome-wide/Nanog/class/n_negative valid-genome-wide/Nanog/class/n_positive valid-genome-wide/Oct4/class/accuracy valid-genome-wide/Oct4/class/auPR valid-genome-wide/Oct4/class/auROC valid-genome-wide/Oct4/class/frac_positive valid-genome-wide/Oct4/class/n_negative valid-genome-wide/Oct4/class/n_positive valid-genome-wide/Sox2/class/accuracy valid-genome-wide/Sox2/class/auPR valid-genome-wide/Sox2/class/auROC valid-genome-wide/Sox2/class/frac_positive valid-genome-wide/Sox2/class/n_negative valid-genome-wide/Sox2/class/n_positive valid-genome-wide/avg/class/accuracy valid-genome-wide/avg/class/auPR valid-genome-wide/avg/class/auROC valid-genome-wide/avg/class/frac_positive valid-genome-wide/avg/class/n_negative valid-genome-wide/avg/class/n_positive valid-peaks/Klf4/counts/mad valid-peaks/Klf4/counts/mse valid-peaks/Klf4/counts/pearsonr valid-peaks/Klf4/counts/spearmanr valid-peaks/Klf4/counts/var_explained valid-peaks/Klf4/profile/binsize=1/auprc valid-peaks/Klf4/profile/binsize=1/frac_ambigous valid-peaks/Klf4/profile/binsize=1/imbalance valid-peaks/Klf4/profile/binsize=1/n_positives valid-peaks/Klf4/profile/binsize=1/random_auprc 
valid-peaks/Klf4/profile/binsize=10/auprc valid-peaks/Klf4/profile/binsize=10/frac_ambigous valid-peaks/Klf4/profile/binsize=10/imbalance valid-peaks/Klf4/profile/binsize=10/n_positives valid-peaks/Klf4/profile/binsize=10/random_auprc valid-peaks/Nanog/counts/mad valid-peaks/Nanog/counts/mse valid-peaks/Nanog/counts/pearsonr valid-peaks/Nanog/counts/spearmanr valid-peaks/Nanog/counts/var_explained valid-peaks/Nanog/profile/binsize=1/auprc valid-peaks/Nanog/profile/binsize=1/frac_ambigous valid-peaks/Nanog/profile/binsize=1/imbalance valid-peaks/Nanog/profile/binsize=1/n_positives valid-peaks/Nanog/profile/binsize=1/random_auprc valid-peaks/Nanog/profile/binsize=10/auprc valid-peaks/Nanog/profile/binsize=10/frac_ambigous valid-peaks/Nanog/profile/binsize=10/imbalance valid-peaks/Nanog/profile/binsize=10/n_positives valid-peaks/Nanog/profile/binsize=10/random_auprc valid-peaks/Oct4/counts/mad valid-peaks/Oct4/counts/mse valid-peaks/Oct4/counts/pearsonr valid-peaks/Oct4/counts/spearmanr valid-peaks/Oct4/counts/var_explained valid-peaks/Oct4/profile/binsize=1/auprc valid-peaks/Oct4/profile/binsize=1/frac_ambigous valid-peaks/Oct4/profile/binsize=1/imbalance valid-peaks/Oct4/profile/binsize=1/n_positives valid-peaks/Oct4/profile/binsize=1/random_auprc valid-peaks/Oct4/profile/binsize=10/auprc valid-peaks/Oct4/profile/binsize=10/frac_ambigous valid-peaks/Oct4/profile/binsize=10/imbalance valid-peaks/Oct4/profile/binsize=10/n_positives valid-peaks/Oct4/profile/binsize=10/random_auprc valid-peaks/Sox2/counts/mad valid-peaks/Sox2/counts/mse valid-peaks/Sox2/counts/pearsonr valid-peaks/Sox2/counts/spearmanr valid-peaks/Sox2/counts/var_explained valid-peaks/Sox2/profile/binsize=1/auprc valid-peaks/Sox2/profile/binsize=1/frac_ambigous valid-peaks/Sox2/profile/binsize=1/imbalance valid-peaks/Sox2/profile/binsize=1/n_positives valid-peaks/Sox2/profile/binsize=1/random_auprc valid-peaks/Sox2/profile/binsize=10/auprc valid-peaks/Sox2/profile/binsize=10/frac_ambigous 
valid-peaks/Sox2/profile/binsize=10/imbalance valid-peaks/Sox2/profile/binsize=10/n_positives valid-peaks/Sox2/profile/binsize=10/random_auprc valid-peaks/avg/counts/mad valid-peaks/avg/counts/mse valid-peaks/avg/counts/pearsonr valid-peaks/avg/counts/spearmanr valid-peaks/avg/counts/var_explained valid-peaks/avg/profile/binsize=1/auprc valid-peaks/avg/profile/binsize=1/frac_ambigous valid-peaks/avg/profile/binsize=1/imbalance valid-peaks/avg/profile/binsize=1/n_positives valid-peaks/avg/profile/binsize=1/random_auprc valid-peaks/avg/profile/binsize=10/auprc valid-peaks/avg/profile/binsize=10/frac_ambigous valid-peaks/avg/profile/binsize=10/imbalance valid-peaks/avg/profile/binsize=10/n_positives valid-peaks/avg/profile/binsize=10/random_auprc best-epoch/val_profile_loss best-epoch/val_counts_loss
exp
nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.05,9,FALSE nexus False 128.0 NaN 0.6387 1038.3725 NaN 0.8809 1451.3745 NaN 0.4275 935.5277 NaN 0.3053 497.8172 7.0 3945.6162 NaN 0.6381 1031.9275 NaN 0.8843 1475.1878 NaN 0.4258 939.3719 NaN 0.3119 503.9134 3973.0010 NaN 0.0 False ChIP-nexus.dataspec.yml 64.0 b_loss_weight=0;c_los... problem-peaks.gin,joi... profile/wn 0.050 False NaN 9.0 lr-nexus 0.5 same 1000 1.00 NaN wandb peaks 10.0 False None 1000 1:12:15 39.55 18.18 10132.39 43432.02 7718.66 659943.66 0.0 4335.1623 25.0 OSNK True 0.6379 0.6389 -0.0006 -0.0004 -5.9605e-07 0.5013 0.064 0.0026 189381.0 0.5013 0.5136 0.3183 0.0273 147401.0 0.5136 0.7443 0.8799 0.0010 0.0011 -1.0729e-06 0.5027 0.058 5.3544e-03 409298.0 5.0268e-01 0.5199 0.2409 0.0398 245161.0 0.5199 0.4991 0.4275 0.0005 0.0009 0.0000 0.5015 0.0725 2.9488e-03 154097.0 5.0147e-01 0.5168 0.3639 0.0336 120478.0 0.5168 0.4103 0.3059 -0.0001 0.0005 -0.0013 0.5032 0.0703 0.0063 55119.0 0.5032 0.5275 0.3199 0.0551 35175.0 0.5275 0.5729 0.5631 0.0002 0.0005 -0.0003 0.5021 0.0662 0.0043 201973.7500 0.5021 0.5195 0.3108 0.0389 137053.7500 0.5195 False NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.6396 0.639 -0.0004 -0.0002 -4.7684e-07 0.5013 0.0636 0.0025 59593.0 0.5013 0.5135 0.3164 0.027 46678.0 0.5135 0.7425 0.8857 0.0003 0.0006 -4.5300e-06 5.0262e-01 0.0581 5.2392e-03 130301.0 5.0262e-01 0.5195 0.2430 0.0391 78059.0 0.5195 0.4984 0.4266 0.0013 0.0005 1.6093e-06 0.5014 0.0719 0.0029 48721.0 0.5014 0.5166 0.3615 0.0331 38842.0 0.5166 0.4120 0.3116 -0.0016 -0.0032 -0.0014 0.5029 0.0719 0.0058 17464.0 0.5029 0.5266 0.3297 0.0533 11634.0 0.5266 0.5731 0.5657 -8.6901e-05 -0.0006 -0.0003 0.5020 0.0664 0.0041 64019.7500 0.5020 0.5191 0.3126 0.0381 43803.2500 0.5191 3950.4006 2.2600
nexus,peaks,N,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE nexus False 128.0 NaN NaN NaN NaN 0.5926 975.9495 NaN NaN NaN NaN NaN NaN 7.0 981.8758 NaN NaN NaN NaN 0.6156 1017.6103 NaN NaN NaN NaN NaN NaN 1023.7664 NaN 0.0 False ChIP-nexus.dataspec.yml 64.0 b_loss_weight=0;c_los... problem-peaks.gin,joi... profile/wn 0.004 False NaN 9.0 nexus-single-task 0.5 same 1000 1.00 NaN wandb peaks 10.0 False None 1000 0:30:40 2320.00 10.36 3650.31 28179.99 1667.71 483509.83 0.0 1840.9186 25.0 N True NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.5735 0.5428 0.6249 0.6208 3.8509e-01 0.5286 0.058 5.3544e-03 409298.0 5.2839e-03 0.7919 0.2409 0.0398 245161.0 0.0392 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.5735 0.5428 0.6249 0.6208 0.3851 0.5286 0.0580 0.0054 409298.0000 0.0053 0.7919 0.2409 0.0398 245161.0000 0.0392 False NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.5944 0.6126 0.5599 0.5852 3.1338e-01 4.7108e-01 0.0581 5.2392e-03 130301.0 5.3311e-03 0.7491 0.2430 0.0391 78059.0 0.0402 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.5944 0.6126 5.5993e-01 0.5852 0.3134 0.4711 0.0581 0.0052 130301.0000 0.0053 0.7491 0.2430 0.0391 78059.0000 0.0402 1017.6103 0.6156
nexus,peaks,S,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE nexus False 128.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.2213 447.0558 7.0 449.2691 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.2335 455.5385 457.8733 NaN 0.0 False ChIP-nexus.dataspec.yml 64.0 b_loss_weight=0;c_los... problem-peaks.gin,joi... profile/wn 0.004 False NaN 9.0 nexus-single-task 0.5 same 1000 1.00 NaN wandb peaks 10.0 False None 1000 0:26:24 1851.25 11.13 4681.15 32680.51 2337.08 518329.80 0.0 1584.6983 25.0 S True NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.3430 0.2074 0.5770 0.5170 0.3280 0.5446 0.0703 0.0063 55119.0 0.0064 0.8461 0.3199 0.0551 35175.0 0.0552 0.3430 0.2074 0.5770 0.5170 0.3280 0.5446 0.0703 0.0063 55119.0000 0.0064 0.8461 0.3199 0.0551 35175.0000 0.0552 False NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.3566 0.2329 0.4995 0.4642 0.2495 0.4250 0.0719 0.0058 17464.0 0.0057 0.7744 0.3297 0.0533 11634.0 0.0570 0.3566 0.2329 4.9954e-01 0.4642 0.2495 0.4250 0.0719 0.0058 17464.0000 0.0057 0.7744 0.3297 0.0533 11634.0000 0.0570 455.5385 0.2335
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
seq,gw,OSN,1,0.5,0.05,FALSE,valid,0.5,64,50,0.001,9,FALSE seq False 128.0 NaN NaN NaN 0.2588 0.3496 289.9842 0.1928 0.2684 180.7970 0.0605 0.2151 248.4165 4.0 36.8885 NaN NaN NaN 0.0847 0.3432 129.4235 0.0558 0.2953 90.8904 0.0064 0.3036 181.7915 20.7233 NaN 1.0 False ChIP-seq.dataspec.yml 64.0 b_loss_weight=1;c_los... problem-gw.gin,joint-... profile/wn 0.001 False NaN 9.0 binary + profile 0.5 valid 1000 0.05 NaN wandb gw 0.5 False None 3118 15:11:33 3073.88 174.81 8288.19 88768.73 7026.00 662280.29 0.0 54693.5833 50.0 OSN True NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.5557 0.4482 0.8118 0.7782 6.5890e-01 0.0016 0.068 3.0869e-05 453.0 3.3133e-05 0.0242 0.2550 0.0003 325.0 0.0003 0.4148 0.2654 0.7908 0.7830 0.6250 0.0001 0.1049 2.5822e-05 30.0 1.7247e-05 0.0085 0.3771 0.0003 28.0 0.0003 0.3117 0.1541 0.7803 0.7462 0.5960 0.0176 0.1006 0.0006 490.0 0.0005 0.5364 0.3700 0.0074 453.0 0.0064 0.4274 0.2892 0.7943 0.7691 0.6266 0.0064 0.0912 0.0002 324.3333 0.0002 0.1897 0.3340 0.0027 268.6667 0.0023 False NaN NaN NaN NaN NaN NaN 0.9698 0.1337 0.9469 0.0033 9939787.0 32970.0 0.9848 0.0948 0.9298 0.0014 9958831.0 13926.0 0.9987 0.1151 0.8984 0.0006 9966833.0 5924.0 0.9844 0.1146 0.9250 0.0018 9.9552e+06 17606.6667 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.8901 1.1929 0.4159 0.3963 3.6664e-02 7.9673e-05 0.0666 2.2606e-05 105.0 2.4311e-05 0.0006 0.2487 0.0002 57.0 0.0001 0.6295 0.6252 0.5156 0.5056 1.6718e-01 NaN 0.1071 0.0000 0.0 NaN NaN 0.3729 0.0000 0.0 NaN 0.4408 0.3262 0.4484 0.4377 0.1577 0.0027 0.1119 0.0004 97.0 0.0003 0.1142 0.4215 0.0057 97.0 0.0052 0.6535 0.7147 4.5993e-01 0.4465 0.1205 NaN 0.0952 0.0001 67.3333 NaN NaN 0.3477 0.0020 51.3333 NaN 402.1055 0.9421
seq,gw,OSN,1,0.1,0.01,FALSE,valid,0.5,64,50,0.001,9,FALSE seq False 128.0 NaN NaN NaN 0.2478 0.4664 296.5045 0.1118 0.3244 182.5709 0.0265 0.2495 249.9536 5.0 7.7805 NaN NaN NaN 0.1169 0.4074 130.2945 0.0227 0.3280 91.0853 0.0056 0.3370 182.2588 4.2889 NaN 1.0 False ChIP-seq.dataspec.yml 64.0 b_loss_weight=1;c_los... problem-gw.gin,joint-... profile/wn 0.001 False NaN 9.0 binary + profile 0.5 valid 1000 0.01 NaN wandb gw 0.1 False None 3118 17:11:44 5565.51 219.97 9027.00 90540.82 5099.82 663886.37 0.0 61904.7757 50.0 OSN True NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.6001 0.5549 0.6687 0.6395 3.8208e-01 0.0005 0.068 3.0869e-05 453.0 2.8794e-05 0.0025 0.2550 0.0003 325.0 0.0003 0.4787 0.3558 0.7103 0.7127 0.4696 0.0001 0.1049 2.5822e-05 30.0 2.0761e-05 0.0066 0.3771 0.0003 28.0 0.0006 0.3024 0.1502 0.7011 0.6893 0.4706 0.0071 0.1006 0.0006 490.0 0.0006 0.2976 0.3700 0.0074 453.0 0.0079 0.4604 0.3537 0.6934 0.6805 0.4407 0.0026 0.0912 0.0002 324.3333 0.0002 0.1022 0.3340 0.0027 268.6667 0.0029 False NaN NaN NaN NaN NaN NaN 0.9520 0.0818 0.9189 0.0033 9939787.0 32970.0 0.9926 0.0876 0.9068 0.0014 9958831.0 13926.0 0.9984 0.0692 0.7974 0.0006 9966833.0 5924.0 0.9810 0.0795 0.8744 0.0018 9.9552e+06 17606.6667 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.8811 1.1945 0.3661 0.3486 -1.7562e-01 6.7402e-05 0.0666 2.2606e-05 105.0 2.7931e-05 0.0006 0.2487 0.0002 57.0 0.0002 0.6848 0.7186 0.4949 0.4746 6.9421e-02 NaN 0.1071 0.0000 0.0 NaN NaN 0.3729 0.0000 0.0 NaN 0.4325 0.3146 0.4414 0.4244 0.0376 0.0028 0.1119 0.0004 97.0 0.0004 0.1096 0.4215 0.0057 97.0 0.0051 0.6661 0.7425 4.3413e-01 0.4159 -0.0229 NaN 0.0952 0.0001 67.3333 NaN NaN 0.3477 0.0020 51.3333 NaN 403.6386 1.0724
seq,gw,OSN,0,10,1,FALSE,valid,0.5,64,50,0.001,9,FALSE seq False 128.0 NaN NaN NaN NaN 0.3723 290.0194 NaN 0.2861 180.8420 NaN 0.2208 248.4145 2.0 728.0673 NaN NaN NaN NaN 0.3705 129.5535 NaN 0.3080 90.7777 NaN 0.3030 181.7373 411.8831 NaN 0.0 False ChIP-seq.dataspec.yml 64.0 b_loss_weight=0;c_los... problem-gw.gin,joint-... profile/wn 0.001 False NaN 9.0 gw + profile 0.5 valid 1000 1.00 NaN wandb gw 10.0 False None 3118 12:34:22 6119.18 290.29 8501.05 82482.75 5160.75 650402.36 0.0 45262.0300 50.0 OSN True NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.4549 0.3144 0.8268 0.7980 6.7637e-01 0.0045 0.068 3.0869e-05 453.0 2.8209e-05 0.0300 0.2550 0.0003 325.0 0.0003 0.3805 0.2253 0.7914 0.7741 0.6260 0.0001 0.1049 2.5822e-05 30.0 2.5833e-05 0.0130 0.3771 0.0003 28.0 0.0004 0.2603 0.1117 0.7754 0.7318 0.5923 0.0139 0.1006 0.0006 490.0 0.0006 0.5905 0.3700 0.0074 453.0 0.0072 0.3653 0.2171 0.7979 0.7680 0.6316 0.0062 0.0912 0.0002 324.3333 0.0002 0.2112 0.3340 0.0027 268.6667 0.0026 False NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.8126 1.0394 0.4222 0.4001 -4.2650e-02 7.2238e-05 0.0666 2.2606e-05 105.0 2.8637e-05 0.0007 0.2487 0.0002 57.0 0.0001 0.5941 0.5724 0.5024 0.4889 1.2983e-01 NaN 0.1071 0.0000 0.0 NaN NaN 0.3729 0.0000 0.0 NaN 0.3823 0.2609 0.4511 0.4366 0.1359 0.0025 0.1119 0.0004 97.0 0.0004 0.1531 0.4215 0.0057 97.0 0.0048 0.5963 0.6242 4.5856e-01 0.4419 0.0744 NaN 0.0952 0.0001 67.3333 NaN NaN 0.3477 0.0020 51.3333 NaN 402.0685 0.9815

211 rows × 249 columns

In [150]:
# Learning-rate sweep runs (lr < 0.05, no interval augmentation) plus the default run
exps = list(df.index[(df.lr < 0.05) & (df.assay == 'nexus') & (df.note == 'lr-nexus')& (df.augment_interval == False)]) + [nexus_default_exp]
In [151]:
# Sanity-check: the learning rates covered by the sweep
list(df.loc[exps].sort_values('lr')['lr'])
Out[151]:
[0.0005, 0.001, 0.002, 0.004, 0.005, 0.01, 0.02, 0.04]
In [152]:
# x-axis variable for the following plots (re-assigned again in the next cell)
x_var = 'lr'
In [153]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Learning rate vs profile auPRC (log-scaled x axis)
fig, ax = plt.subplots(figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[nexus_metric_profile], s=s_default)
ax.set(xscale="log", xlim=[3e-4, 0.1],
       xlabel="Learning rate", ylabel=profile_auprc_name)
fig.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')
In [100]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Make the plot: negated validation profile loss (higher = better), i.e. a
# profile log-likelihood — this is NOT an auPRC, so label the axis
# accordingly (the original cell reused the auPRC label here, mislabelling
# the y axis).
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], -dfs[nexus_metric_profile2], s=s_default)
ax.set_ylabel('Profile LL');
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate");
# fig.savefig(fig_nexus_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [154]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Learning rate vs average total-count Spearman correlation (log-scaled x axis)
fig, ax = plt.subplots(figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set(xscale="log", xlim=[3e-4, 0.1],
       xlabel="Learning rate", ylabel=counts_spearman_name)
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

De-conv size

In [431]:
# De-convolution kernel-size sweep runs for ChIP-nexus
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'deconv4')])
In [432]:
# x-axis variable: transposed-convolution (output head) kernel size
x_var = 'tconv_kernel_size'
In [433]:
dfs = df.loc[exps].sort_values(x_var)

# De-convolution kernel size vs profile auPRC.
# Use the Axes API throughout (the original mixed plt.xticks with ax.* calls)
# and drop the commented-out axvline leftovers.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[nexus_metric_profile], s=s_default)
ax.set_ylabel(profile_auprc_name)
ax.set_xticks([1, 10, 25, 35])
ax.set_xlabel("De-convolution size")
fig.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')
In [434]:
dfs = df.loc[exps].sort_values(x_var)

# De-convolution kernel size vs total-count Spearman correlation.
# Grey vertical lines mark kernel sizes 1 and 25.
# Use the Axes API throughout (the original mixed plt.xticks with ax.* calls).
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
ax.axvline(25, color='grey', alpha=0.2)
ax.set_xticks([1, 10, 25, 35])
ax.scatter(dfs[x_var], dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Number of layers (same padding)

Compute the receptive field:

In [160]:
from basepair.models import seq_bpnet_cropped_extra_seqlen
In [161]:
# Receptive field of the model as a function of the number of conv layers
# (first layer kernel 25, then nl-1 dilated layers; +1 for the centre position).
n_layers = np.arange(1, 14)
receptive_field = []
for nl in n_layers:
    rf = seq_bpnet_cropped_extra_seqlen(conv1_kernel_size=25,
                                        n_dil_layers=nl - 1,
                                        tconv_kernel_size=1,
                                        target_seqlen=0) + 1
    receptive_field.append(rf)
print(pd.DataFrame({"receptive_field": receptive_field, "n_layers": n_layers}).to_string())
    receptive_field  n_layers
0                25         1
1                29         2
2                37         3
3                53         4
4                85         5
5               149         6
6               277         7
7               533         8
8              1045         9
9              2069        10
10             4117        11
11             8213        12
12            16405        13
In [162]:
# Layer-count sweep runs (same padding) plus the default run
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'n layers')& (df.padding == 'same')]) + [nexus_default_exp]
In [163]:
# x-axis variable: number of dilated conv layers (plotted below as n_dil_layers + 1 total layers)
x_var = 'n_dil_layers'
In [165]:
# Per-TF profile auPRC vs total number of conv layers.
# The sorted frame does not depend on tf, so hoist it out of the loop
# (the original recomputed it on every iteration).
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(dfs[x_var]+1, dfs[f'valid-peaks/{tf}/profile/binsize=1/auprc'], 
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1,1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(profile_auprc_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')
In [166]:
# Per-TF total-count Spearman correlation vs total number of conv layers.
# Hoist the loop-invariant sort out of the loop; drop the redundant
# plt.xlabel("Number of Layers") call (it was immediately overridden by
# ax.set_xlabel, so the effective label stays "Number of layers").
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(dfs[x_var]+1, dfs[f'valid-peaks/{tf}/counts/spearmanr'], 
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.set_xticks([1, 5, 10])
ax.legend(loc="upper left", bbox_to_anchor=(1,1))
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("Number of layers")
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Profile vs regression weight

In [437]:
# Counts-weight (regression_weight) sweep runs plus the default run
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'regression_weight') & (df.padding == 'same')]) + [nexus_default_exp]
In [438]:
# x-axis variable: weight on the total-count (regression) loss
x_var = 'regression_weight'
In [176]:
## Compute the median value of the total counts for each task
# Load the profiles from the default run's importance-score HDF5, then take
# the median over examples of the per-example total count averaged over
# strands and tasks.
# NOTE(review): exact semantics depend on ImpScoreFile.get_profiles() — verify.
profiles = ImpScoreFile(f"output/{nexus_default_exp}/deeplift.imp_score.h5").get_profiles()

nexus_median_N = np.median(mean([p.sum(axis=-2).mean(axis=-1) for t,p in profiles.items()]))
print(nexus_median_N)
130.5
In [439]:
# "Natural" counts-loss weight = half the median total counts.
# NOTE(review): `//` floor-divides (130.5 // 2 == 65.0) — presumably intended
# to get a round weight, but confirm plain `/ 2` wasn't meant.
nexus_natural_weight = nexus_median_N // 2
In [441]:
# Two stacked panels sharing the x axis: profile auPRC (top) and counts
# Spearman (bottom) as a function of the relative count-loss weight
# (regression_weight normalised by the "natural" weight computed above).
midpoint = 1  # relative weight of 1 == natural weighting
dfs = df.loc[exps].sort_values(x_var)
fig, axes = plt.subplots(2, 1, figsize=get_figsize(.35, 1), sharex=True, gridspec_kw=dict(hspace=0))
ax = axes[0]
# dashed grey line at the natural weight; faint line at weight 10 (presumably
# the value used in the default experiment — verify)
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / nexus_natural_weight, dfs[nexus_metric_profile], s=s_default)
ax.axvline(x=10/nexus_natural_weight, color='grey', linestyle='--', alpha=0.1)
ax.set_xscale('log')
ax.set_ylabel('Profile\nauPRC');
ax.set_xlim([5e-3, 5e2])
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);

ax = axes[1]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var]  / nexus_natural_weight, dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Tot. counts $R_{s}$');
ax.set_xlabel("Relative total count weight");
ax.set_xlim([5e-3, 5e2]);
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);
fig.savefig(fig_nexus_hp / f'{x_var}.both-auprc-spearman.pdf', bbox_inches='tight')

ChIP-seq hyper-parameters

In [179]:
# Default ChIP-seq experiment id (comma-separated hyper-parameter string used as df index)
seq_default_exp = 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE'

Learning rate

In [180]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'lr-nexus')]) + [seq_default_exp]
In [181]:
list(df.loc[exps].sort_values('lr')['lr'])
Out[181]:
[0.0005, 0.001, 0.002, 0.004, 0.005, 0.01, 0.02, 0.04, 0.05]
In [182]:
seq_metric_name = 'Profile LL'
In [183]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Profile log-likelihood (negated loss) vs. learning rate, log-scaled x axis
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], -dfs[seq_metric_profile], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel(seq_metric_name)
fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [184]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Total-count Spearman vs. learning rate, log-scaled x axis
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[seq_metric_counts], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel(counts_spearman_name)
fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

De-conv size

In [347]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'deconv')]) + [seq_default_exp]
In [348]:
x_var = 'tconv_kernel_size'
In [349]:
dfs = df.loc[exps].sort_values(x_var)

# Profile log-likelihood vs. transposed-convolution kernel size
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
# ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(25, color='grey', alpha=0.2)

ax.scatter(dfs[x_var], -dfs[seq_metric_profile], s=s_default)
ax.set_xticks([1, 20, 50, 100])
ax.set_ylabel(seq_metric_name)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [350]:
dfs = df.loc[exps].sort_values(x_var)

# Total-count Spearman vs. transposed-convolution kernel size
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(50, color='grey', alpha=0.2)
ax.set_xticks([1, 20, 50, 100])
ax.scatter(dfs[x_var], dfs[seq_metric_counts], s=s_default)
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

De-conv size 2

In [351]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'deconv2')])
In [352]:
x_var = 'tconv_kernel_size'
In [353]:
dfs = df.loc[exps].sort_values(x_var)

# Profile log-likelihood vs. kernel size for the second de-conv sweep
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
# ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(25, color='grey', alpha=0.2)

ax.scatter(dfs[x_var], -dfs[seq_metric_profile], s=s_default)
ax.set_xticks([1, 20, 50, 100])
ax.set_ylabel(seq_metric_name)
ax.set_xlabel("De-convolution size")
# fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [354]:
dfs = df.loc[exps].sort_values(x_var)

# Total-count Spearman vs. kernel size for the second de-conv sweep
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(50, color='grey', alpha=0.2)
ax.set_xticks([1, 20, 50, 100])
ax.scatter(dfs[x_var], dfs[seq_metric_counts], s=s_default)
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("De-convolution size")
# fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Number of layers (same padding)

In [189]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'n layers')& (df.padding == 'same')]) + [seq_default_exp]
In [190]:
x_var = 'n_dil_layers'
In [191]:
# Per-TF profile log-likelihood vs. number of dilated layers.
# Fix: `dfs` is loop-invariant — compute it once instead of per TF.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               -dfs[f'best-epoch/val_{tf}/profile_loss'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(seq_metric_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [192]:
# Per-TF count Spearman vs. number of dilated layers.
# Fix: `dfs` is loop-invariant — compute it once instead of per TF.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               dfs[f'valid-peaks/{tf}/counts/spearmanr'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Profile vs regression weight

In [410]:
# ChIP-seq runs from the regression-weight sweep (unseeded), plus the default run
exps = list(df.index[(df.assay == 'seq') & (df.note == 'regression_weight') & (df.seed == 'None')]) + [seq_default_exp]
In [411]:
x_var = 'regression_weight'
In [390]:
## Compute the median value of the total counts for each task
profiles = ImpScoreFile(f"output/{seq_default_exp}/deeplift.imp_score.h5").get_profiles()

# Sum over axis -2 and average over axis -1 per task (presumably positions and
# strands — TODO confirm array layout), average the tasks, then take the median.
per_task_totals = [profile.sum(axis=-2).mean(axis=-1) for _, profile in profiles.items()]
seq_median_N = np.median(mean(per_task_totals))
print(seq_median_N)
49.0
In [412]:
# "Natural" count-loss weight = half the median total counts.
# NOTE(review): `//` floor-divides (24.0 rather than 24.5 for median 49.0) — confirm the rounding is intended.
seq_natural_weight = seq_median_N // 2
In [417]:
# Two stacked panels sharing the x axis: profile LL (top) and total-count
# Spearman (bottom) vs. the weight relative to the "natural" weight.
# Fix: dropped a pointless f-string prefix on a literal with no placeholders.
midpoint = 1
dfs = df.loc[exps].sort_values(x_var)
fig, axes = plt.subplots(2, 1, figsize=get_figsize(.35, 1), sharex=True, gridspec_kw=dict(hspace=0))
ax = axes[0]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / seq_natural_weight, -dfs['best-epoch/val_profile_loss'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Profile LL');
ax.set_xlim([5e-3, 5e2])
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);

ax = axes[1]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / seq_natural_weight, dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Tot. counts $R_{s}$');
ax.set_xlabel("Relative total count weight");
ax.set_xlim([5e-3, 5e2]);
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);
fig.savefig(fig_seq_hp / f'{x_var}.both-profileLL-spearman.pdf', bbox_inches='tight')

Profile vs regression weight 2

In [418]:
list(df.index[(df.assay == 'seq') & (df.note == 'deconv2') & (df.tconv_kernel_size == 50)])
Out[418]:
['seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42']
In [425]:
df.index[(df.assay == 'seq') & (df.seed == "42")]
Out[425]:
Index(['seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,1,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,10,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,20,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,30,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,40,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,60,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,70,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,80,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,100,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,1,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,2,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,5,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,20,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,50,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,100,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,200,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,500,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,1000,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,2000,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,5000,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10000,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42'],
      dtype='object', name='exp')
In [426]:
# Seeded (seed=42) regression-weight sweep, plus the matching deconv2 run with kernel size 50
exps = (list(df.index[(df.assay == 'seq') & (df.seed == "42") & (df.note == 'regression_weight')]) + list(df.index[(df.assay == 'seq') & (df.note == 'deconv2') & (df.tconv_kernel_size == 50)]))
In [427]:
x_var = 'regression_weight'
In [428]:
# ## Compute the median value of the total counts for each task
# profiles = ImpScoreFile(f"output/{seq_default_exp}/deeplift.imp_score.h5").get_profiles()

# seq_median_N = np.median(mean([p.sum(axis=-2).mean(axis=-1) for t,p in profiles.items()]))
# print(seq_median_N)
In [429]:
seq_natural_weight = seq_median_N // 2
In [430]:
# Same two-panel weight plot as above, on the seeded (seed=42) sweep.
# Fix: dropped a pointless f-string prefix on a literal with no placeholders.
midpoint = 1
dfs = df.loc[exps].sort_values(x_var)
fig, axes = plt.subplots(2, 1, figsize=get_figsize(.35, 1), sharex=True, gridspec_kw=dict(hspace=0))
ax = axes[0]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / seq_natural_weight, -dfs['best-epoch/val_profile_loss'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Profile LL');
ax.set_xlim([5e-3, 5e2])
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);

ax = axes[1]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / seq_natural_weight, dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Tot. counts $R_{s}$');
ax.set_xlabel("Relative total count weight");
ax.set_xlim([5e-3, 5e2]);
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);
# fig.savefig(fig_seq_hp / f'{x_var}.both-profileLL-spearman.pdf', bbox_inches='tight')

Genome-wide models

ChIP-nexus

Learning rate

In [198]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'binary - lr')])
In [199]:
list(df.loc[exps].sort_values('lr')['lr'])
Out[199]:
[0.0005,
 0.0005,
 0.001,
 0.001,
 0.002,
 0.002,
 0.004,
 0.004,
 0.005,
 0.005,
 0.01,
 0.01,
 0.02,
 0.02,
 0.04,
 0.04,
 0.05,
 0.05]
In [200]:
gw_binary_metric = 'valid-genome-wide/avg/class/auPR'
In [201]:
x_var = 'lr'
# NOTE: `dfs` is reused by the class-balance cell below — keep the name.
dfs = df.loc[exps].sort_values(x_var)

# Genome-wide binary auPR vs. learning rate
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel('auPR')
fig.savefig(fig_nexus_hp / f'{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')
In [202]:
# Class-balance statistics per TF; identical across runs, hence drop_duplicates
stats = ['frac_positive', 'n_negative', 'n_positive']
features = [f'valid-genome-wide/{tf}/class/{stat}' for tf in osnk_tfs for stat in stats]
dict(dfs[features].drop_duplicates().dropna().iloc[0])
Out[202]:
{'valid-genome-wide/Oct4/class/frac_positive': 0.005917937004352722,
 'valid-genome-wide/Oct4/class/n_negative': 9914050.0,
 'valid-genome-wide/Oct4/class/n_positive': 59020.0,
 'valid-genome-wide/Sox2/class/frac_positive': 0.001896607564170311,
 'valid-genome-wide/Sox2/class/n_negative': 9954155.0,
 'valid-genome-wide/Sox2/class/n_positive': 18915.0,
 'valid-genome-wide/Nanog/class/frac_positive': 0.008883322788268809,
 'valid-genome-wide/Nanog/class/n_negative': 9884476.0,
 'valid-genome-wide/Nanog/class/n_positive': 88594.0,
 'valid-genome-wide/Klf4/class/frac_positive': 0.012622291831903316,
 'valid-genome-wide/Klf4/class/n_negative': 9847187.0,
 'valid-genome-wide/Klf4/class/n_positive': 125883.0}

Profile importance

In [27]:
# Genome-wide binary models with an added profile head (same padding),
# compared against the purely binary default below
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'binary + profile')& (df.padding == 'same')])
default = 'nexus,gw,OSNK,1,0,0,FALSE,same,0.5,64,25,0.001,9,FALSE'
In [28]:
x_var = 'profile_weight'
In [29]:
dfs = df.loc[exps].sort_values(x_var)

# Genome-wide auPR vs. profile-head weight; horizontal line = binary-only baseline
metric = 'valid-genome-wide/avg/class/auPR'
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[metric], s=s_default)
ax.axhline(df.loc[default][metric])
# ax.set_ylim([0.07, 0.15])
ax.set_xlim([5e-3, 2])
ax.set_xscale('log')
ax.set_ylabel("auPR")
ax.set_xlabel("Profile importance")
fig.savefig(fig_nexus_hp / f'{x_var}.gw-binary.auprc.pdf', bbox_inches='tight')

ChIP-seq

Learning rate

In [206]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'binary - lr')])
In [207]:
list(df.loc[exps].sort_values('lr')['lr'])
Out[207]:
[0.0005,
 0.0005,
 0.001,
 0.001,
 0.002,
 0.002,
 0.004,
 0.004,
 0.005,
 0.005,
 0.01,
 0.01,
 0.02,
 0.02,
 0.04,
 0.04,
 0.05,
 0.05]
In [208]:
gw_binary_metric = 'valid-genome-wide/avg/class/auPR'
In [209]:
x_var = 'lr'
In [210]:
x_var = 'lr'
# NOTE: `dfs` is reused by the class-balance cell below — keep the name.
dfs = df.loc[exps].sort_values(x_var)

# Genome-wide binary auPR vs. learning rate
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel('auPR')
fig.savefig(fig_seq_hp / f'{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')
In [211]:
# Class-balance statistics per TF; identical across runs, hence drop_duplicates
stats = ['frac_positive', 'n_negative', 'n_positive']
features = [f'valid-genome-wide/{tf}/class/{stat}' for tf in osn_tfs for stat in stats]
dict(dfs[features].drop_duplicates().dropna().iloc[0])
Out[211]:
{'valid-genome-wide/Oct4/class/frac_positive': 0.0013963865814930346,
 'valid-genome-wide/Oct4/class/n_negative': 9958957.0,
 'valid-genome-wide/Oct4/class/n_positive': 13926.0,
 'valid-genome-wide/Sox2/class/frac_positive': 0.0005940107790294942,
 'valid-genome-wide/Sox2/class/n_negative': 9966959.0,
 'valid-genome-wide/Sox2/class/n_positive': 5924.0,
 'valid-genome-wide/Nanog/class/frac_positive': 0.003305964784706689,
 'valid-genome-wide/Nanog/class/n_negative': 9939913.0,
 'valid-genome-wide/Nanog/class/n_positive': 32970.0}

Note

ChIP-nexus has roughly 3x more peaks; hence the auPR is higher.

Adding profile weight

In [36]:
# Binary-only ChIP-seq baseline and the same-padding binary+profile sweep
default = 'seq,gw,OSN,1,0,0,FALSE,same,0.5,64,50,0.001,9,FALSE'
exps = list(df.index[(df.assay == 'seq') & (df.note == 'binary + profile')& (df.padding == 'same')])
In [37]:
x_var = 'profile_weight'
In [38]:
dfs = df.loc[exps].sort_values(x_var)

# Genome-wide auPR vs. profile-head weight (same padding);
# horizontal line = binary-only baseline
metric = 'valid-genome-wide/avg/class/auPR'
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[metric], s=s_default)
ax.axhline(df.loc[default][metric])
ax.set_ylim([0.07, 0.15])
ax.set_xlim([5e-3, 2])
ax.set_xscale('log')
ax.set_ylabel("auPR")
ax.set_xlabel("Profile importance")
fig.savefig(fig_seq_hp / f'{x_var}.same-padding.gw-binary.auprc.pdf', bbox_inches='tight')
In [39]:
# Binary-only ChIP-seq baseline and the valid-padding binary+profile sweep
default = 'seq,gw,OSN,1,0,0,FALSE,valid,0.5,64,50,0.001,9,FALSE'
exps = list(df.index[(df.assay == 'seq') & (df.note == 'binary + profile')& (df.padding == 'valid')])
In [40]:
x_var = 'profile_weight'
In [41]:
dfs = df.loc[exps].sort_values(x_var)

# Genome-wide auPR vs. profile-head weight (valid padding);
# horizontal line = binary-only baseline
metric = 'valid-genome-wide/avg/class/auPR'
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[metric], s=s_default)
ax.axhline(df.loc[default][metric])
ax.set_ylim([0.07, 0.15])
ax.set_xlim([5e-3, 2])
ax.set_xscale('log')
ax.set_ylabel("auPR")
ax.set_xlabel("Profile importance")
fig.savefig(fig_seq_hp / f'{x_var}.valid-padding.gw-binary.auprc.pdf', bbox_inches='tight')

Extra

ChIP-nexus

Number of layers (valid padding)

In [215]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'n layers')& (df.padding == 'valid')])
In [216]:
x_var = 'n_dil_layers'
In [217]:
# Per-TF profile auPRC vs. number of dilated layers (valid padding).
# Fix: `dfs` is loop-invariant — compute it once instead of per TF.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               dfs[f'valid-peaks/{tf}/profile/binsize=1/auprc'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(profile_auprc_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.profile-auprc.pdf', bbox_inches='tight')
In [218]:
# Per-TF count Spearman vs. number of dilated layers (valid padding).
# Fix 1: `dfs` is loop-invariant — compute it once instead of per TF.
# Fix 2: the x label was set twice (plt.xlabel then ax.set_xlabel); keep the final one.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               dfs[f'valid-peaks/{tf}/counts/spearmanr'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.set_xticks([1, 5, 10])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("Number of layers")
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.counts-spearman.pdf', bbox_inches='tight')

De-conv size

In [243]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'deconv') & (df.padding == 'valid')]) + [nexus_default_exp]
In [244]:
x_var = 'tconv_kernel_size'
In [245]:
dfs = df.loc[exps].sort_values(x_var)

# Profile auPRC vs. de-convolution kernel size (valid padding)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
# ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(25, color='grey', alpha=0.2)

ax.scatter(dfs[x_var], dfs[nexus_metric_profile], s=s_default)
ax.set_xticks([1, 10, 25, 35])
ax.set_ylabel(profile_auprc_name)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.profile-auprc.pdf', bbox_inches='tight')
In [159]:
# Count Spearman vs. de-convolution kernel size (valid padding).
# Fix: the output filename lacked the 'valid-padding' tag present in the sibling
# profile-auprc plot, so this figure silently overwrote the same-padding
# counts-spearman figure saved earlier under the identical name.
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
ax.axvline(25, color='grey', alpha=0.2)
plt.xticks([1, 10, 25, 35])
ax.scatter(dfs[x_var], dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_ylabel(counts_spearman_name);

ax.set_xlabel("De-convolution size");
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.counts-spearman.pdf', bbox_inches='tight')

ChIP-seq

Number of layers (valid padding)

In [219]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'n layers')& (df.padding == 'valid')])
In [220]:
x_var = 'n_dil_layers'
In [221]:
# Per-TF profile log-likelihood vs. number of dilated layers (valid padding).
# Fix: `dfs` is loop-invariant — compute it once instead of per TF.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               -dfs[f'best-epoch/val_{tf}/profile_loss'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(seq_metric_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.valid-padding.profile-ll.pdf', bbox_inches='tight')
In [222]:
# Per-TF count Spearman vs. number of dilated layers (valid padding).
# Fix: `dfs` is loop-invariant — compute it once instead of per TF.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               dfs[f'valid-peaks/{tf}/counts/spearmanr'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.valid-padding.counts-spearman.pdf', bbox_inches='tight')

Classification models

Basset

In [18]:
# Basset classification runs; sweep variable is the dropout rate
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'basset')])
gw_binary_metric = 'valid-genome-wide/avg/class/auPR'
x_var = 'dropout'
In [20]:
# Best genome-wide auPR across the dropout sweep
# NOTE: `dfs` is reused by the plotting cell below
dfs = df.loc[exps].sort_values(x_var)
dfs[gw_binary_metric].max()
Out[20]:
0.24305993415698324
In [24]:
# Genome-wide auPR vs. dropout rate for the Basset model
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
# ax.set_xscale("log")
# ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Dropout")
ax.set_ylabel('auPR')
fig.savefig(fig_seq_hp / f'basset.{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')

Factorized Basset

In [21]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'factorized-basset')])
In [22]:
# Best genome-wide auPR across the factorized-Basset dropout sweep
# NOTE: `dfs` is reused by the plotting cell below
dfs = df.loc[exps].sort_values(x_var)
dfs[gw_binary_metric].max()
Out[22]:
0.23801075095517246
In [26]:
# Genome-wide auPR vs. dropout rate for the factorized Basset model
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
# ax.set_xscale("log")
# ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Dropout")
ax.set_ylabel('auPR')
fig.savefig(fig_seq_hp / f'factorized-basset.{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')