Goal

  • make the paper figures for the hyper-parameters

Tasks

  • [x] gather the experiment table

TODO

  • [x] Put the weighting into context
  • [~] Make all the plots and assemble them together in Illustrator
  • [ ] Use the same y and x axis span for all the hyper-parameter plots (except the multi-TF plots)
In [23]:
# Imports
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from basepair.imports import *
from basepair.exp.paper.config import tf_colors
from basepair.functions import mean
from basepair.cli.imp_score import ImpScoreFile
[autoreload of basepair.preproc failed: Traceback (most recent call last):
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 244, in check
    superreload(m, reload, self.old_objects)
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 376, in superreload
    module = reload(module)
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/imp.py", line 315, in reload
    return importlib.reload(module)
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/importlib/__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 618, in _exec
  File "<frozen importlib._bootstrap_external>", line 678, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/users/avsec/workspace/basepair/basepair/preproc.py", line 301, in <module>
    @gin.configurable
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/gin/config.py", line 1129, in configurable
    return perform_decoration(decoration_target)
  File "/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/gin/config.py", line 1126, in perform_decoration
    return _make_configurable(fn_or_cls, name, module, whitelist, blacklist)
ValueError: A configurable matching 'basepair.preproc.IntervalAugmentor' already exists.
]
In [24]:
# Apply the paper-wide plotting configuration (presumably styles matplotlib;
# provided by the star import from basepair.imports — verify).
paper_config()
In [25]:
# Output directories for the hyper-parameter figures (v2), one per assay.
# `ddir` is presumably the project data dir from the star import above — TODO confirm.
fig_nexus_hp = Path(f"{ddir}/figures/model-evaluation/ChIP-nexus/hyper-parameters-v2")
fig_seq_hp = Path(f"{ddir}/figures/model-evaluation/ChIP-seq/hyper-parameters-v2")
In [26]:
# Transcription-factor task lists: the OSN trio and the full OSNK set.
osn_tfs = ['Oct4', 'Sox2', 'Nanog']
osnk_tfs = osn_tfs + ['Klf4']
In [27]:
# Create the figure output directories (IPython shell escape;
# -p creates parents and is a no-op if the directory already exists).
!mkdir -p {fig_seq_hp}
!mkdir -p {fig_nexus_hp}
In [28]:
# Experiment summary table: one row per finished model run.
df = pd.read_csv("output/model.results.finished.csv")
In [29]:
# Index the table by experiment id so runs can be looked up with df.loc below.
# Re-assignment instead of inplace=True: same result, avoids the in-place
# mutation anti-pattern.
df = df.set_index('exp')
In [30]:
# Setup the profile loss: total validation profile loss, summed over the TF
# heads present in each run. NaN entries (TFs the model was not trained on)
# are skipped by sum(), which reproduces the original NaN-masked accumulation
# while avoiding the chained assignment df[col][mask] += ... (which pandas
# may apply to a temporary copy, silently dropping the update).
profile_loss_cols = [f'best-epoch/val_{tf}/profile_loss' for tf in osnk_tfs]
df['best-epoch/val_profile_loss'] = df[profile_loss_cols].sum(axis=1)
In [31]:
# Setup the counts loss (original comment said "profile" — copy-paste slip):
# total validation counts loss, summed over the TF heads present in each run.
# NaNs are skipped by sum(), replacing the fragile chained-assignment
# accumulation (df[col][mask] += ...) with an equivalent vectorized sum.
counts_loss_cols = [f'best-epoch/val_{tf}/counts_loss' for tf in osnk_tfs]
df['best-epoch/val_counts_loss'] = df[counts_loss_cols].sum(axis=1)
In [32]:
# Number of experiments in the table
len(df)
Out[32]:
251
In [33]:
# Validation-metric column names used throughout the figures.
# "profile" metrics score the predicted profile shape, "counts" the total counts.
nexus_metric_profile = 'valid-peaks/avg/profile/binsize=1/auprc' 
nexus_metric_profile2 = 'best-epoch/val_profile_loss'
# nexus_metric_profile = 'best-epoch/val_loss' 
nexus_metric_counts = 'valid-peaks/avg/counts/spearmanr'
nexus_metric = 'best-epoch/val_loss'
seq_metric = 'best-epoch/val_loss'
seq_metric_profile = 'best-epoch/val_profile_loss'
seq_metric_profile2 = 'valid-peaks/avg/profile/binsize=1/auprc' 
seq_metric_counts = 'valid-peaks/avg/counts/spearmanr'
In [34]:
# Plot params
profile_auprc_name = 'Profile auPRC'
counts_spearman_name = r"Total counts $R_{s}$"

s_default = 20
In [35]:
# Single-task ChIP-nexus runs (one model per TF).
exps = list(df.query("assay == 'nexus' and note == 'nexus-single-task'").index)
In [36]:
# Show the selected single-task experiment ids
exps
Out[36]:
['nexus,peaks,O,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE',
 'nexus,peaks,S,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE',
 'nexus,peaks,N,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE',
 'nexus,peaks,K,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE']
In [37]:
# Confirm which profile metric column is in use
nexus_metric_profile
Out[37]:
'valid-peaks/avg/profile/binsize=1/auprc'
In [38]:
# Default multi-task ChIP-nexus experiment (all four TFs).
# NOTE: the same value is re-assigned again further down in the notebook.
nexus_default_exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE-2'

Multi-task

In [269]:
# Multi-task model: per-TF profile auPRC of the default OSNK run.
default_row = df.loc[nexus_default_exp]
v = {tf: default_row[f'valid-peaks/{tf}/profile/binsize=1/auprc'] for tf in osnk_tfs}
v
Out[269]:
{'Oct4': 0.1833659351513462,
 'Sox2': 0.3802538585433689,
 'Nanog': 0.4486560937694963,
 'Klf4': 0.15679826648753378}
In [270]:
# Average across the four TFs
mean(list(v.values()))
Out[270]:
0.2922685384879363
In [271]:
# Single task
v= {tf: dict(df.loc[exps][['tfs', nexus_metric_profile]].set_index("tfs").iloc[:,0])[tf[0]] for tf in osnk_tfs}
v
Out[271]:
{'Oct4': 0.20845759163615404,
 'Sox2': 0.4250493674411673,
 'Nanog': 0.4710824673004411,
 'Klf4': 0.17793632002159224}
In [272]:
# Average across the four TFs
mean(list(v.values()))
Out[272]:
0.32063143659983867

Single-task

In [273]:
# Multi-task model: per-TF total-count Spearman of the default OSNK run.
default_row = df.loc[nexus_default_exp]
v = {tf: default_row[f'valid-peaks/{tf}/counts/spearmanr'] for tf in osnk_tfs}
v
Out[273]:
{'Oct4': 0.479076032353049,
 'Sox2': 0.44157320459105576,
 'Nanog': 0.6012117950669067,
 'Klf4': 0.5773766993655419}
In [274]:
# Average across the four TFs
mean(list(v.values()))
Out[274]:
0.5248094328441384
In [275]:
# Single task
# Same lookup pattern as for the profile metric: map the single-letter 'tfs'
# key of each single-task run to its counts Spearman, index by tf[0].
v = {tf: dict(df.loc[exps][['tfs', f'valid-peaks/{tf}/counts/spearmanr']].set_index("tfs").iloc[:,0])[tf[0]] for tf in osnk_tfs}
v
Out[275]:
{'Oct4': 0.4984590360702481,
 'Sox2': 0.4642387531706982,
 'Nanog': 0.585168601772147,
 'Klf4': 0.5841399987556414}
In [276]:
# Average across the four TFs
mean(list(v.values()))
Out[276]:
0.5330015974421837

ChIP-nexus hyper-parameters

In [277]:
# Default multi-task experiment id (re-assignment; same value as defined earlier)
nexus_default_exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE-2'

Learning rate

In [278]:
# Inspect all runs ranked by profile auPRC (best first).
# NOTE(review): this displays the full ~250-column frame; consider selecting
# the relevant columns and/or .head() to keep the notebook readable.
df.sort_values(nexus_metric_profile, ascending=False)
Out[278]:
assay augment_interval batch_size best-epoch/Klf4/class_loss best-epoch/Klf4/counts_loss best-epoch/Klf4/profile_loss best-epoch/Nanog/class_loss best-epoch/Nanog/counts_loss best-epoch/Nanog/profile_loss best-epoch/Oct4/class_loss best-epoch/Oct4/counts_loss best-epoch/Oct4/profile_loss best-epoch/Sox2/class_loss best-epoch/Sox2/counts_loss best-epoch/Sox2/profile_loss best-epoch/epoch best-epoch/loss best-epoch/val_Klf4/class_loss best-epoch/val_Klf4/counts_loss best-epoch/val_Klf4/profile_loss best-epoch/val_Nanog/class_loss best-epoch/val_Nanog/counts_loss best-epoch/val_Nanog/profile_loss best-epoch/val_Oct4/class_loss best-epoch/val_Oct4/counts_loss best-epoch/val_Oct4/profile_loss best-epoch/val_Sox2/class_loss best-epoch/val_Sox2/counts_loss best-epoch/val_Sox2/profile_loss best-epoch/val_loss bias_pool binary_weight bn dataspec filters gin_bindings gin_files imp_score lr merge_profile_reg modisco_tasks n_dil_layers note p_peak padding peak_width profile_weight r_modisco r_wandb region regression_weight run_modisco seed seq_width stats/train/h:m:s stats/train/io_in stats/train/io_out stats/train/max_pss stats/train/max_rss stats/train/max_uss stats/train/max_vms stats/train/mean_load stats/train/s tconv_kernel_size tfs train train-peaks/Klf4/counts/mad train-peaks/Klf4/counts/mse train-peaks/Klf4/counts/pearsonr train-peaks/Klf4/counts/spearmanr train-peaks/Klf4/counts/var_explained train-peaks/Klf4/profile/binsize=1/auprc train-peaks/Klf4/profile/binsize=1/frac_ambigous train-peaks/Klf4/profile/binsize=1/imbalance train-peaks/Klf4/profile/binsize=1/n_positives train-peaks/Klf4/profile/binsize=1/random_auprc train-peaks/Klf4/profile/binsize=10/auprc train-peaks/Klf4/profile/binsize=10/frac_ambigous train-peaks/Klf4/profile/binsize=10/imbalance train-peaks/Klf4/profile/binsize=10/n_positives train-peaks/Klf4/profile/binsize=10/random_auprc train-peaks/Nanog/counts/mad train-peaks/Nanog/counts/mse train-peaks/Nanog/counts/pearsonr 
train-peaks/Nanog/counts/spearmanr train-peaks/Nanog/counts/var_explained train-peaks/Nanog/profile/binsize=1/auprc train-peaks/Nanog/profile/binsize=1/frac_ambigous train-peaks/Nanog/profile/binsize=1/imbalance train-peaks/Nanog/profile/binsize=1/n_positives train-peaks/Nanog/profile/binsize=1/random_auprc train-peaks/Nanog/profile/binsize=10/auprc train-peaks/Nanog/profile/binsize=10/frac_ambigous train-peaks/Nanog/profile/binsize=10/imbalance train-peaks/Nanog/profile/binsize=10/n_positives train-peaks/Nanog/profile/binsize=10/random_auprc train-peaks/Oct4/counts/mad train-peaks/Oct4/counts/mse train-peaks/Oct4/counts/pearsonr train-peaks/Oct4/counts/spearmanr train-peaks/Oct4/counts/var_explained train-peaks/Oct4/profile/binsize=1/auprc train-peaks/Oct4/profile/binsize=1/frac_ambigous train-peaks/Oct4/profile/binsize=1/imbalance train-peaks/Oct4/profile/binsize=1/n_positives train-peaks/Oct4/profile/binsize=1/random_auprc train-peaks/Oct4/profile/binsize=10/auprc train-peaks/Oct4/profile/binsize=10/frac_ambigous train-peaks/Oct4/profile/binsize=10/imbalance train-peaks/Oct4/profile/binsize=10/n_positives train-peaks/Oct4/profile/binsize=10/random_auprc train-peaks/Sox2/counts/mad train-peaks/Sox2/counts/mse train-peaks/Sox2/counts/pearsonr train-peaks/Sox2/counts/spearmanr train-peaks/Sox2/counts/var_explained train-peaks/Sox2/profile/binsize=1/auprc train-peaks/Sox2/profile/binsize=1/frac_ambigous train-peaks/Sox2/profile/binsize=1/imbalance train-peaks/Sox2/profile/binsize=1/n_positives train-peaks/Sox2/profile/binsize=1/random_auprc train-peaks/Sox2/profile/binsize=10/auprc train-peaks/Sox2/profile/binsize=10/frac_ambigous train-peaks/Sox2/profile/binsize=10/imbalance train-peaks/Sox2/profile/binsize=10/n_positives train-peaks/Sox2/profile/binsize=10/random_auprc train-peaks/avg/counts/mad train-peaks/avg/counts/mse train-peaks/avg/counts/pearsonr train-peaks/avg/counts/spearmanr train-peaks/avg/counts/var_explained train-peaks/avg/profile/binsize=1/auprc 
train-peaks/avg/profile/binsize=1/frac_ambigous train-peaks/avg/profile/binsize=1/imbalance train-peaks/avg/profile/binsize=1/n_positives train-peaks/avg/profile/binsize=1/random_auprc train-peaks/avg/profile/binsize=10/auprc train-peaks/avg/profile/binsize=10/frac_ambigous train-peaks/avg/profile/binsize=10/imbalance train-peaks/avg/profile/binsize=10/n_positives train-peaks/avg/profile/binsize=10/random_auprc use_bias valid-genome-wide/Klf4/class/accuracy valid-genome-wide/Klf4/class/auPR valid-genome-wide/Klf4/class/auROC valid-genome-wide/Klf4/class/frac_positive valid-genome-wide/Klf4/class/n_negative valid-genome-wide/Klf4/class/n_positive valid-genome-wide/Nanog/class/accuracy valid-genome-wide/Nanog/class/auPR valid-genome-wide/Nanog/class/auROC valid-genome-wide/Nanog/class/frac_positive valid-genome-wide/Nanog/class/n_negative valid-genome-wide/Nanog/class/n_positive valid-genome-wide/Oct4/class/accuracy valid-genome-wide/Oct4/class/auPR valid-genome-wide/Oct4/class/auROC valid-genome-wide/Oct4/class/frac_positive valid-genome-wide/Oct4/class/n_negative valid-genome-wide/Oct4/class/n_positive valid-genome-wide/Sox2/class/accuracy valid-genome-wide/Sox2/class/auPR valid-genome-wide/Sox2/class/auROC valid-genome-wide/Sox2/class/frac_positive valid-genome-wide/Sox2/class/n_negative valid-genome-wide/Sox2/class/n_positive valid-genome-wide/avg/class/accuracy valid-genome-wide/avg/class/auPR valid-genome-wide/avg/class/auROC valid-genome-wide/avg/class/frac_positive valid-genome-wide/avg/class/n_negative valid-genome-wide/avg/class/n_positive valid-peaks/Klf4/counts/mad valid-peaks/Klf4/counts/mse valid-peaks/Klf4/counts/pearsonr valid-peaks/Klf4/counts/spearmanr valid-peaks/Klf4/counts/var_explained valid-peaks/Klf4/profile/binsize=1/auprc valid-peaks/Klf4/profile/binsize=1/frac_ambigous valid-peaks/Klf4/profile/binsize=1/imbalance valid-peaks/Klf4/profile/binsize=1/n_positives valid-peaks/Klf4/profile/binsize=1/random_auprc 
valid-peaks/Klf4/profile/binsize=10/auprc valid-peaks/Klf4/profile/binsize=10/frac_ambigous valid-peaks/Klf4/profile/binsize=10/imbalance valid-peaks/Klf4/profile/binsize=10/n_positives valid-peaks/Klf4/profile/binsize=10/random_auprc valid-peaks/Nanog/counts/mad valid-peaks/Nanog/counts/mse valid-peaks/Nanog/counts/pearsonr valid-peaks/Nanog/counts/spearmanr valid-peaks/Nanog/counts/var_explained valid-peaks/Nanog/profile/binsize=1/auprc valid-peaks/Nanog/profile/binsize=1/frac_ambigous valid-peaks/Nanog/profile/binsize=1/imbalance valid-peaks/Nanog/profile/binsize=1/n_positives valid-peaks/Nanog/profile/binsize=1/random_auprc valid-peaks/Nanog/profile/binsize=10/auprc valid-peaks/Nanog/profile/binsize=10/frac_ambigous valid-peaks/Nanog/profile/binsize=10/imbalance valid-peaks/Nanog/profile/binsize=10/n_positives valid-peaks/Nanog/profile/binsize=10/random_auprc valid-peaks/Oct4/counts/mad valid-peaks/Oct4/counts/mse valid-peaks/Oct4/counts/pearsonr valid-peaks/Oct4/counts/spearmanr valid-peaks/Oct4/counts/var_explained valid-peaks/Oct4/profile/binsize=1/auprc valid-peaks/Oct4/profile/binsize=1/frac_ambigous valid-peaks/Oct4/profile/binsize=1/imbalance valid-peaks/Oct4/profile/binsize=1/n_positives valid-peaks/Oct4/profile/binsize=1/random_auprc valid-peaks/Oct4/profile/binsize=10/auprc valid-peaks/Oct4/profile/binsize=10/frac_ambigous valid-peaks/Oct4/profile/binsize=10/imbalance valid-peaks/Oct4/profile/binsize=10/n_positives valid-peaks/Oct4/profile/binsize=10/random_auprc valid-peaks/Sox2/counts/mad valid-peaks/Sox2/counts/mse valid-peaks/Sox2/counts/pearsonr valid-peaks/Sox2/counts/spearmanr valid-peaks/Sox2/counts/var_explained valid-peaks/Sox2/profile/binsize=1/auprc valid-peaks/Sox2/profile/binsize=1/frac_ambigous valid-peaks/Sox2/profile/binsize=1/imbalance valid-peaks/Sox2/profile/binsize=1/n_positives valid-peaks/Sox2/profile/binsize=1/random_auprc valid-peaks/Sox2/profile/binsize=10/auprc valid-peaks/Sox2/profile/binsize=10/frac_ambigous 
valid-peaks/Sox2/profile/binsize=10/imbalance valid-peaks/Sox2/profile/binsize=10/n_positives valid-peaks/Sox2/profile/binsize=10/random_auprc valid-peaks/avg/counts/mad valid-peaks/avg/counts/mse valid-peaks/avg/counts/pearsonr valid-peaks/avg/counts/spearmanr valid-peaks/avg/counts/var_explained valid-peaks/avg/profile/binsize=1/auprc valid-peaks/avg/profile/binsize=1/frac_ambigous valid-peaks/avg/profile/binsize=1/imbalance valid-peaks/avg/profile/binsize=1/n_positives valid-peaks/avg/profile/binsize=1/random_auprc valid-peaks/avg/profile/binsize=10/auprc valid-peaks/avg/profile/binsize=10/frac_ambigous valid-peaks/avg/profile/binsize=10/imbalance valid-peaks/avg/profile/binsize=10/n_positives valid-peaks/avg/profile/binsize=10/random_auprc best-epoch/val_profile_loss best-epoch/val_counts_loss
exp
nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.05,9,FALSE nexus False 128.0 NaN 0.6387 1038.3725 NaN 0.8809 1451.3745 NaN 0.4275 935.5277 NaN 0.3053 497.8172 7.0 3945.6162 NaN 0.6381 1031.9275 NaN 0.8843 1475.1878 NaN 0.4258 939.3719 NaN 0.3119 503.9134 3973.0010 NaN 0.0 False ChIP-nexus.dataspec.yml 64.0 b_loss_weight=0;c_los... problem-peaks.gin,joi... profile/wn 0.050 False NaN 9.0 lr-nexus 0.5 same 1000 1.00 NaN wandb peaks 10.0 False None 1000 1:12:15 39.55 18.18 10132.39 43432.02 7718.66 659943.66 0.0 4335.1623 25.0 OSNK True 0.6379 0.6389 -0.0006 -0.0004 -5.9605e-07 0.5013 0.064 0.0026 189381.0 0.5013 0.5136 0.3183 0.0273 147401.0 0.5136 0.7443 0.8799 0.0010 0.0011 -1.0729e-06 0.5027 0.058 5.3544e-03 409298.0 5.0268e-01 0.5199 0.2409 0.0398 245161.0 0.5199 0.4991 0.4275 0.0005 0.0009 0.0000 0.5015 0.0725 2.9488e-03 154097.0 5.0147e-01 0.5168 0.3639 0.0336 120478.0 0.5168 0.4103 0.3059 -0.0001 0.0005 -0.0013 0.5032 0.0703 0.0063 55119.0 0.5032 0.5275 0.3199 0.0551 35175.0 0.5275 0.5729 0.5631 0.0002 0.0005 -0.0003 0.5021 0.0662 0.0043 201973.7500 0.5021 0.5195 0.3108 0.0389 137053.7500 0.5195 False NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.6396 0.639 -0.0004 -0.0002 -4.7684e-07 0.5013 0.0636 0.0025 59593.0 0.5013 0.5135 0.3164 0.027 46678.0 0.5135 0.7425 0.8857 0.0003 0.0006 -4.5300e-06 5.0262e-01 0.0581 5.2392e-03 130301.0 5.0262e-01 0.5195 0.2430 0.0391 78059.0 0.5195 0.4984 0.4266 0.0013 0.0005 1.6093e-06 0.5014 0.0719 0.0029 48721.0 0.5014 0.5166 0.3615 0.0331 38842.0 0.5166 0.4120 0.3116 -0.0016 -0.0032 -0.0014 0.5029 0.0719 0.0058 17464.0 0.5029 0.5266 0.3297 0.0533 11634.0 0.5266 0.5731 0.5657 -8.6901e-05 -0.0006 -0.0003 0.5020 0.0664 0.0041 64019.7500 0.5020 0.5191 0.3126 0.0381 43803.2500 0.5191 3950.4006 2.2600
nexus,peaks,N,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE nexus False 128.0 NaN NaN NaN NaN 0.5926 975.9495 NaN NaN NaN NaN NaN NaN 7.0 981.8758 NaN NaN NaN NaN 0.6156 1017.6103 NaN NaN NaN NaN NaN NaN 1023.7664 NaN 0.0 False ChIP-nexus.dataspec.yml 64.0 b_loss_weight=0;c_los... problem-peaks.gin,joi... profile/wn 0.004 False NaN 9.0 nexus-single-task 0.5 same 1000 1.00 NaN wandb peaks 10.0 False None 1000 0:30:40 2320.00 10.36 3650.31 28179.99 1667.71 483509.83 0.0 1840.9186 25.0 N True NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.5735 0.5428 0.6249 0.6208 3.8509e-01 0.5286 0.058 5.3544e-03 409298.0 5.2839e-03 0.7919 0.2409 0.0398 245161.0 0.0392 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.5735 0.5428 0.6249 0.6208 0.3851 0.5286 0.0580 0.0054 409298.0000 0.0053 0.7919 0.2409 0.0398 245161.0000 0.0392 False NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.5944 0.6126 0.5599 0.5852 3.1338e-01 4.7108e-01 0.0581 5.2392e-03 130301.0 5.3311e-03 0.7491 0.2430 0.0391 78059.0 0.0402 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.5944 0.6126 5.5993e-01 0.5852 0.3134 0.4711 0.0581 0.0052 130301.0000 0.0053 0.7491 0.2430 0.0391 78059.0000 0.0402 1017.6103 0.6156
nexus,peaks,S,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE nexus False 128.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.2213 447.0558 7.0 449.2691 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.2335 455.5385 457.8733 NaN 0.0 False ChIP-nexus.dataspec.yml 64.0 b_loss_weight=0;c_los... problem-peaks.gin,joi... profile/wn 0.004 False NaN 9.0 nexus-single-task 0.5 same 1000 1.00 NaN wandb peaks 10.0 False None 1000 0:26:24 1851.25 11.13 4681.15 32680.51 2337.08 518329.80 0.0 1584.6983 25.0 S True NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.3430 0.2074 0.5770 0.5170 0.3280 0.5446 0.0703 0.0063 55119.0 0.0064 0.8461 0.3199 0.0551 35175.0 0.0552 0.3430 0.2074 0.5770 0.5170 0.3280 0.5446 0.0703 0.0063 55119.0000 0.0064 0.8461 0.3199 0.0551 35175.0000 0.0552 False NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.3566 0.2329 0.4995 0.4642 0.2495 0.4250 0.0719 0.0058 17464.0 0.0057 0.7744 0.3297 0.0533 11634.0 0.0570 0.3566 0.2329 4.9954e-01 0.4642 0.2495 0.4250 0.0719 0.0058 17464.0000 0.0057 0.7744 0.3297 0.0533 11634.0000 0.0570 455.5385 0.2335
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
seq,gw,OSN,1,0.5,0.05,FALSE,valid,0.5,64,50,0.001,9,FALSE seq False 128.0 NaN NaN NaN 0.2588 0.3496 289.9842 0.1928 0.2684 180.7970 0.0605 0.2151 248.4165 4.0 36.8885 NaN NaN NaN 0.0847 0.3432 129.4235 0.0558 0.2953 90.8904 0.0064 0.3036 181.7915 20.7233 NaN 1.0 False ChIP-seq.dataspec.yml 64.0 b_loss_weight=1;c_los... problem-gw.gin,joint-... profile/wn 0.001 False NaN 9.0 binary + profile 0.5 valid 1000 0.05 NaN wandb gw 0.5 False None 3118 15:11:33 3073.88 174.81 8288.19 88768.73 7026.00 662280.29 0.0 54693.5833 50.0 OSN True NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.5557 0.4482 0.8118 0.7782 6.5890e-01 0.0016 0.068 3.0869e-05 453.0 3.3133e-05 0.0242 0.2550 0.0003 325.0 0.0003 0.4148 0.2654 0.7908 0.7830 0.6250 0.0001 0.1049 2.5822e-05 30.0 1.7247e-05 0.0085 0.3771 0.0003 28.0 0.0003 0.3117 0.1541 0.7803 0.7462 0.5960 0.0176 0.1006 0.0006 490.0 0.0005 0.5364 0.3700 0.0074 453.0 0.0064 0.4274 0.2892 0.7943 0.7691 0.6266 0.0064 0.0912 0.0002 324.3333 0.0002 0.1897 0.3340 0.0027 268.6667 0.0023 False NaN NaN NaN NaN NaN NaN 0.9698 0.1337 0.9469 0.0033 9939787.0 32970.0 0.9848 0.0948 0.9298 0.0014 9958831.0 13926.0 0.9987 0.1151 0.8984 0.0006 9966833.0 5924.0 0.9844 0.1146 0.9250 0.0018 9.9552e+06 17606.6667 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.8901 1.1929 0.4159 0.3963 3.6664e-02 7.9673e-05 0.0666 2.2606e-05 105.0 2.4311e-05 0.0006 0.2487 0.0002 57.0 0.0001 0.6295 0.6252 0.5156 0.5056 1.6718e-01 NaN 0.1071 0.0000 0.0 NaN NaN 0.3729 0.0000 0.0 NaN 0.4408 0.3262 0.4484 0.4377 0.1577 0.0027 0.1119 0.0004 97.0 0.0003 0.1142 0.4215 0.0057 97.0 0.0052 0.6535 0.7147 4.5993e-01 0.4465 0.1205 NaN 0.0952 0.0001 67.3333 NaN NaN 0.3477 0.0020 51.3333 NaN 402.1055 0.9421
seq,gw,OSN,1,0.1,0.01,FALSE,valid,0.5,64,50,0.001,9,FALSE seq False 128.0 NaN NaN NaN 0.2478 0.4664 296.5045 0.1118 0.3244 182.5709 0.0265 0.2495 249.9536 5.0 7.7805 NaN NaN NaN 0.1169 0.4074 130.2945 0.0227 0.3280 91.0853 0.0056 0.3370 182.2588 4.2889 NaN 1.0 False ChIP-seq.dataspec.yml 64.0 b_loss_weight=1;c_los... problem-gw.gin,joint-... profile/wn 0.001 False NaN 9.0 binary + profile 0.5 valid 1000 0.01 NaN wandb gw 0.1 False None 3118 17:11:44 5565.51 219.97 9027.00 90540.82 5099.82 663886.37 0.0 61904.7757 50.0 OSN True NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.6001 0.5549 0.6687 0.6395 3.8208e-01 0.0005 0.068 3.0869e-05 453.0 2.8794e-05 0.0025 0.2550 0.0003 325.0 0.0003 0.4787 0.3558 0.7103 0.7127 0.4696 0.0001 0.1049 2.5822e-05 30.0 2.0761e-05 0.0066 0.3771 0.0003 28.0 0.0006 0.3024 0.1502 0.7011 0.6893 0.4706 0.0071 0.1006 0.0006 490.0 0.0006 0.2976 0.3700 0.0074 453.0 0.0079 0.4604 0.3537 0.6934 0.6805 0.4407 0.0026 0.0912 0.0002 324.3333 0.0002 0.1022 0.3340 0.0027 268.6667 0.0029 False NaN NaN NaN NaN NaN NaN 0.9520 0.0818 0.9189 0.0033 9939787.0 32970.0 0.9926 0.0876 0.9068 0.0014 9958831.0 13926.0 0.9984 0.0692 0.7974 0.0006 9966833.0 5924.0 0.9810 0.0795 0.8744 0.0018 9.9552e+06 17606.6667 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.8811 1.1945 0.3661 0.3486 -1.7562e-01 6.7402e-05 0.0666 2.2606e-05 105.0 2.7931e-05 0.0006 0.2487 0.0002 57.0 0.0002 0.6848 0.7186 0.4949 0.4746 6.9421e-02 NaN 0.1071 0.0000 0.0 NaN NaN 0.3729 0.0000 0.0 NaN 0.4325 0.3146 0.4414 0.4244 0.0376 0.0028 0.1119 0.0004 97.0 0.0004 0.1096 0.4215 0.0057 97.0 0.0051 0.6661 0.7425 4.3413e-01 0.4159 -0.0229 NaN 0.0952 0.0001 67.3333 NaN NaN 0.3477 0.0020 51.3333 NaN 403.6386 1.0724
seq,gw,OSN,0,10,1,FALSE,valid,0.5,64,50,0.001,9,FALSE seq False 128.0 NaN NaN NaN NaN 0.3723 290.0194 NaN 0.2861 180.8420 NaN 0.2208 248.4145 2.0 728.0673 NaN NaN NaN NaN 0.3705 129.5535 NaN 0.3080 90.7777 NaN 0.3030 181.7373 411.8831 NaN 0.0 False ChIP-seq.dataspec.yml 64.0 b_loss_weight=0;c_los... problem-gw.gin,joint-... profile/wn 0.001 False NaN 9.0 gw + profile 0.5 valid 1000 1.00 NaN wandb gw 10.0 False None 3118 12:34:22 6119.18 290.29 8501.05 82482.75 5160.75 650402.36 0.0 45262.0300 50.0 OSN True NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.4549 0.3144 0.8268 0.7980 6.7637e-01 0.0045 0.068 3.0869e-05 453.0 2.8209e-05 0.0300 0.2550 0.0003 325.0 0.0003 0.3805 0.2253 0.7914 0.7741 0.6260 0.0001 0.1049 2.5822e-05 30.0 2.5833e-05 0.0130 0.3771 0.0003 28.0 0.0004 0.2603 0.1117 0.7754 0.7318 0.5923 0.0139 0.1006 0.0006 490.0 0.0006 0.5905 0.3700 0.0074 453.0 0.0072 0.3653 0.2171 0.7979 0.7680 0.6316 0.0062 0.0912 0.0002 324.3333 0.0002 0.2112 0.3340 0.0027 268.6667 0.0026 False NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.8126 1.0394 0.4222 0.4001 -4.2650e-02 7.2238e-05 0.0666 2.2606e-05 105.0 2.8637e-05 0.0007 0.2487 0.0002 57.0 0.0001 0.5941 0.5724 0.5024 0.4889 1.2983e-01 NaN 0.1071 0.0000 0.0 NaN NaN 0.3729 0.0000 0.0 NaN 0.3823 0.2609 0.4511 0.4366 0.1359 0.0025 0.1119 0.0004 97.0 0.0004 0.1531 0.4215 0.0057 97.0 0.0048 0.5963 0.6242 4.5856e-01 0.4419 0.0744 NaN 0.0952 0.0001 67.3333 NaN NaN 0.3477 0.0020 51.3333 NaN 402.0685 0.9815

211 rows × 249 columns

In [150]:
# Learning-rate sweep runs (lr < 0.05, no interval augmentation) plus the default run
exps = list(df.index[(df.lr < 0.05) & (df.assay == 'nexus') & (df.note == 'lr-nexus')& (df.augment_interval == False)]) + [nexus_default_exp]
In [151]:
# Sanity-check: the learning rates covered by the sweep
list(df.loc[exps].sort_values('lr')['lr'])
Out[151]:
[0.0005, 0.001, 0.002, 0.004, 0.005, 0.01, 0.02, 0.04]
In [152]:
# x-axis variable for the following plots (re-assigned again in the next cell)
x_var = 'lr'
In [153]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Learning rate vs profile auPRC (log-scaled x axis)
fig, ax = plt.subplots(figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[nexus_metric_profile], s=s_default)
ax.set(xscale="log", xlim=[3e-4, 0.1],
       xlabel="Learning rate", ylabel=profile_auprc_name)
fig.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')
In [100]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Make the plot: negated validation profile loss (higher = better), i.e. a
# profile log-likelihood — this is NOT an auPRC, so label the axis
# accordingly (the original cell reused the auPRC label here, mislabelling
# the y axis).
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], -dfs[nexus_metric_profile2], s=s_default)
ax.set_ylabel('Profile LL');
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate");
# fig.savefig(fig_nexus_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [154]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Learning rate vs average total-count Spearman correlation (log-scaled x axis)
fig, ax = plt.subplots(figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set(xscale="log", xlim=[3e-4, 0.1],
       xlabel="Learning rate", ylabel=counts_spearman_name)
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

De-conv size

In [431]:
# De-convolution kernel-size sweep runs for ChIP-nexus
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'deconv4')])
In [432]:
# x-axis variable: transposed-convolution (output head) kernel size
x_var = 'tconv_kernel_size'
In [433]:
dfs = df.loc[exps].sort_values(x_var)

# De-convolution kernel size vs profile auPRC.
# Use the Axes API throughout (the original mixed plt.xticks with ax.* calls)
# and drop the commented-out axvline leftovers.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[nexus_metric_profile], s=s_default)
ax.set_ylabel(profile_auprc_name)
ax.set_xticks([1, 10, 25, 35])
ax.set_xlabel("De-convolution size")
fig.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')
In [434]:
dfs = df.loc[exps].sort_values(x_var)

# De-convolution kernel size vs total-count Spearman correlation.
# Grey vertical lines mark kernel sizes 1 and 25.
# Use the Axes API throughout (the original mixed plt.xticks with ax.* calls).
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
ax.axvline(25, color='grey', alpha=0.2)
ax.set_xticks([1, 10, 25, 35])
ax.scatter(dfs[x_var], dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Number of layers (same padding)

Compute the receptive field:

In [160]:
from basepair.models import seq_bpnet_cropped_extra_seqlen
In [161]:
# Receptive field of the model as a function of the number of conv layers
# (first layer kernel 25, then nl-1 dilated layers; +1 for the centre position).
n_layers = np.arange(1, 14)
receptive_field = []
for nl in n_layers:
    rf = seq_bpnet_cropped_extra_seqlen(conv1_kernel_size=25,
                                        n_dil_layers=nl - 1,
                                        tconv_kernel_size=1,
                                        target_seqlen=0) + 1
    receptive_field.append(rf)
print(pd.DataFrame({"receptive_field": receptive_field, "n_layers": n_layers}).to_string())
    receptive_field  n_layers
0                25         1
1                29         2
2                37         3
3                53         4
4                85         5
5               149         6
6               277         7
7               533         8
8              1045         9
9              2069        10
10             4117        11
11             8213        12
12            16405        13
In [162]:
# Layer-count sweep runs (same padding) plus the default run
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'n layers')& (df.padding == 'same')]) + [nexus_default_exp]
In [163]:
# x-axis variable: number of dilated conv layers (plotted below as n_dil_layers + 1 total layers)
x_var = 'n_dil_layers'
In [165]:
# Per-TF profile auPRC vs total number of conv layers.
# The sorted frame does not depend on tf, so hoist it out of the loop
# (the original recomputed it on every iteration).
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(dfs[x_var]+1, dfs[f'valid-peaks/{tf}/profile/binsize=1/auprc'], 
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1,1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(profile_auprc_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')
In [166]:
# Per-TF total-count Spearman correlation vs total number of conv layers.
# Hoist the loop-invariant sort out of the loop; drop the redundant
# plt.xlabel("Number of Layers") call (it was immediately overridden by
# ax.set_xlabel, so the effective label stays "Number of layers").
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(dfs[x_var]+1, dfs[f'valid-peaks/{tf}/counts/spearmanr'], 
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.set_xticks([1, 5, 10])
ax.legend(loc="upper left", bbox_to_anchor=(1,1))
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("Number of layers")
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Profile vs regression weight

In [437]:
# Counts-weight (regression_weight) sweep runs plus the default run
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'regression_weight') & (df.padding == 'same')]) + [nexus_default_exp]
In [438]:
# x-axis variable: weight on the total-count (regression) loss
x_var = 'regression_weight'
In [176]:
## Compute the median value of the total counts for each task
# Load the profiles from the default run's importance-score HDF5, then take
# the median over examples of the per-example total count averaged over
# strands and tasks.
# NOTE(review): exact semantics depend on ImpScoreFile.get_profiles() — verify.
profiles = ImpScoreFile(f"output/{nexus_default_exp}/deeplift.imp_score.h5").get_profiles()

nexus_median_N = np.median(mean([p.sum(axis=-2).mean(axis=-1) for t,p in profiles.items()]))
print(nexus_median_N)
130.5
In [439]:
# "Natural" counts-loss weight = half the median total counts.
# NOTE(review): `//` floor-divides (130.5 // 2 == 65.0) — presumably intended
# to get a round weight, but confirm plain `/ 2` wasn't meant.
nexus_natural_weight = nexus_median_N // 2
In [441]:
# Two stacked panels sharing the x axis: profile auPRC (top) and counts
# Spearman (bottom) as a function of the relative count-loss weight
# (regression_weight normalised by the "natural" weight computed above).
midpoint = 1  # relative weight of 1 == natural weighting
dfs = df.loc[exps].sort_values(x_var)
fig, axes = plt.subplots(2, 1, figsize=get_figsize(.35, 1), sharex=True, gridspec_kw=dict(hspace=0))
ax = axes[0]
# dashed grey line at the natural weight; faint line at weight 10 (presumably
# the value used in the default experiment — verify)
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / nexus_natural_weight, dfs[nexus_metric_profile], s=s_default)
ax.axvline(x=10/nexus_natural_weight, color='grey', linestyle='--', alpha=0.1)
ax.set_xscale('log')
ax.set_ylabel('Profile\nauPRC');
ax.set_xlim([5e-3, 5e2])
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);

ax = axes[1]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var]  / nexus_natural_weight, dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Tot. counts $R_{s}$');
ax.set_xlabel("Relative total count weight");
ax.set_xlim([5e-3, 5e2]);
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);
fig.savefig(fig_nexus_hp / f'{x_var}.both-auprc-spearman.pdf', bbox_inches='tight')

ChIP-seq hyper-parameters

In [179]:
# Default ChIP-seq experiment id (comma-separated hyper-parameter string used as df index)
seq_default_exp = 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE'

Learning rate

In [180]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'lr-nexus')]) + [seq_default_exp]
In [181]:
list(df.loc[exps].sort_values('lr')['lr'])
Out[181]:
[0.0005, 0.001, 0.002, 0.004, 0.005, 0.01, 0.02, 0.04, 0.05]
In [182]:
seq_metric_name = 'Profile LL'
In [183]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Profile log-likelihood (negated loss) vs. learning rate, log-scaled x axis
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], -dfs[seq_metric_profile], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel(seq_metric_name)
fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [184]:
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Total-count Spearman vs. learning rate, log-scaled x axis
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[seq_metric_counts], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel(counts_spearman_name)
fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

De-conv size

In [347]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'deconv')]) + [seq_default_exp]
In [348]:
x_var = 'tconv_kernel_size'
In [349]:
dfs = df.loc[exps].sort_values(x_var)

# Profile log-likelihood vs. transposed-convolution kernel size
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
# ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(25, color='grey', alpha=0.2)

ax.scatter(dfs[x_var], -dfs[seq_metric_profile], s=s_default)
ax.set_xticks([1, 20, 50, 100])
ax.set_ylabel(seq_metric_name)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [350]:
dfs = df.loc[exps].sort_values(x_var)

# Total-count Spearman vs. transposed-convolution kernel size
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(50, color='grey', alpha=0.2)
ax.set_xticks([1, 20, 50, 100])
ax.scatter(dfs[x_var], dfs[seq_metric_counts], s=s_default)
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

De-conv size 2

In [351]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'deconv2')])
In [352]:
x_var = 'tconv_kernel_size'
In [353]:
dfs = df.loc[exps].sort_values(x_var)

# Profile log-likelihood vs. kernel size for the second de-conv sweep
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
# ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(25, color='grey', alpha=0.2)

ax.scatter(dfs[x_var], -dfs[seq_metric_profile], s=s_default)
ax.set_xticks([1, 20, 50, 100])
ax.set_ylabel(seq_metric_name)
ax.set_xlabel("De-convolution size")
# fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [354]:
dfs = df.loc[exps].sort_values(x_var)

# Total-count Spearman vs. kernel size for the second de-conv sweep
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(50, color='grey', alpha=0.2)
ax.set_xticks([1, 20, 50, 100])
ax.scatter(dfs[x_var], dfs[seq_metric_counts], s=s_default)
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("De-convolution size")
# fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Number of layers (same padding)

In [189]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'n layers')& (df.padding == 'same')]) + [seq_default_exp]
In [190]:
x_var = 'n_dil_layers'
In [191]:
# Per-TF profile log-likelihood vs. number of dilated layers.
# Fix: `dfs` is loop-invariant — compute it once instead of per TF.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               -dfs[f'best-epoch/val_{tf}/profile_loss'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(seq_metric_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')
In [192]:
# Per-TF count Spearman vs. number of dilated layers.
# Fix: `dfs` is loop-invariant — compute it once instead of per TF.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               dfs[f'valid-peaks/{tf}/counts/spearmanr'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')

Profile vs regression weight

In [410]:
# ChIP-seq runs from the regression-weight sweep (unseeded), plus the default run
exps = list(df.index[(df.assay == 'seq') & (df.note == 'regression_weight') & (df.seed == 'None')]) + [seq_default_exp]
In [411]:
x_var = 'regression_weight'
In [390]:
## Compute the median value of the total counts for each task
profiles = ImpScoreFile(f"output/{seq_default_exp}/deeplift.imp_score.h5").get_profiles()

# Sum over axis -2 and average over axis -1 per task (presumably positions and
# strands — TODO confirm array layout), average the tasks, then take the median.
per_task_totals = [profile.sum(axis=-2).mean(axis=-1) for _, profile in profiles.items()]
seq_median_N = np.median(mean(per_task_totals))
print(seq_median_N)
49.0
In [412]:
# "Natural" count-loss weight = half the median total counts.
# NOTE(review): `//` floor-divides (24.0 rather than 24.5 for median 49.0) — confirm the rounding is intended.
seq_natural_weight = seq_median_N // 2
In [417]:
# Two stacked panels sharing the x axis: profile LL (top) and total-count
# Spearman (bottom) vs. the weight relative to the "natural" weight.
# Fix: dropped a pointless f-string prefix on a literal with no placeholders.
midpoint = 1
dfs = df.loc[exps].sort_values(x_var)
fig, axes = plt.subplots(2, 1, figsize=get_figsize(.35, 1), sharex=True, gridspec_kw=dict(hspace=0))
ax = axes[0]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / seq_natural_weight, -dfs['best-epoch/val_profile_loss'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Profile LL');
ax.set_xlim([5e-3, 5e2])
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);

ax = axes[1]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / seq_natural_weight, dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Tot. counts $R_{s}$');
ax.set_xlabel("Relative total count weight");
ax.set_xlim([5e-3, 5e2]);
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);
fig.savefig(fig_seq_hp / f'{x_var}.both-profileLL-spearman.pdf', bbox_inches='tight')

Profile vs regression weight 2

In [418]:
list(df.index[(df.assay == 'seq') & (df.note == 'deconv2') & (df.tconv_kernel_size == 50)])
Out[418]:
['seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42']
In [425]:
df.index[(df.assay == 'seq') & (df.seed == "42")]
Out[425]:
Index(['seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,1,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,10,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,20,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,30,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,40,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,60,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,70,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,80,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,100,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,1,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,2,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,5,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,20,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,50,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,100,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,200,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,500,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,1000,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,2000,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,5000,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42',
       'seq,peaks,OSN,0,10000,1,FALSE,same,0.5,64,50,0.004,9,FALSE,,FALSE,TRUE,42'],
      dtype='object', name='exp')
In [426]:
# Seeded (seed=42) regression-weight sweep, plus the matching deconv2 run with kernel size 50
exps = (list(df.index[(df.assay == 'seq') & (df.seed == "42") & (df.note == 'regression_weight')]) + list(df.index[(df.assay == 'seq') & (df.note == 'deconv2') & (df.tconv_kernel_size == 50)]))
In [427]:
x_var = 'regression_weight'
In [428]:
# ## Compute the median value of the total counts for each task
# profiles = ImpScoreFile(f"output/{seq_default_exp}/deeplift.imp_score.h5").get_profiles()

# seq_median_N = np.median(mean([p.sum(axis=-2).mean(axis=-1) for t,p in profiles.items()]))
# print(seq_median_N)
In [429]:
seq_natural_weight = seq_median_N // 2
In [430]:
# Same two-panel weight plot as above, on the seeded (seed=42) sweep.
# Fix: dropped a pointless f-string prefix on a literal with no placeholders.
midpoint = 1
dfs = df.loc[exps].sort_values(x_var)
fig, axes = plt.subplots(2, 1, figsize=get_figsize(.35, 1), sharex=True, gridspec_kw=dict(hspace=0))
ax = axes[0]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / seq_natural_weight, -dfs['best-epoch/val_profile_loss'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Profile LL');
ax.set_xlim([5e-3, 5e2])
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);

ax = axes[1]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(dfs[x_var] / seq_natural_weight, dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Tot. counts $R_{s}$');
ax.set_xlabel("Relative total count weight");
ax.set_xlim([5e-3, 5e2]);
#ax.set_xticks([0.01, 0.1, 1, 10, 100, 1000]);
# fig.savefig(fig_seq_hp / f'{x_var}.both-profileLL-spearman.pdf', bbox_inches='tight')

Genome-wide models

ChIP-nexus

Learning rate

In [198]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'binary - lr')])
In [199]:
list(df.loc[exps].sort_values('lr')['lr'])
Out[199]:
[0.0005,
 0.0005,
 0.001,
 0.001,
 0.002,
 0.002,
 0.004,
 0.004,
 0.005,
 0.005,
 0.01,
 0.01,
 0.02,
 0.02,
 0.04,
 0.04,
 0.05,
 0.05]
In [200]:
gw_binary_metric = 'valid-genome-wide/avg/class/auPR'
In [201]:
x_var = 'lr'
# NOTE: `dfs` is reused by the class-balance cell below — keep the name.
dfs = df.loc[exps].sort_values(x_var)

# Genome-wide binary auPR vs. learning rate
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel('auPR')
fig.savefig(fig_nexus_hp / f'{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')
In [202]:
# Class-balance statistics per TF; identical across runs, hence drop_duplicates
stats = ['frac_positive', 'n_negative', 'n_positive']
features = [f'valid-genome-wide/{tf}/class/{stat}' for tf in osnk_tfs for stat in stats]
dict(dfs[features].drop_duplicates().dropna().iloc[0])
Out[202]:
{'valid-genome-wide/Oct4/class/frac_positive': 0.005917937004352722,
 'valid-genome-wide/Oct4/class/n_negative': 9914050.0,
 'valid-genome-wide/Oct4/class/n_positive': 59020.0,
 'valid-genome-wide/Sox2/class/frac_positive': 0.001896607564170311,
 'valid-genome-wide/Sox2/class/n_negative': 9954155.0,
 'valid-genome-wide/Sox2/class/n_positive': 18915.0,
 'valid-genome-wide/Nanog/class/frac_positive': 0.008883322788268809,
 'valid-genome-wide/Nanog/class/n_negative': 9884476.0,
 'valid-genome-wide/Nanog/class/n_positive': 88594.0,
 'valid-genome-wide/Klf4/class/frac_positive': 0.012622291831903316,
 'valid-genome-wide/Klf4/class/n_negative': 9847187.0,
 'valid-genome-wide/Klf4/class/n_positive': 125883.0}

Profile importance

In [27]:
# Genome-wide binary models with an added profile head (same padding),
# compared against the purely binary default below
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'binary + profile')& (df.padding == 'same')])
default = 'nexus,gw,OSNK,1,0,0,FALSE,same,0.5,64,25,0.001,9,FALSE'
In [28]:
x_var = 'profile_weight'
In [29]:
dfs = df.loc[exps].sort_values(x_var)

# Genome-wide auPR vs. profile-head weight; horizontal line = binary-only baseline
metric = 'valid-genome-wide/avg/class/auPR'
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[metric], s=s_default)
ax.axhline(df.loc[default][metric])
# ax.set_ylim([0.07, 0.15])
ax.set_xlim([5e-3, 2])
ax.set_xscale('log')
ax.set_ylabel("auPR")
ax.set_xlabel("Profile importance")
fig.savefig(fig_nexus_hp / f'{x_var}.gw-binary.auprc.pdf', bbox_inches='tight')

ChIP-seq

Learning rate

In [206]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'binary - lr')])
In [207]:
list(df.loc[exps].sort_values('lr')['lr'])
Out[207]:
[0.0005,
 0.0005,
 0.001,
 0.001,
 0.002,
 0.002,
 0.004,
 0.004,
 0.005,
 0.005,
 0.01,
 0.01,
 0.02,
 0.02,
 0.04,
 0.04,
 0.05,
 0.05]
In [208]:
gw_binary_metric = 'valid-genome-wide/avg/class/auPR'
In [209]:
x_var = 'lr'
In [210]:
x_var = 'lr'
# NOTE: `dfs` is reused by the class-balance cell below — keep the name.
dfs = df.loc[exps].sort_values(x_var)

# Genome-wide binary auPR vs. learning rate
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel('auPR')
fig.savefig(fig_seq_hp / f'{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')
In [211]:
# Class-balance statistics per TF; identical across runs, hence drop_duplicates
stats = ['frac_positive', 'n_negative', 'n_positive']
features = [f'valid-genome-wide/{tf}/class/{stat}' for tf in osn_tfs for stat in stats]
dict(dfs[features].drop_duplicates().dropna().iloc[0])
Out[211]:
{'valid-genome-wide/Oct4/class/frac_positive': 0.0013963865814930346,
 'valid-genome-wide/Oct4/class/n_negative': 9958957.0,
 'valid-genome-wide/Oct4/class/n_positive': 13926.0,
 'valid-genome-wide/Sox2/class/frac_positive': 0.0005940107790294942,
 'valid-genome-wide/Sox2/class/n_negative': 9966959.0,
 'valid-genome-wide/Sox2/class/n_positive': 5924.0,
 'valid-genome-wide/Nanog/class/frac_positive': 0.003305964784706689,
 'valid-genome-wide/Nanog/class/n_negative': 9939913.0,
 'valid-genome-wide/Nanog/class/n_positive': 32970.0}

Note

ChIP-nexus has roughly 3x more peaks; hence the auPR is higher.

Adding profile weight

In [36]:
# Binary-only ChIP-seq baseline and the same-padding binary+profile sweep
default = 'seq,gw,OSN,1,0,0,FALSE,same,0.5,64,50,0.001,9,FALSE'
exps = list(df.index[(df.assay == 'seq') & (df.note == 'binary + profile')& (df.padding == 'same')])
In [37]:
x_var = 'profile_weight'
In [38]:
dfs = df.loc[exps].sort_values(x_var)

# Genome-wide auPR vs. profile-head weight (same padding);
# horizontal line = binary-only baseline
metric = 'valid-genome-wide/avg/class/auPR'
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[metric], s=s_default)
ax.axhline(df.loc[default][metric])
ax.set_ylim([0.07, 0.15])
ax.set_xlim([5e-3, 2])
ax.set_xscale('log')
ax.set_ylabel("auPR")
ax.set_xlabel("Profile importance")
fig.savefig(fig_seq_hp / f'{x_var}.same-padding.gw-binary.auprc.pdf', bbox_inches='tight')
In [39]:
# Binary-only ChIP-seq baseline and the valid-padding binary+profile sweep
default = 'seq,gw,OSN,1,0,0,FALSE,valid,0.5,64,50,0.001,9,FALSE'
exps = list(df.index[(df.assay == 'seq') & (df.note == 'binary + profile')& (df.padding == 'valid')])
In [40]:
x_var = 'profile_weight'
In [41]:
dfs = df.loc[exps].sort_values(x_var)

# Genome-wide auPR vs. profile-head weight (valid padding);
# horizontal line = binary-only baseline
metric = 'valid-genome-wide/avg/class/auPR'
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[metric], s=s_default)
ax.axhline(df.loc[default][metric])
ax.set_ylim([0.07, 0.15])
ax.set_xlim([5e-3, 2])
ax.set_xscale('log')
ax.set_ylabel("auPR")
ax.set_xlabel("Profile importance")
fig.savefig(fig_seq_hp / f'{x_var}.valid-padding.gw-binary.auprc.pdf', bbox_inches='tight')

Extra

ChIP-nexus

Number of layers (valid padding)

In [215]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'n layers')& (df.padding == 'valid')])
In [216]:
x_var = 'n_dil_layers'
In [217]:
# Per-TF profile auPRC vs. number of dilated layers (valid padding).
# Fix: `dfs` is loop-invariant — compute it once instead of per TF.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               dfs[f'valid-peaks/{tf}/profile/binsize=1/auprc'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(profile_auprc_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.profile-auprc.pdf', bbox_inches='tight')
In [218]:
# Per-TF count Spearman vs. number of dilated layers (valid padding).
# Fix 1: `dfs` is loop-invariant — compute it once instead of per TF.
# Fix 2: the x label was set twice (plt.xlabel then ax.set_xlabel); keep the final one.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               dfs[f'valid-peaks/{tf}/counts/spearmanr'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.set_xticks([1, 5, 10])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("Number of layers")
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.counts-spearman.pdf', bbox_inches='tight')

De-conv size

In [243]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'deconv') & (df.padding == 'valid')]) + [nexus_default_exp]
In [244]:
x_var = 'tconv_kernel_size'
In [245]:
dfs = df.loc[exps].sort_values(x_var)

# Profile auPRC vs. de-convolution kernel size (valid padding)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
# ax.axvline(1, color='grey', alpha=0.2)
# ax.axvline(25, color='grey', alpha=0.2)

ax.scatter(dfs[x_var], dfs[nexus_metric_profile], s=s_default)
ax.set_xticks([1, 10, 25, 35])
ax.set_ylabel(profile_auprc_name)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.profile-auprc.pdf', bbox_inches='tight')
In [159]:
# Count Spearman vs. de-convolution kernel size (valid padding).
# Fix: the output filename lacked the 'valid-padding' tag present in the sibling
# profile-auprc plot, so this figure silently overwrote the same-padding
# counts-spearman figure saved earlier under the identical name.
dfs = df.loc[exps].sort_values(x_var)

# Make the plot
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
ax.axvline(25, color='grey', alpha=0.2)
plt.xticks([1, 10, 25, 35])
ax.scatter(dfs[x_var], dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_ylabel(counts_spearman_name);

ax.set_xlabel("De-convolution size");
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.counts-spearman.pdf', bbox_inches='tight')

ChIP-seq

Number of layers (valid padding)

In [219]:
exps = list(df.index[(df.assay == 'seq') & (df.note == 'n layers')& (df.padding == 'valid')])
In [220]:
x_var = 'n_dil_layers'
In [221]:
# Per-TF profile log-likelihood vs. number of dilated layers (valid padding).
# Fix: `dfs` is loop-invariant — compute it once instead of per TF.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               -dfs[f'best-epoch/val_{tf}/profile_loss'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(seq_metric_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.valid-padding.profile-ll.pdf', bbox_inches='tight')
In [222]:
# Per-TF count Spearman vs. number of dilated layers (valid padding).
# Fix: `dfs` is loop-invariant — compute it once instead of per TF.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osn_tfs:
    ax.scatter(dfs[x_var]+1,  # +1 offset as in the sibling layer plots — presumably x_var is 0-based; TODO confirm
               dfs[f'valid-peaks/{tf}/counts/spearmanr'],
               label=tf,
               color=tf_colors[tf],
               s=15,
               )
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.valid-padding.counts-spearman.pdf', bbox_inches='tight')

Classification models

Basset

In [18]:
# Basset classification runs; sweep variable is the dropout rate
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'basset')])
gw_binary_metric = 'valid-genome-wide/avg/class/auPR'
x_var = 'dropout'
In [20]:
# Best genome-wide auPR across the dropout sweep
# NOTE: `dfs` is reused by the plotting cell below
dfs = df.loc[exps].sort_values(x_var)
dfs[gw_binary_metric].max()
Out[20]:
0.24305993415698324
In [24]:
# Genome-wide auPR vs. dropout rate for the Basset model
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
# ax.set_xscale("log")
# ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Dropout")
ax.set_ylabel('auPR')
fig.savefig(fig_seq_hp / f'basset.{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')

Factorized Basset

In [21]:
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'factorized-basset')])
In [22]:
# Best genome-wide auPR across the factorized-Basset dropout sweep
# NOTE: `dfs` is reused by the plotting cell below
dfs = df.loc[exps].sort_values(x_var)
dfs[gw_binary_metric].max()
Out[22]:
0.23801075095517246
In [26]:
# Genome-wide auPR vs. dropout rate for the factorized Basset model
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
# ax.set_xscale("log")
# ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Dropout")
ax.set_ylabel('auPR')
fig.savefig(fig_seq_hp / f'factorized-basset.{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')