# Imports
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from basepair.imports import *
from basepair.exp.paper.config import tf_colors
from basepair.functions import mean
from basepair.cli.imp_score import ImpScoreFile
# Project-level setup pulled in by the star import above
# (presumably the matplotlib style used for the paper figures -- confirm).
paper_config()

# Output directories for the hyper-parameter figures, one per assay.
fig_nexus_hp = Path(f"{ddir}/figures/model-evaluation/ChIP-nexus/hyper-parameters-v2")
fig_seq_hp = Path(f"{ddir}/figures/model-evaluation/ChIP-seq/hyper-parameters-v2")

# Task (transcription factor) names iterated over in the per-task plots.
osn_tfs = ['Oct4', 'Sox2', 'Nanog']
osnk_tfs = ['Oct4', 'Sox2', 'Nanog', 'Klf4']

# Create the output directories with pathlib rather than an IPython shell
# escape: this works outside IPython and is safe for paths containing spaces.
for fig_dir in (fig_seq_hp, fig_nexus_hp):
    fig_dir.mkdir(parents=True, exist_ok=True)
# Load the table of finished experiments and index it by experiment name.
df = pd.read_csv("output/model.results.finished.csv").set_index('exp')

# Total validation profile loss: sum of the per-task profile losses.
# `DataFrame.sum(axis=1)` skips NaN by default, so a task that is missing for
# an experiment contributes 0 and an experiment with no task at all gets 0.
# This replaces the chained `df[col][mask] += ...` assignment, which is not
# guaranteed to write back into `df` (and never does under Copy-on-Write).
profile_loss_cols = [f'best-epoch/val_{tf}/profile_loss' for tf in osnk_tfs]
df['best-epoch/val_profile_loss'] = df[profile_loss_cols].sum(axis=1)

# Total validation counts loss, aggregated the same way.
counts_loss_cols = [f'best-epoch/val_{tf}/counts_loss' for tf in osnk_tfs]
df['best-epoch/val_counts_loss'] = df[counts_loss_cols].sum(axis=1)

len(df)
# Column names in `df` of the metrics used throughout the plots below.

# Overall validation loss (same column for both assays).
nexus_metric = seq_metric = 'best-epoch/val_loss'

# Profile metrics. ChIP-nexus uses the auPRC as the primary profile metric and
# the summed profile loss as the secondary one; ChIP-seq uses them the other
# way round.
# (alternative considered for nexus_metric_profile: 'best-epoch/val_loss')
nexus_metric_profile = seq_metric_profile2 = 'valid-peaks/avg/profile/binsize=1/auprc'
nexus_metric_profile2 = seq_metric_profile = 'best-epoch/val_profile_loss'

# Total-count metric (same column for both assays).
nexus_metric_counts = seq_metric_counts = 'valid-peaks/avg/counts/spearmanr'

# Plot params
s_default = 20  # marker size passed as `s=` to the scatter calls
profile_auprc_name = 'Profile auPRC'
counts_spearman_name = r"Total counts $R_{s}$"
# Compare the multi-task ChIP-nexus model against the single-task models.
single_task_rows = (df.assay == 'nexus') & (df.note == 'nexus-single-task')
exps = list(df.index[single_task_rows])
exps
nexus_metric_profile
nexus_default_exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE-2'

# Single-task experiments re-indexed by their `tfs` value. The lookups below
# use `tf[0]`, i.e. the first letter of the TF name, as the key into `tfs`.
single_task = df.loc[exps].set_index("tfs")

# --- Profile auPRC ---
# Multi-task
multi_task_row = df.loc[nexus_default_exp]
v = {tf: multi_task_row[f'valid-peaks/{tf}/profile/binsize=1/auprc']
     for tf in osnk_tfs}
v
mean(list(v.values()))
# Single task
profile_by_tfs = dict(single_task[nexus_metric_profile])
v = {tf: profile_by_tfs[tf[0]] for tf in osnk_tfs}
v
mean(list(v.values()))

# --- Total counts Spearman correlation ---
# Multi-task
multi_task_row = df.loc[nexus_default_exp]
v = {tf: multi_task_row[f'valid-peaks/{tf}/counts/spearmanr']
     for tf in osnk_tfs}
v
mean(list(v.values()))
# Single task (the column is task-specific here, so it is looked up per TF)
v = {tf: dict(single_task[f'valid-peaks/{tf}/counts/spearmanr'])[tf[0]]
     for tf in osnk_tfs}
v
mean(list(v.values()))
# ChIP-nexus: learning-rate sweep.
# Reference multi-task ChIP-nexus experiment, appended to the sweep.
nexus_default_exp = 'nexus,peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE-2'
df.sort_values(nexus_metric_profile, ascending=False)

# `== False` is an element-wise comparison on the column, not an identity test.
lr_sweep_rows = (
    (df.lr < 0.05)
    & (df.assay == 'nexus')
    & (df.note == 'lr-nexus')
    & (df.augment_interval == False)  # noqa: E712
)
exps = list(df.index[lr_sweep_rows]) + [nexus_default_exp]
list(df.loc[exps].sort_values('lr')['lr'])

# All three figures share the same experiments and the same x variable, so the
# sorted table is built once.
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# Figure 1: profile auPRC vs. learning rate.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[nexus_metric_profile], s=s_default)
ax.set_ylabel(profile_auprc_name)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
fig.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')

# Figure 2 (not saved): negated summed profile loss vs. learning rate.
# The y axis shows -loss, so it is labelled as a log-likelihood like the
# ChIP-seq plots, not as an auPRC.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], -dfs[nexus_metric_profile2], s=s_default)
ax.set_ylabel('Profile LL')
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")

# Figure 3: total-count Spearman correlation vs. learning rate.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_ylabel(counts_spearman_name)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')
# ChIP-nexus: effect of the transposed-convolution ("de-convolution") kernel size.
deconv_rows = (df.assay == 'nexus') & (df.note == 'deconv4')
exps = list(df.index[deconv_rows])
x_var = 'tconv_kernel_size'
deconv_ticks = [1, 10, 25, 35]

# Figure 1: profile auPRC vs. kernel size.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[nexus_metric_profile], s=s_default)
ax.set_ylabel(profile_auprc_name)
ax.set_xticks(deconv_ticks)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')

# Figure 2: total-count Spearman correlation vs. kernel size, with faint
# vertical guide lines at kernel sizes 1 and 25.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
for guide_x in (1, 25):
    ax.axvline(guide_x, color='grey', alpha=0.2)
ax.set_xticks(deconv_ticks)
ax.scatter(dfs[x_var], dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')
# Compute the receptive field:
from basepair.models import seq_bpnet_cropped_extra_seqlen

# Tabulate the receptive field for models with 1..13 layers. For each layer
# count `nl` the model is queried with `nl - 1` dilated layers, a first
# convolution of width 25, a transposed convolution of width 1 and a target
# length of 0; the receptive field is the returned extra length plus one.
n_layers = np.arange(1, 14)
receptive_field = []
for nl in n_layers:
    extra_seqlen = seq_bpnet_cropped_extra_seqlen(
        conv1_kernel_size=25,
        n_dil_layers=nl - 1,
        tconv_kernel_size=1,
        target_seqlen=0,
    )
    receptive_field.append(extra_seqlen + 1)

rf_table = pd.DataFrame({"receptive_field": receptive_field, "n_layers": n_layers})
print(rf_table.to_string())
# ChIP-nexus ('same' padding): effect of the number of layers, per task.
n_layers_rows = (df.assay == 'nexus') & (df.note == 'n layers') & (df.padding == 'same')
exps = list(df.index[n_layers_rows]) + [nexus_default_exp]
x_var = 'n_dil_layers'

# The table does not depend on the task, so sort it once instead of once per
# loop iteration. The x axis shows `n_dil_layers + 1`.
dfs = df.loc[exps].sort_values(x_var)
layer_count = dfs[x_var] + 1

# Figure 1: per-task profile auPRC.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(layer_count, dfs[f'valid-peaks/{tf}/profile/binsize=1/auprc'],
               label=tf, color=tf_colors[tf], s=15)
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(profile_auprc_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_nexus_hp / f'{x_var}.profile-auprc.pdf', bbox_inches='tight')

# Figure 2: per-task total-count Spearman correlation.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(layer_count, dfs[f'valid-peaks/{tf}/counts/spearmanr'],
               label=tf, color=tf_colors[tf], s=15)
ax.set_xlim([0, 14])
ax.set_xticks([1, 5, 10])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
# Single x-label, spelled exactly like the profile figure's (the label used to
# be set twice with different capitalisation, the second silently winning).
ax.set_xlabel("Number of Layers")
ax.set_ylabel(counts_spearman_name)
fig.savefig(fig_nexus_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')
# ChIP-nexus: effect of the total-count (regression) loss weight.
regw_rows = (
    (df.assay == 'nexus')
    & (df.note == 'regression_weight')
    & (df.padding == 'same')
)
exps = list(df.index[regw_rows]) + [nexus_default_exp]
x_var = 'regression_weight'

## Compute the median value of the total counts for each task
# Each profile array is summed over its second-to-last axis and averaged over
# its last axis; the per-task results are combined with `mean` before the median.
profiles = ImpScoreFile(f"output/{nexus_default_exp}/deeplift.imp_score.h5").get_profiles()
per_task_totals = [p.sum(axis=-2).mean(axis=-1) for _, p in profiles.items()]
nexus_median_N = np.median(mean(per_task_totals))
print(nexus_median_N)
# NOTE(review): `//` is floor division, so this is floor(median / 2) -- confirm
# that rounding down (rather than a plain `/ 2`) is intended.
nexus_natural_weight = nexus_median_N // 2

midpoint = 1  # relative weight at which the x axis is marked
dfs = df.loc[exps].sort_values(x_var)
relative_weight = dfs[x_var] / nexus_natural_weight

fig, axes = plt.subplots(2, 1, figsize=get_figsize(.35, 1), sharex=True,
                         gridspec_kw=dict(hspace=0))

# Top panel: profile auPRC.
ax = axes[0]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(relative_weight, dfs[nexus_metric_profile], s=s_default)
# Fainter guide line at an absolute regression weight of 10.
ax.axvline(x=10/nexus_natural_weight, color='grey', linestyle='--', alpha=0.1)
ax.set_xscale('log')
ax.set_ylabel('Profile\nauPRC')
ax.set_xlim([5e-3, 5e2])

# Bottom panel: total-count Spearman correlation.
ax = axes[1]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(relative_weight, dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Tot. counts $R_{s}$')
ax.set_xlabel("Relative total count weight")
ax.set_xlim([5e-3, 5e2])

fig.savefig(fig_nexus_hp / f'{x_var}.both-auprc-spearman.pdf', bbox_inches='tight')
# ChIP-seq: learning-rate sweep.
# Reference ChIP-seq experiment, appended to the sweeps below.
seq_default_exp = 'seq,peaks,OSN,0,10,1,FALSE,same,0.5,64,50,0.004,9,FALSE'
seq_lr_rows = (df.assay == 'seq') & (df.note == 'lr-nexus')
exps = list(df.index[seq_lr_rows]) + [seq_default_exp]
list(df.loc[exps].sort_values('lr')['lr'])

# Axis label for the negated profile loss used as the ChIP-seq profile metric.
seq_metric_name = 'Profile LL'

# Figure 1: negated profile loss vs. learning rate.
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], -dfs[seq_metric_profile], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel(seq_metric_name)
fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')

# Figure 2: total-count Spearman correlation vs. learning rate.
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[seq_metric_counts], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel(counts_spearman_name)
fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')
# ChIP-seq: effect of the transposed-convolution ("de-convolution") kernel size.
seq_deconv_rows = (df.assay == 'seq') & (df.note == 'deconv')
exps = list(df.index[seq_deconv_rows]) + [seq_default_exp]
x_var = 'tconv_kernel_size'
seq_deconv_ticks = [1, 20, 50, 100]

# Figure 1: negated profile loss vs. kernel size.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], -dfs[seq_metric_profile], s=s_default)
ax.set_ylabel(seq_metric_name)
ax.set_xticks(seq_deconv_ticks)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')

# Figure 2: total-count Spearman correlation vs. kernel size, with a faint
# vertical guide line at kernel size 1.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
ax.set_xticks(seq_deconv_ticks)
ax.scatter(dfs[x_var], dfs[seq_metric_counts], s=s_default)
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')
# ChIP-seq: the same kernel-size plots for the 'deconv2' experiments.
# These two figures are only displayed; their savefig calls are disabled.
seq_deconv2_rows = (df.assay == 'seq') & (df.note == 'deconv2')
exps = list(df.index[seq_deconv2_rows])
x_var = 'tconv_kernel_size'
seq_deconv2_ticks = [1, 20, 50, 100]

# Figure 1: negated profile loss vs. kernel size.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], -dfs[seq_metric_profile], s=s_default)
ax.set_ylabel(seq_metric_name)
ax.set_xticks(seq_deconv2_ticks)
ax.set_xlabel("De-convolution size")
# fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')

# Figure 2: total-count Spearman correlation vs. kernel size, with a faint
# vertical guide line at kernel size 1.
dfs = df.loc[exps].sort_values(x_var)
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.axvline(1, color='grey', alpha=0.2)
ax.set_xticks(seq_deconv2_ticks)
ax.scatter(dfs[x_var], dfs[seq_metric_counts], s=s_default)
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("De-convolution size")
# fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')
# ChIP-seq ('same' padding): effect of the number of layers, per task.
seq_layers_rows = (df.assay == 'seq') & (df.note == 'n layers') & (df.padding == 'same')
exps = list(df.index[seq_layers_rows]) + [seq_default_exp]
x_var = 'n_dil_layers'

# Figure 1: per-task negated profile loss. The x axis shows `n_dil_layers + 1`.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
dfs = df.loc[exps].sort_values(x_var)
for tf in osn_tfs:
    ax.scatter(dfs[x_var] + 1, -dfs[f'best-epoch/val_{tf}/profile_loss'],
               label=tf, color=tf_colors[tf], s=15)
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(seq_metric_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.profile-ll.pdf', bbox_inches='tight')

# Figure 2: per-task total-count Spearman correlation.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
dfs = df.loc[exps].sort_values(x_var)
for tf in osn_tfs:
    ax.scatter(dfs[x_var] + 1, dfs[f'valid-peaks/{tf}/counts/spearmanr'],
               label=tf, color=tf_colors[tf], s=15)
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.counts-spearman.pdf', bbox_inches='tight')
# ChIP-seq: effect of the total-count (regression) loss weight.
# NOTE(review): `seed` is compared against the string 'None', which assumes the
# column was read from the CSV as strings -- confirm.
seq_regw_rows = (
    (df.assay == 'seq')
    & (df.note == 'regression_weight')
    & (df.seed == 'None')
)
exps = list(df.index[seq_regw_rows]) + [seq_default_exp]
x_var = 'regression_weight'

## Compute the median value of the total counts for each task
# Each profile array is summed over its second-to-last axis and averaged over
# its last axis; the per-task results are combined with `mean` before the median.
profiles = ImpScoreFile(f"output/{seq_default_exp}/deeplift.imp_score.h5").get_profiles()
per_task_totals = [p.sum(axis=-2).mean(axis=-1) for _, p in profiles.items()]
seq_median_N = np.median(mean(per_task_totals))
print(seq_median_N)
# NOTE(review): `//` is floor division, so this is floor(median / 2) -- confirm
# that rounding down (rather than a plain `/ 2`) is intended.
seq_natural_weight = seq_median_N // 2

midpoint = 1  # relative weight at which the x axis is marked
dfs = df.loc[exps].sort_values(x_var)
relative_weight = dfs[x_var] / seq_natural_weight

fig, axes = plt.subplots(2, 1, figsize=get_figsize(.35, 1), sharex=True,
                         gridspec_kw=dict(hspace=0))

# Top panel: negated summed profile loss.
ax = axes[0]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(relative_weight, -dfs[f'best-epoch/val_profile_loss'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Profile LL')
ax.set_xlim([5e-3, 5e2])

# Bottom panel: total-count Spearman correlation.
ax = axes[1]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(relative_weight, dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Tot. counts $R_{s}$')
ax.set_xlabel("Relative total count weight")
ax.set_xlim([5e-3, 5e2])

fig.savefig(fig_seq_hp / f'{x_var}.both-profileLL-spearman.pdf', bbox_inches='tight')
# ChIP-seq: regression-weight sweep for the seed "42" runs, combined with the
# 'deconv2' run that has a kernel size of 50. This figure is not saved.
is_seq = df.assay == 'seq'
deconv2_k50_rows = is_seq & (df.note == 'deconv2') & (df.tconv_kernel_size == 50)
seed42_rows = is_seq & (df.seed == "42")
list(df.index[deconv2_k50_rows])
df.index[seed42_rows]
exps = (list(df.index[seed42_rows & (df.note == 'regression_weight')])
        + list(df.index[deconv2_k50_rows]))
x_var = 'regression_weight'

# `seq_median_N` is not recomputed here; the value computed earlier from the
# default ChIP-seq experiment's profiles is reused.
# NOTE(review): `//` is floor division -- confirm rounding down is intended.
seq_natural_weight = seq_median_N // 2

midpoint = 1  # relative weight at which the x axis is marked
dfs = df.loc[exps].sort_values(x_var)
relative_weight = dfs[x_var] / seq_natural_weight

fig, axes = plt.subplots(2, 1, figsize=get_figsize(.35, 1), sharex=True,
                         gridspec_kw=dict(hspace=0))

# Top panel: negated summed profile loss.
ax = axes[0]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(relative_weight, -dfs[f'best-epoch/val_profile_loss'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Profile LL')
ax.set_xlim([5e-3, 5e2])

# Bottom panel: total-count Spearman correlation.
ax = axes[1]
ax.axvline(x=midpoint, color='grey', linestyle='--', alpha=0.5)
ax.scatter(relative_weight, dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_xscale('log')
ax.set_ylabel('Tot. counts $R_{s}$')
ax.set_xlabel("Relative total count weight")
ax.set_xlim([5e-3, 5e2])
# fig.savefig(fig_seq_hp / f'{x_var}.both-profileLL-spearman.pdf', bbox_inches='tight')
# ChIP-nexus, genome-wide binary classification: learning-rate sweep.
binary_lr_rows = (df.assay == 'nexus') & (df.note == 'binary - lr')
exps = list(df.index[binary_lr_rows])
list(df.loc[exps].sort_values('lr')['lr'])

gw_binary_metric = 'valid-genome-wide/avg/class/auPR'
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# auPR vs. learning rate.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel('auPR')
fig.savefig(fig_nexus_hp / f'{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')

# Class-balance columns per task; show the first complete, de-duplicated row.
class_stats = ['frac_positive', 'n_negative', 'n_positive']
features = [f'valid-genome-wide/{tf}/class/{feature}'
            for tf in osnk_tfs
            for feature in class_stats]
dict(dfs[features].drop_duplicates().dropna().iloc[0])
# ChIP-nexus, genome-wide binary classification with an added profile task:
# auPR as a function of the profile weight.
binary_profile_rows = (
    (df.assay == 'nexus')
    & (df.note == 'binary + profile')
    & (df.padding == 'same')
)
exps = list(df.index[binary_profile_rows])
# Experiment whose auPR is drawn as the horizontal reference line.
default = 'nexus,gw,OSNK,1,0,0,FALSE,same,0.5,64,25,0.001,9,FALSE'
x_var = 'profile_weight'
dfs = df.loc[exps].sort_values(x_var)

fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[f'valid-genome-wide/avg/class/auPR'], s=s_default)
reference_aupr = df.loc[default][f'valid-genome-wide/avg/class/auPR']
ax.axhline(reference_aupr)
ax.set_xlim([5e-3, 2])
ax.set_ylabel("auPR")
ax.set_xscale('log')
ax.set_xlabel("Profile importance")
fig.savefig(fig_nexus_hp / f'{x_var}.gw-binary.auprc.pdf', bbox_inches='tight')
# ChIP-seq, genome-wide binary classification: learning-rate sweep.
seq_binary_lr_rows = (df.assay == 'seq') & (df.note == 'binary - lr')
exps = list(df.index[seq_binary_lr_rows])
list(df.loc[exps].sort_values('lr')['lr'])

gw_binary_metric = 'valid-genome-wide/avg/class/auPR'
x_var = 'lr'
dfs = df.loc[exps].sort_values(x_var)

# auPR vs. learning rate.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
ax.set_xscale("log")
ax.set_xlim([3e-4, 0.1])
ax.set_xlabel("Learning rate")
ax.set_ylabel('auPR')
fig.savefig(fig_seq_hp / f'{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')

# Class-balance columns per task; show the first complete, de-duplicated row.
class_stats = ['frac_positive', 'n_negative', 'n_positive']
features = [f'valid-genome-wide/{tf}/class/{feature}'
            for tf in osn_tfs
            for feature in class_stats]
dict(dfs[features].drop_duplicates().dropna().iloc[0])
# ChIP-nexus has roughly 3x more peaks, hence its auPR is higher.
# ChIP-seq ('same' padding), genome-wide binary classification with an added
# profile task: auPR as a function of the profile weight.
# Experiment whose auPR is drawn as the horizontal reference line.
default = 'seq,gw,OSN,1,0,0,FALSE,same,0.5,64,50,0.001,9,FALSE'
same_padding_rows = (
    (df.assay == 'seq')
    & (df.note == 'binary + profile')
    & (df.padding == 'same')
)
exps = list(df.index[same_padding_rows])
x_var = 'profile_weight'
dfs = df.loc[exps].sort_values(x_var)

fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[f'valid-genome-wide/avg/class/auPR'], s=s_default)
reference_aupr = df.loc[default][f'valid-genome-wide/avg/class/auPR']
ax.axhline(reference_aupr)
ax.set_ylim([0.07, 0.15])
ax.set_xlim([5e-3, 2])
ax.set_ylabel("auPR")
ax.set_xscale('log')
ax.set_xlabel("Profile importance")
fig.savefig(fig_seq_hp / f'{x_var}.same-padding.gw-binary.auprc.pdf', bbox_inches='tight')
# ChIP-seq ('valid' padding), genome-wide binary classification with an added
# profile task: auPR as a function of the profile weight.
# Experiment whose auPR is drawn as the horizontal reference line.
default = 'seq,gw,OSN,1,0,0,FALSE,valid,0.5,64,50,0.001,9,FALSE'
valid_padding_rows = (
    (df.assay == 'seq')
    & (df.note == 'binary + profile')
    & (df.padding == 'valid')
)
exps = list(df.index[valid_padding_rows])
x_var = 'profile_weight'
dfs = df.loc[exps].sort_values(x_var)

fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[f'valid-genome-wide/avg/class/auPR'], s=s_default)
reference_aupr = df.loc[default][f'valid-genome-wide/avg/class/auPR']
ax.axhline(reference_aupr)
ax.set_ylim([0.07, 0.15])
ax.set_xlim([5e-3, 2])
ax.set_ylabel("auPR")
ax.set_xscale('log')
ax.set_xlabel("Profile importance")
fig.savefig(fig_seq_hp / f'{x_var}.valid-padding.gw-binary.auprc.pdf', bbox_inches='tight')
# ChIP-nexus ('valid' padding): effect of the number of layers, per task.
valid_layers_rows = (df.assay == 'nexus') & (df.note == 'n layers') & (df.padding == 'valid')
exps = list(df.index[valid_layers_rows])
x_var = 'n_dil_layers'

# The table does not depend on the task, so sort it once instead of once per
# loop iteration. The x axis shows `n_dil_layers + 1`.
dfs = df.loc[exps].sort_values(x_var)
layer_count = dfs[x_var] + 1

# Figure 1: per-task profile auPRC.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(layer_count, dfs[f'valid-peaks/{tf}/profile/binsize=1/auprc'],
               label=tf, color=tf_colors[tf], s=15)
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(profile_auprc_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.profile-auprc.pdf', bbox_inches='tight')

# Figure 2: per-task total-count Spearman correlation.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
for tf in osnk_tfs:
    ax.scatter(layer_count, dfs[f'valid-peaks/{tf}/counts/spearmanr'],
               label=tf, color=tf_colors[tf], s=15)
ax.set_xlim([0, 14])
ax.set_xticks([1, 5, 10])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
# Single x-label, spelled exactly like the profile figure's (the label used to
# be set twice with different capitalisation, the second silently winning).
ax.set_xlabel("Number of Layers")
ax.set_ylabel(counts_spearman_name)
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.counts-spearman.pdf', bbox_inches='tight')
# ChIP-nexus ('valid' padding): effect of the transposed-convolution
# ("de-convolution") kernel size.
# NOTE(review): the appended `nexus_default_exp` name contains 'same', i.e. it
# looks like a same-padding run mixed into a valid-padding sweep -- confirm
# this is intended.
valid_deconv_rows = (df.assay == 'nexus') & (df.note == 'deconv') & (df.padding == 'valid')
exps = list(df.index[valid_deconv_rows]) + [nexus_default_exp]
x_var = 'tconv_kernel_size'
dfs = df.loc[exps].sort_values(x_var)
deconv_ticks = [1, 10, 25, 35]

# Figure 1: profile auPRC vs. kernel size.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[nexus_metric_profile], s=s_default)
ax.set_ylabel(profile_auprc_name)
ax.set_xticks(deconv_ticks)
ax.set_xlabel("De-convolution size")
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.profile-auprc.pdf', bbox_inches='tight')

# Figure 2: total-count Spearman correlation vs. kernel size, with faint
# vertical guide lines at kernel sizes 1 and 25.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
for guide_x in (1, 25):
    ax.axvline(guide_x, color='grey', alpha=0.2)
ax.set_xticks(deconv_ticks)
ax.scatter(dfs[x_var], dfs['valid-peaks/avg/counts/spearmanr'], s=s_default)
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("De-convolution size")
# Use the `.valid-padding.` infix like the profile figure above; without it
# this file would overwrite the same-padding `{x_var}.counts-spearman.pdf`.
fig.savefig(fig_nexus_hp / f'{x_var}.valid-padding.counts-spearman.pdf', bbox_inches='tight')
# ChIP-seq ('valid' padding): effect of the number of layers, per task.
seq_valid_layers_rows = (df.assay == 'seq') & (df.note == 'n layers') & (df.padding == 'valid')
exps = list(df.index[seq_valid_layers_rows])
x_var = 'n_dil_layers'

# Figure 1: per-task negated profile loss. The x axis shows `n_dil_layers + 1`.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
dfs = df.loc[exps].sort_values(x_var)
for tf in osn_tfs:
    ax.scatter(dfs[x_var] + 1, -dfs[f'best-epoch/val_{tf}/profile_loss'],
               label=tf, color=tf_colors[tf], s=15)
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(seq_metric_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.valid-padding.profile-ll.pdf', bbox_inches='tight')

# Figure 2: per-task total-count Spearman correlation.
fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
dfs = df.loc[exps].sort_values(x_var)
for tf in osn_tfs:
    ax.scatter(dfs[x_var] + 1, dfs[f'valid-peaks/{tf}/counts/spearmanr'],
               label=tf, color=tf_colors[tf], s=15)
ax.set_xlim([0, 14])
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
ax.set_xticks([1, 5, 10])
ax.set_ylabel(counts_spearman_name)
ax.set_xlabel("Number of Layers")
fig.savefig(fig_seq_hp / f'{x_var}.valid-padding.counts-spearman.pdf', bbox_inches='tight')
# ChIP-nexus, genome-wide binary classification: dropout sweeps for the
# 'basset' and 'factorized-basset' experiments.
# Both sweeps select `assay == 'nexus'` rows, so the figures are written to the
# ChIP-nexus directory (they were previously saved into the ChIP-seq one).
# NOTE(review): if these runs are really ChIP-seq, fix the filter instead.
gw_binary_metric = 'valid-genome-wide/avg/class/auPR'
x_var = 'dropout'

# --- basset ---
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'basset')])
dfs = df.loc[exps].sort_values(x_var)
dfs[gw_binary_metric].max()

fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
ax.set_ylabel('auPR')
ax.set_xlabel("Dropout")
fig.savefig(fig_nexus_hp / f'basset.{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')

# --- factorized-basset ---
exps = list(df.index[(df.assay == 'nexus') & (df.note == 'factorized-basset')])
dfs = df.loc[exps].sort_values(x_var)
dfs[gw_binary_metric].max()

fig, ax = plt.subplots(1, 1, figsize=get_figsize(.2, 1))
ax.grid(True, alpha=0.2)
ax.scatter(dfs[x_var], dfs[gw_binary_metric], s=s_default)
ax.set_ylabel('auPR')
ax.set_xlabel("Dropout")
fig.savefig(fig_nexus_hp / f'factorized-basset.{x_var}.gw-binary-auPR.pdf', bbox_inches='tight')