%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from basepair.imports import *
from plotnine import *
from basepair.utils import flatten
import warnings
# Notebook-wide setup: silence all warnings and widen pandas' column display.
warnings.filterwarnings("ignore")
pd.options.display.max_colwidth = 100
# `paper_config` comes from one of the star imports above; presumably it
# applies the plotting defaults used for paper figures - TODO confirm.
paper_config()

# Resolve the data root first: both `figures` and `exp_dir` are derived from
# `ddir`, so it has to be assigned before either f-string is evaluated.
from basepair.config import get_data_dir
ddir = get_data_dir()
figures = f'{ddir}/figures/model-evaluation/genome-wide-training/'
exp_dir = f"{ddir}/processed/chipseq/labels"
exp_id = 2  # Run ID

# Grid of experiments whose evaluation files are read further down.
data_subsets = ['accessible', 'genome-wide']
datasets = ['chipnexus', 'chipseq']
models = ['BPNet-transfer', 'BPNetClassifier', 'Basset']
tasks = ['Sox2', 'Oct4']
# Gather the validation metrics of every (data_subset, assay, model, task)
# combination into one flat list of records, then tabulate them.
o = []
for data_subset in data_subsets:
    for dataset in datasets:
        for model in models:
            # One evaluation file per trained model, indexed by task name.
            metrics_nested = read_json(
                f"{exp_dir}/{dataset}/{data_subset}/{model}/{exp_id}/evaluation.valid.json"
            )
            for task in tasks:
                # Build a fresh record instead of annotating the loaded
                # mapping in place, so `metrics_nested` stays as read.
                metrics = {
                    **metrics_nested[task],
                    "task": task,
                    "assay": dataset,
                    "data_subset": data_subset,
                    "model": model,
                }
                o.append(metrics)
# One row per record; columns are the metric names plus the four labels above.
dfm = pd.DataFrame(o)
# Dodged bar chart of auPRC per assay, coloured by model, with one facet
# column per (data_subset, task) pair.
fig = (
    ggplot(aes(x='assay', fill='model', y='auprc'), dfm)
    + geom_bar(position='dodge', stat='identity')
    + facet_grid(".~data_subset+task")
    + scale_fill_brewer('qual', 'Paired')
    + theme_classic()
    + theme(axis_text_x=element_text(angle=20, hjust=1))
)
# Bare expression: rendered as the cell output when run in a notebook.
fig
# TODO - fill in the metrics manually
# Hand-transcribed baseline scores, one row per
# (model, assay, data_subset, task, auprc).
other_results = [
    list(row)
    for row in (
        # ---------------- copied from tf-dragonn logs
        ('tfdragonn-default', 'chipnexus', 'accessible', 'Oct4', 0.302),
        ('tfdragonn-default', 'chipnexus', 'accessible', 'Sox2', 0.165),
        ('tfdragonn-default', 'chipnexus', 'genome-wide', 'Oct4', 0.180),
        ('tfdragonn-default', 'chipnexus', 'genome-wide', 'Sox2', 0.075),
        ('tfdragonn-default', 'chipseq', 'accessible', 'Oct4', 0.230),
        ('tfdragonn-default', 'chipseq', 'accessible', 'Sox2', 0.093),
        ('tfdragonn-default', 'chipseq', 'genome-wide', 'Oct4', 0.129),
        ('tfdragonn-default', 'chipseq', 'genome-wide', 'Sox2', 0.055),
        ('basset', 'chipnexus', 'accessible', 'Oct4', 0.298),
        ('basset', 'chipnexus', 'accessible', 'Sox2', 0.134),
        ('basset', 'chipnexus', 'genome-wide', 'Oct4', 0.194),
        ('basset', 'chipnexus', 'genome-wide', 'Sox2', 0.071),
        ('basset', 'chipseq', 'accessible', 'Oct4', 0.255),
        ('basset', 'chipseq', 'accessible', 'Sox2', 0.097),
        # Disabled: basset / chipseq / genome-wide. These values repeat the
        # tfdragonn-default rows above - presumably placeholders, TODO confirm.
        # ('basset', 'chipseq', 'genome-wide', 'Oct4', 0.129),
        # ('basset', 'chipseq', 'genome-wide', 'Sox2', 0.055),
        # ---------------- copied from the notebook
        # ('BPNet-transfer', 'chipnexus', 'accessible', 'Oct4', 0.38444),
        # ('BPNet-transfer', 'chipnexus', 'accessible', 'Sox2', 0.2224),
    )
]
# Imported locally so this cell does not depend on the star imports:
# `plotnine.options` needs the module name itself, which
# `from plotnine import *` does not bind, and `os` is used to prepare the
# output directory.
import os
import plotnine

# Inspect the BPNet-transfer rows on the accessible subset (cell output only).
dfm[dfm.model == 'BPNet-transfer'].query("data_subset=='accessible'")

# Stack the metrics loaded from disk on top of the hand-transcribed baselines.
dfo = pd.DataFrame(other_results, columns=['model', 'assay', 'data_subset', 'task', 'auprc'])
# sort=True orders the union of columns; a column present in only one of the
# two frames is filled with NaN for the rows of the other.
df = pd.concat([dfm, dfo], sort=True)

plotnine.options.figure_size = get_figsize(0.6, aspect=0.5)
# Same chart layout as for `dfm` alone, now including the baseline models.
fig = (
    ggplot(aes(x='assay', fill='model', y='auprc'), df)
    + geom_bar(position='dodge', stat='identity')
    + facet_grid(".~data_subset+task")
    + scale_fill_brewer('qual', 'Paired')
    + theme_classic(base_size=10, base_family='Arial')
    + theme(legend_position='right', axis_text_x=element_text(angle=20, hjust=1))
)

# Saving fails if the target directory is missing, so create it up front.
os.makedirs(figures, exist_ok=True)
fig.save(f"{figures}/binary-classifiers.v2.pdf")
fig.save(f"{figures}/binary-classifiers.v2.png")
fig
It seems that the new model performs worse. Why is that?
from IPython.display import Image
# Display `binary-classifiers.png` - note there is no `.v2` suffix, so this is
# presumably the figure from an earlier run, shown for comparison (TODO confirm).
Image(
    width=500,
    filename=f"{figures}/binary-classifiers.png",
)