Goal

  • Explore the results of genome-wide training of binary classifiers

Conclusion

  • the core architecture of BPNet (9 dilated convolutional layers) seems well suited to the task
In [1]:
# Inline, high-resolution (retina) figure rendering
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# NOTE(review): star imports implicitly provide `pd`, `paper_config`, `get_figsize`,
# ggplot components, etc. — consider explicit imports so name origins are clear.
from basepair.imports import *
from plotnine import *
from basepair.utils import flatten
import warnings
warnings.filterwarnings("ignore")  # silence third-party warning noise globally

# Show long strings (e.g. file paths) in full in DataFrame displays
pd.options.display.max_colwidth = 100

paper_config()  # presumably applies paper-style plot defaults — provided by a star import above
Using TensorFlow backend.
In [2]:
figures = f'{ddir}/figures/model-evaluation/genome-wide-training/'

Load new results

In [3]:
from basepair.config import get_data_dir
ddir = get_data_dir()  # project root data directory

exp_dir = f"{ddir}/processed/chipseq/labels"

exp_id = 2  # Run ID
# Evaluation grid: one trained model per (data_subset, dataset, model) combination
data_subsets = ['accessible', 'genome-wide']  # genomic regions used for training/eval
datasets = ['chipnexus', 'chipseq']           # assay type
models = ['BPNet-transfer', 'BPNetClassifier', 'Basset']

tasks = ['Sox2', 'Oct4']  # TF binding tasks evaluated for each model
In [4]:
# Collect per-task validation metrics for every (subset, assay, model) run
# into one long-format DataFrame.
o = []
for data_subset in data_subsets:
    for dataset in datasets:
        for model in models:
            # Validation metrics for this run, keyed by task name
            metrics_nested = read_json(f"{exp_dir}/{dataset}/{data_subset}/{model}/{exp_id}/evaluation.valid.json")
            for task in tasks:
                # Merge into a fresh dict instead of mutating metrics_nested[task]
                # in place — the original mutation made the cell non-idempotent and
                # left `o` holding aliases into metrics_nested.
                o.append({**metrics_nested[task],
                          "task": task,
                          "assay": dataset,
                          "data_subset": data_subset,
                          "model": model})

dfm = pd.DataFrame(o)  # one row per (model, assay, data_subset, task)
In [5]:
# auPRC per model, split by data subset and task
fig = (
    ggplot(aes(x='assay', fill='model', y='auprc'), dfm)
    + geom_bar(position='dodge', stat='identity')
    + facet_grid(".~data_subset+task")
    + scale_fill_brewer('qual', 'Paired')
    + theme_classic()
    + theme(axis_text_x=element_text(angle=20, hjust=1))
)
fig
Out[5]:
<ggplot: (8740252009641)>

Old results

In [6]:
# TODO - fill in the metrics manually
# TODO - fill in the metrics manually
# Baseline auPRC values gathered from earlier runs.
# Row schema: [model, assay, data_subset, task, auprc]
other_results = [
# ---------------- copied from tf-dragonn logs
 ['tfdragonn-default', 'chipnexus', 'accessible', 'Oct4', 0.302],
 ['tfdragonn-default', 'chipnexus', 'accessible', 'Sox2', 0.165],
 ['tfdragonn-default', 'chipnexus', 'genome-wide', 'Oct4', 0.180],
 ['tfdragonn-default', 'chipnexus', 'genome-wide', 'Sox2', 0.075],
 ['tfdragonn-default', 'chipseq', 'accessible', 'Oct4', 0.230],
 ['tfdragonn-default', 'chipseq', 'accessible', 'Sox2', 0.093],
 ['tfdragonn-default', 'chipseq', 'genome-wide', 'Oct4', 0.129],
 ['tfdragonn-default', 'chipseq', 'genome-wide', 'Sox2', 0.055],
 
 
 ['basset', 'chipnexus', 'accessible', 'Oct4', 0.298],
 ['basset', 'chipnexus', 'accessible', 'Sox2', 0.134],
 ['basset', 'chipnexus', 'genome-wide', 'Oct4', 0.194],
 ['basset', 'chipnexus', 'genome-wide', 'Sox2', 0.071],
 ['basset', 'chipseq', 'accessible', 'Oct4', 0.255],
 ['basset', 'chipseq', 'accessible', 'Sox2', 0.097],
 # genome-wide basset numbers not available yet:
 #['basset', 'chipseq', 'genome-wide', 'Oct4', 0.129],
 #['basset', 'chipseq', 'genome-wide', 'Sox2', 0.055],
    
# ---------------- copied from the notebook
 # new-run values come from dfm instead:
 #['BPNet-transfer', 'chipnexus', 'accessible', 'Oct4', 0.38444],
 #['BPNet-transfer', 'chipnexus', 'accessible', 'Sox2', 0.2224],    
]
In [7]:
dfm[dfm.model == 'BPNet-transfer'].query("data_subset=='accessible'")
Out[7]:
accuracy assay auc auprc data_subset model task
0 0.9969 chipnexus 0.9331 0.1746 accessible BPNet-transfer Sox2
1 0.9864 chipnexus 0.9266 0.3738 accessible BPNet-transfer Oct4
6 0.9968 chipseq 0.9505 0.1641 accessible BPNet-transfer Sox2
7 0.9823 chipseq 0.9024 0.3052 accessible BPNet-transfer Oct4
In [8]:
# Baseline results as a long-format frame matching dfm's column names
dfo = pd.DataFrame.from_records(
    other_results,
    columns=['model', 'assay', 'data_subset', 'task', 'auprc'],
)
# Stack new and baseline results; sort=True aligns the differing column sets
df = pd.concat([dfm, dfo], sort=True)
In [9]:
# Paper-sized figure comparing new models against baselines.
# NOTE(review): the `plotnine` module name is not bound by `from plotnine import *`
# in the import cell — presumably basepair.imports provides it; verify on a fresh kernel.
plotnine.options.figure_size = get_figsize(0.6, aspect=0.5)
fig = ggplot(aes(x='assay', fill='model', y='auprc'), df) + \
  geom_bar(position='dodge', stat='identity') + facet_grid(".~data_subset+task") + \
  scale_fill_brewer('qual', 'Paired') + \
  theme_classic(base_size=10, base_family='Arial') +  \
  theme(legend_position='right', axis_text_x=element_text(angle=20, hjust = 1))
# Save vector (pdf, for the paper) and raster (png, for quick viewing) versions
fig.save(f"{figures}/binary-classifiers.v2.pdf")
fig.save(f"{figures}/binary-classifiers.v2.png")
fig
Out[9]:
<ggplot: (-9223363296633839279)>

Seems that the new model performs worse. Why is that?

Old plot

In [47]:
# Show the previously generated plot for side-by-side comparison with the
# new results above. NOTE(review): execution count In[47] is out of order —
# re-run the notebook top-to-bottom before sharing.
from IPython.display import Image 
Image(filename=f"{figures}/binary-classifiers.png", width=500)
Out[47]: