from basepair.imports import *
from plotnine import *
import warnings
warnings.filterwarnings("ignore")
# Let DataFrame output show cell contents up to 100 characters wide.
pd.set_option("display.max_colwidth", 100)

# The axes along which the classifiers are compared.
assays = ["chipnexus", "chipseq"]
subsets = ["genome-wide", "accessible"]
tasks = ["Oct4", "Sox2"]

# Root directory holding one sub-directory per assay.
# NOTE(review): `ddir` is not defined in this file; presumably it is provided
# by `from basepair.imports import *` — confirm.
exp_dir = f"{ddir}/processed/chipseq/labels"
def read_bpnet(exp_dir, assay, subset, tasks, model='BPNetClassifier', exp='default'):
    """Collect the validation auPRC of one experiment, one entry per task.

    Reads `{exp_dir}/{assay}/{subset}/{model}/{exp}/evaluation.valid.json`
    and looks up `metrics[task]['auprc']` for every requested task.

    Args:
      exp_dir: root directory of the experiments
      assay: name of the assay sub-directory
      subset: name of the subset sub-directory
      tasks: iterable of task names; each is used as a top-level key
        of the loaded JSON
      model: name of the model sub-directory; also reported in the
        `model` field of every returned entry
      exp: name of the experiment sub-directory

    Returns:
      List of dicts with the keys `model`, `assay`, `subset`, `task`
      and `auprc`, in the order of `tasks`.

    Raises:
      Lookup errors (e.g. KeyError) propagate when a task or its
      'auprc' entry is missing from the loaded metrics; errors raised
      by `read_json` propagate unchanged.
    """
    # The file does not depend on the task, so it is loaded once
    # instead of once per task.
    metrics = read_json(f"{exp_dir}/{assay}/{subset}/{model}/{exp}/evaluation.valid.json")
    return [
        {
            # Report the model that was actually read rather than a fixed label
            "model": model,
            "assay": assay,
            "subset": subset,
            "task": task,
            "auprc": metrics[task]['auprc'],
        }
        for task in tasks
    ]
# IPython shell capture (not plain Python): list every evaluation.valid.json
# located exactly four directory levels below exp_dir. read_bpnet uses the
# same layout: {exp_dir}/{assay}/{subset}/{model}/{exp}/evaluation.valid.json.
# NOTE(review): if nothing matches, the captured lines are presumably the
# `ls` error message rather than paths — confirm before relying on `files`.
files = !ls {exp_dir}/*/*/*/*/evaluation.valid.json
from basepair.utils import flatten
from copy import deepcopy
from kipoi.utils import relative_path
def add_entry(d, k, v):
    """Return a deep copy of `d` in which key `k` is set to `v`.

    The input `d` is never modified; nested containers are copied as well.
    """
    extended = deepcopy(d)
    extended[k] = v
    return extended
# One row per evaluation file: the flattened metrics plus an `exp` column
# derived from the file's directory and exp_dir.
# NOTE(review): `flatten` presumably joins nested keys into names such as
# `Oct4_auprc` (the columns selected below) — confirm.
dfa = pd.DataFrame([
    add_entry(
        flatten(read_json(path)),
        'exp',
        relative_path(os.path.dirname(path), exp_dir),
    )
    for path in files
])

# Per-task auPRC of every experiment, ordered by experiment path.
print(
    dfa[['Oct4_auprc', 'Sox2_auprc', 'exp']]
    .sort_values("exp")
    .to_string()
)

# Notebook display of the raw file list.
files
# Ad-hoc inspection of individual experiment directories through IPython
# shell magics (not plain Python).
# DATA is a hard-coded absolute path; its trailing slash yields `//` in the
# paths composed below.
# NOTE(review): presumably the same directory as exp_dir — confirm.
DATA='/srv/scratch/avsec/workspace/chipnexus/data/processed/chipseq/labels/'
# Contents of BPNet-transfer experiment `1` (chipnexus, genome-wide)
!ls {DATA}/chipnexus/genome-wide/BPNet-transfer/1
# Experiments available for BPNetClassifier (chipnexus, genome-wide)
!ls {DATA}/chipnexus/genome-wide/BPNetClassifier
# Validation metrics of the `default` experiment
!cat {DATA}/chipnexus/genome-wide/BPNetClassifier/default/evaluation.valid.json
# Configuration differences between experiment `2` and `default`
!diff {DATA}/chipnexus/genome-wide/BPNetClassifier/2/config.gin {DATA}/chipnexus/genome-wide/BPNetClassifier/default/config.gin
# Validation metrics of experiment `2`
!cat {DATA}/chipnexus/genome-wide/BPNetClassifier/2/evaluation.valid.json
# Gather the BPNetClassifier validation metrics for every (subset, assay) pair.
# read_bpnet receives the full `tasks` list and returns one entry per task,
# so no additional loop over `tasks` is used here: such a loop would repeat
# every entry len(tasks) times in the table and in the plotted data.
bpnet_results = [
    r
    for subset in subsets
    for assay in assays
    for r in read_bpnet(exp_dir, assay, subset, tasks)
]
print(pd.DataFrame(bpnet_results).to_string())
# Manually transcribed auPRC values of the non-BPNetClassifier models.
# Row layout: [model, assay, subset, task, auprc].
# TODO - fill in the remaining metrics manually (basset chipseq genome-wide
# and the other BPNet-transfer combinations are not listed yet)
other_results = [
# ---------------- copied from tf-dragonn logs
['tfdragonn-default', 'chipnexus', 'accessible', 'Oct4', 0.302],
['tfdragonn-default', 'chipnexus', 'accessible', 'Sox2', 0.165],
['tfdragonn-default', 'chipnexus', 'genome-wide', 'Oct4', 0.180],
['tfdragonn-default', 'chipnexus', 'genome-wide', 'Sox2', 0.075],
['tfdragonn-default', 'chipseq', 'accessible', 'Oct4', 0.230],
['tfdragonn-default', 'chipseq', 'accessible', 'Sox2', 0.093],
['tfdragonn-default', 'chipseq', 'genome-wide', 'Oct4', 0.129],
['tfdragonn-default', 'chipseq', 'genome-wide', 'Sox2', 0.055],
['basset', 'chipnexus', 'accessible', 'Oct4', 0.298],
['basset', 'chipnexus', 'accessible', 'Sox2', 0.134],
['basset', 'chipnexus', 'genome-wide', 'Oct4', 0.194],
['basset', 'chipnexus', 'genome-wide', 'Sox2', 0.071],
['basset', 'chipseq', 'accessible', 'Oct4', 0.255],
['basset', 'chipseq', 'accessible', 'Sox2', 0.097],
# NOTE(review): the two disabled rows below carry the same values as the
# tfdragonn-default chipseq genome-wide rows above, so they look like
# placeholders that were never updated — confirm before enabling them.
#['basset', 'chipseq', 'genome-wide', 'Oct4', 0.129],
#['basset', 'chipseq', 'genome-wide', 'Sox2', 0.055],
# ---------------- copied from the notebook
['BPNet-transfer', 'chipnexus', 'accessible', 'Oct4', 0.38444],
['BPNet-transfer', 'chipnexus', 'accessible', 'Sox2', 0.2224],
]
# Combine the BPNetClassifier metrics with the manually transcribed ones
# into a single frame with the columns model, assay, subset, task, auprc.
df = pd.DataFrame(bpnet_results)
dfo = pd.DataFrame(
    other_results,
    columns=['model', 'assay', 'subset', 'task', 'auprc'],
)
df = pd.concat([df, dfo], sort=True)

# Bar chart of auPRC per assay, one bar per model, with one facet column
# per subset/task combination.
fig = (
    ggplot(aes(x='assay', fill='model', y='auprc'), df)
    + geom_bar(position='dodge', stat='identity')
    + facet_grid(".~subset+task")
    + scale_fill_brewer('qual', 'Paired')
    + theme_classic()
    + theme(axis_text_x=element_text(angle=20, hjust=1))
)

# Notebook display of the figure.
fig

# Local export, currently disabled:
# fig.save("binary-classifiers.png", dpi=300)
# fig.save("binary-classifiers.pdf", dpi=300)

# upload the figure to gdrive
gdrive_upload_fig(fig, 'genome-wide-training/binary-classifiers')