Goal

  • Add additional evaluation metrics (averages across tasks) to the W&B run summaries

Tasks

  • [ ] Average the per-task counts and profile metrics for each dataset
  • [ ] Write the averaged metrics back to the W&B run summaries

Required files

-

In [1]:
# Imports
from basepair.imports import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
hv.extension('bokeh')
Using TensorFlow backend.
In [2]:
import wandb
In [3]:
api = wandb.Api()
In [4]:
# Fetch all runs in the avsec/basepair project, keeping only finished ones
runs = api.runs("avsec/basepair")
runs = [r for r in runs if r.state == 'finished']
In [5]:
tasks = ['Oct4', 'Sox2', 'Nanog', 'Klf4']
In [6]:
from basepair.utils import unflatten
In [8]:
def average_profile(pe):
    """Average the profile auPRC across tasks for each binsize.

    `pe` maps task -> binsize -> {'auprc': value, ...}.
    """
    tasks = list(pe)
    binsizes = list(pe[tasks[0]])
    return {binsize: mean([pe[task][binsize]['auprc'] for task in tasks])
            for binsize in binsizes}


def average_counts(pe):
    """Average each counts metric (e.g. pearsonr, spearmanr) across tasks.

    `pe` maps task -> metric -> value.
    """
    tasks = list(pe)
    metrics = list(pe[tasks[0]])
    return {metric: mean([pe[task][metric] for task in tasks])
            for metric in metrics}
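For reference, a minimal sketch of the input `average_profile` expects; the values are made up, and the structure is inferred from the summary keys used later in this notebook (eval/<dataset>/profile/<task>/<binsize>/auprc):

In [ ]:
# Hypothetical example input: task -> binsize -> metric dict
pe = {'Oct4': {'binsize=1': {'auprc': 0.35}, 'binsize=10': {'auprc': 0.70}},
      'Sox2': {'binsize=1': {'auprc': 0.39}, 'binsize=10': {'auprc': 0.75}}}
average_profile(pe)  # -> {'binsize=1': 0.37, 'binsize=10': 0.725}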
In [9]:
def prefix_dict(d, prefix):
    """Prepend `prefix` to every key of `d`."""
    return {prefix + k: v for k, v in d.items()}
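A quick usage sketch (hypothetical values):

In [ ]:
prefix_dict({'pearsonr': 0.6}, 'eval/train-peaks/counts/avg/')
# -> {'eval/train-peaks/counts/avg/pearsonr': 0.6}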
    
In [15]:
# Re-fetch the list of finished runs
runs = api.runs("avsec/basepair")
runs = [r for r in runs if r.state == 'finished']
In [14]:
# Re-nest the summary keys: move the 'avg' level above 'counts'/'profile'
# (eval/<dataset>/counts/avg/* -> eval/<dataset>/avg/counts/*, same for profile)
for r in runs:
    e = unflatten(dict(r.summary), separator='/')
    if 'eval' not in e:
        continue
    for dataset in e['eval']:
        if 'avg' in e['eval'][dataset]:
            # already migrated
            continue
        if 'counts' in e['eval'][dataset]:
            add_dict = prefix_dict(e['eval'][dataset]['counts']['avg'], f'eval/{dataset}/avg/counts/')
            r.summary.update(add_dict)
        if 'profile' in e['eval'][dataset]:
            add_dict = prefix_dict(e['eval'][dataset]['profile']['avg'], f'eval/{dataset}/avg/profile/')
            r.summary.update(add_dict)
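unflatten comes from basepair.utils; assuming it is the usual flat-key-to-nested-dict helper, the loop above operates on structures like this (hypothetical values):

In [ ]:
# Assumed behaviour of unflatten (hypothetical values)
unflatten({'eval/train-peaks/counts/avg/pearsonr': 0.6}, separator='/')
# -> {'eval': {'train-peaks': {'counts': {'avg': {'pearsonr': 0.6}}}}}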
In [18]:
# Copy the average binsize=1 profile auPRC under the re-nested avg/ path
for r in runs:
    e = unflatten(dict(r.summary), separator='/')
    if 'eval' not in e:
        continue
    for dataset in e['eval']:
        try:
            if 'profile' in e['eval'][dataset]:
                add_dict = {f"eval/{dataset}/avg/profile/binsize=1/auprc": e['eval'][dataset]['profile']['avg']['binsize=1']}
                r.summary.update(add_dict)
        except KeyError:
            # some runs lack the avg/binsize=1 entry; skip them
            pass
In [50]:
for r in runs:
    e = unflatten(dict(r.summary), separator='/')
    for dataset in e['eval']:
        if 'avg' in e['eval'][dataset]['counts']:
            continue
        if 'counts' in e['eval'][dataset]:
            add_dict = average_counts(e['eval'][dataset]['counts'])
            add_dict = prefix_dict(add_dict, f'eval/{dataset}/counts/avg/')
            r.summary.update(add_dict)
        if 'profile' in e['eval'][dataset]:
            add_dict = average_profile(e['eval'][dataset]['profile'])
            add_dict = prefix_dict(add_dict, f'eval/{dataset}/profile/avg/')
            r.summary.update(add_dict)
    
In [36]:
average_counts(e['eval']['train-peaks']['counts'])
Out[36]:
{'pearsonr': 0.5973620414733887, 'spearmanr': 0.6105665951403851}
In [34]:
average_profile(e['eval']['train-peaks']['profile'])
Out[34]:
{'binsize=1': 0.37119958375410106, 'binsize=10': 0.7263690393834852}
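With the averaged metrics written back, they can be pulled into a single table for comparing runs; a minimal sketch, assuming pandas is available as pd (e.g. via the star import above):

In [ ]:
# Sketch: collect the newly added averaged metrics into a DataFrame
df = pd.DataFrame([{'run': r.name,
                    **{k: v for k, v in dict(r.summary).items() if '/avg/' in k}}
                   for r in runs])
df.head()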
In [27]:
# Parse the first run's summary into a nested dict for inspection
e = unflatten(dict(runs[0].summary), separator='/')
In [ ]:
runs[0].summary
In [ ]:
# Example of a per-task summary key:
'eval/train-peaks/profile/Oct4/binsize=10/n_positives'
In [8]:
runs
Out[8]:
<Runs avsec/basepair (34)>