# Example parameters
ddir= "/users/avsec/workspace/basepair/data/"
model_file = f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/filters=128/model.h5"
dataspec_file = f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/filters=128/dataspec.yaml"
history_file = f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/filters=128/history.csv"
seq_width = 1000
gpu = 0
num_workers = 10
# Parameters
model_file = "/srv/scratch/avsec/workspace/chipnexus/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/model.h5"
dataspec_file = "/srv/scratch/avsec/workspace/chipnexus/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/dataspec.yaml"
history_file = "/srv/scratch/avsec/workspace/chipnexus/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/history.csv"
seq_width = 1000
gpu = 1
num_workers = 10
import basepair
import pandas as pd
import numpy as np
from basepair.cli.schemas import DataSpec, TaskSpec
from pathlib import Path
from keras.models import load_model
from basepair.config import create_tf_session, get_data_dir
from basepair.datasets import StrandedProfile
from basepair.preproc import AppendCounts
from basepair.losses import MultichannelMultinomialNLL
from basepair.config import valid_chr, test_chr
from basepair.plots import regression_eval, plot_loss
from basepair.cli.evaluate import eval_profile
from basepair import samplers
from basepair.math import softmax
ddir = get_data_dir()
create_tf_session(gpu)
model = load_model(model_file, custom_objects={"mc_multinomial_nll": MultichannelMultinomialNLL(1),
"MultichannelMultinomialNLL": MultichannelMultinomialNLL(1)})
ds = DataSpec.load(dataspec_file)
tasks = list(ds.task_specs)
# Best metrics
dfh = pd.read_csv(history_file)
dict(dfh.iloc[dfh.val_loss.idxmin()])
plot_loss(dfh, [f"{p}/{task}_loss"
for task in tasks
for p in ['profile']
], figsize=(len(tasks)*4, 4))
plot_loss(dfh, [f"{p}/{task}_loss"
for task in tasks
for p in ['counts']
], figsize=(len(tasks)*4, 4))
dl_valid = StrandedProfile(ds,
incl_chromosomes=valid_chr,
peak_width=seq_width,
shuffle=False,
target_transformer=AppendCounts())
valid = dl_valid.load_all(num_workers=num_workers)
y_true = valid["targets"]
from basepair.BPNet import BiasModel
if [task for task, task_spec in ds.task_specs.items() if task_spec.bias_model]:
bm = BiasModel(ds)
valid['inputs'] = bm.predict([valid["inputs"]])[0]
y_pred = model.predict(valid['inputs'], verbose=1)
import matplotlib.pyplot as plt
for task in tasks:
plt.figure()
yt = y_true[f'counts/{task}'].mean(-1)
yp = y_pred[ds.task2idx(task, 'counts')].mean(-1)
regression_eval(yt,
yp, alpha=0.1, task=task)
from joblib import Parallel, delayed
import basepair
task=tasks[0]
yp = softmax(y_pred[ds.task2idx(task, "profile")])
yt = y_true["profile/" + task]
x = np.ravel(yt / (1+yt.sum(axis=-2, keepdims=True)))
plt.figure(figsize=(12,3))
plt.subplot(121)
plt.hist(x[(x<0.04) ], bins=100);
plt.subplot(122)
plt.hist(x[(x<0.04) & (x>0.0001)], bins=100);
np.mean(x>0.0001)
out_df = Parallel(n_jobs=len(tasks))(delayed(basepair.cli.evaluate.eval_profile)(y_true["profile/" + task],
softmax(y_pred[ds.task2idx(task, "profile")]),
binsizes=[1,10],
pos_min_threshold=0.015,
neg_max_threshold=0.005,)
for task in tasks)
df = pd.concat([out_df[i].assign(task=task) for i,task in enumerate(tasks)])
df
from basepair.plots import plot_profiles
from basepair import samplers
tasks = list(ds.task_specs)
task = tasks[0]
#idx_list = samplers.top_sum_count(y_true['profile/Nanog'], 10)
#idx_list = samplers.top_sum_count(y_true['profile/Oct4'], 10)
# Total sum
#idx_list = samplers.top_sum_count(sum([y_true[f'profile/{task}'].mean(-1) for task in tasks])[:,:,np.newaxis], 10)
#idx_list = samplers.top_max_count(y_true['profile/Oct4'], 10)
np.random.seed(42)
idx_list = samplers.random(y_true[f'profile/{task}'], 20, )
idx_list[:4]
plot_profiles(y_true, y_pred, tasks, idx_list, preproc=None, figsize=(24, 8), tic_freq=50, xlim=[200,800])