# Imports
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import basepair
from basepair.imports import *
# hv.extension('bokeh')
# Common paths
# NOTE(review): `ddir` is presumably provided by the `basepair.imports` star
# import above — confirm.
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
# Bind this notebook's TensorFlow session to GPU 0.
create_tf_session(0)
# IPython magic: list the model directory (expects dataspec.yaml / hparams.yaml).
ls {model_dir}
from basepair.cli.schemas import HParams
from basepair.datasets import get_StrandedProfile_datasets
from basepair.models import seq_bpnet_cropped, seq_bpnet_cropped_extra_seqlen
# Load the data specification and hyper-parameters saved alongside the model.
ds = DataSpec.load(model_dir / "dataspec.yaml")
hp = HParams.load(model_dir / "hparams.yaml")
# Echo the model kwargs this run was trained with.
for key, value in hp.model.kwargs.items():
    print(f"{key} = {value}")
# hparams
filters = 64
conv1_kernel_size = 25
tconv_kernel_size = 25
n_dil_layers = 9
lr = 0.004
c_task_weight = 10
batchnorm = False
!cat {model_dir}/hparams.yml
# Build the single-task ('Sox2') cropped BPNet model with both profile and
# counts heads.
model_kwargs = dict(
    filters=filters,
    conv1_kernel_size=conv1_kernel_size,
    tconv_kernel_size=tconv_kernel_size,
    tconv_n_hidden=0,
    n_dil_layers=n_dil_layers,
    lr=lr,
    batchnorm=batchnorm,
    c_task_weight=c_task_weight,
    use_profile=True,
    use_counts=True,
    outputs_per_task=2,
    task_use_bias=False,
    profile_loss='mc_multinomial_nll',
    count_loss='mse',
)
m = seq_bpnet_cropped(['Sox2'], **model_kwargs)
# Additional input sequence length required by the cropped architecture,
# derived from the same hyper-parameters the model was built with.
add_seqlen = seq_bpnet_cropped_extra_seqlen(
    conv1_kernel_size, n_dil_layers, tconv_kernel_size)
add_seqlen
from basepair.models import seq_bpnet_cropped_extra_seqlen
# Scratch computation: extra seqlen for a hypothetical 12-dilated-layer model
# (this value is overwritten immediately below; kept for reference).
add_seqlen = seq_bpnet_cropped_extra_seqlen(25, 12, 25)
add_seqlen
# Compute the additional required sequence length actually used downstream
# (by m.predict and by get_StrandedProfile_datasets).
# FIX(review): this previously hard-coded (25, 11, 25), but the model above
# was built with n_dil_layers=9 — the resulting add_seqlen would not match
# the model's required input length. Use the shared hyper-parameter
# variables instead. TODO confirm 11 was not intentional exploration.
add_seqlen = seq_bpnet_cropped_extra_seqlen(conv1_kernel_size,
                                            n_dil_layers,
                                            tconv_kernel_size)
add_seqlen
# Sanity check: the model should accept a one-hot input of shape
# (1, 1000 + add_seqlen, 4); print the shape of the first output head.
# NOTE(review): assumes add_seqlen matches the architecture built above —
# confirm it was computed with the same n_dil_layers.
print(m.predict(np.ones((1, 1000 + add_seqlen, 4)))[0].shape)
m.summary()
# Build train/valid/test splits with chromosome hold-outs; input sequences
# are wider than the 1000bp peak windows by the model's extra seqlen.
train, valid, test = get_StrandedProfile_datasets(
    ds,
    peak_width=1000,
    seq_width=1000 + add_seqlen,
    shuffle=True,
    valid_chr=['chr2', 'chr3', 'chr4'],
    test_chr=['chr1', 'chr8', 'chr9'],
)
# Materialize both splits fully into memory up-front.
train_all = train.load_all(num_workers=20)
valid_all = valid.load_all(num_workers=20)
from uuid import uuid4
from keras.callbacks import EarlyStopping, History, ModelCheckpoint

output_dir = Path("/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/extended_exp")
# Create a uniquely-named experiment directory (first 8 chars of a UUID4).
run_id = str(uuid4())[:8]
exp_dir = output_dir / run_id
exp_dir.mkdir(parents=True, exist_ok=True)
# Best-model checkpoint path (Keras expects a plain string).
ckp_file = str(exp_dir / 'model.h5')
# Train on the fully-materialized arrays: stop after 5 epochs without
# validation improvement and keep only the best checkpoint on disk.
history = m.fit(train_all['inputs'],
train_all['targets'],
batch_size=256,
epochs=100,
validation_data=(valid_all['inputs'], valid_all['targets']),
callbacks=[EarlyStopping(patience=5),
History(),
ModelCheckpoint(ckp_file, save_best_only=True)]
)
# get the best model
# NOTE(review): the model was compiled with the custom loss
# 'mc_multinomial_nll'; plain keras load_model would need custom_objects for
# that — presumably this `load_model` (from the star import) handles it.
# Confirm.
model = load_model(ckp_file)