-
-
# Imports
# IPython magics (only valid inside a notebook/IPython session):
# draw matplotlib figures inline and use the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# NOTE(review): star import - presumably the source of `Path`, `ddir`, `hv`
# and `tqdm` used further down; verify against basepair.imports.
from basepair.imports import *
# Activate the 'bokeh' extension of `hv` (presumably holoviews - confirm).
hv.extension('bokeh')
# Common paths
# Model directory for the n_dil_layers=9 run, rooted at `ddir`.
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
# Sub-directory of the model directory; a plain string is enough here because
# the path has no placeholders to interpolate.
modisco_dir = model_dir / "modisco/all/profile/"
# Absolute directory used for outputs of this notebook.
output_dir = Path("/srv/www/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/profile")
# create_tf_session(0)
from basepair.datasets import get_gw_StrandedProfile_datasets, StrandedProfile

# Inputs for the dataset: the dataspec YAML and the gzipped TSV of intervals.
dataspec_path = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/dataspec.yml'
intervals_file = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/genomewide/oct-sox-nanog-klf/1kb.osnk.tsv.gz'

# Dataset over the intervals above, kept in file order (no shuffling).
ds = StrandedProfile(
    dataspec_path,
    intervals_file=intervals_file,
    peak_width=1000,
    seq_width=10000,
    shuffle=False,
)

from basepair.models import seq_bpnet_cropped_extra_seqlen

# Bare expression: the value is only displayed by the notebook, not stored.
# NOTE(review): presumably the extra input length required by the cropped
# model for these hyper-parameters - confirm against basepair.models.
seq_bpnet_cropped_extra_seqlen(conv1_kernel_size=21, n_dil_layers=9, tconv_kernel_size=25)

# Fetch the first example to check that the dataset can be indexed.
ds[0]
for i in tqdm(range(len(ds))):
a = ds[i]
%debug
ds[i]
# Build the train and validation datasets from the same dataspec and interval
# file that `ds` was built from. The path variables defined above are reused
# rather than repeating the literals, so the two cannot drift apart.
train, valid = get_gw_StrandedProfile_datasets(
    dataspec_path,
    intervals_file=intervals_file,
)
# NOTE(review): the lines below look like gin-config bindings
# (`@configurable()` reference, `function.parameter = value`), not Python;
# they presumably belong in a .gin file or a raw notebook cell - confirm.
train.data = @get_gw_StrandedProfile_datasets() # use the default train and valid chromosomes
# Same dataspec path as `dataspec_path` used for the Python-side dataset.
get_gw_StrandedProfile_datasets.dataspec = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/dataspec.yml'
# use genome-wide training
get_gw_StrandedProfile_datasets.intervals_file = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/genomewide/oct-sox-nanog-klf/1kb.osnk.tsv.gz'
get_gw_StrandedProfile_datasets.peak_width = 1000
# NOTE(review): the StrandedProfile dataset built earlier in this file uses
# seq_width=10000, while this binding uses 1000 - confirm which is intended.
get_gw_StrandedProfile_datasets.seq_width = 1000 # TODO - infer from the model
# get_gw_StrandedProfile_datasets.exclude_chr = ['chrX', 'chrY']