from basepair.imports import *
from basepair.gdrive import gdrive_upload_fig
from basepair.modisco.utils import shorten_pattern
from joblib import Parallel, delayed
# Base directory under which the `modisco/<pdir>` result folders are looked up.
# NOTE(review): `ddir` is not defined in this file — presumably it comes from
# the star import of basepair.imports; confirm.
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
# Task names; used to build the per-task 'by_peak_tasks/weighted/<task>' run directories.
tasks = ['Klf4', 'Nanog', 'Oct4', 'Sox2']
def upload_pssm(pattern, pdir):
    """Plot the PSSM of one MoDISco pattern and upload the figure.

    Patterns with fewer than 100 seqlets are skipped and nothing is uploaded.

    Args:
        pattern: Pattern identifier made of "/"-separated components; the
            components are passed positionally to the ModiscoResult methods.
        pdir: MoDISco run sub-directory, relative to ``model_dir / "modisco"``.
            It is also embedded in the upload destination path.

    Returns:
        None.
    """
    modisco_pdir = model_dir / f"modisco/{pdir}"
    pattern_parts = pattern.split("/")
    mr = ModiscoResult(modisco_pdir / "modisco.h5")
    mr.open()
    try:
        if mr.n_seqlets(*pattern_parts) < 100:
            return
        fig = mr.plot_pssm(*pattern_parts, trim_frac=0.08)
        gdrive_upload_fig(fig, f"modisco/{pdir}/motifs/{shorten_pattern(pattern)}")
    finally:
        # Always release the result file, including on the early return above
        # and when plotting or uploading raises.
        mr.close()
def get_patterns(pdir):
    """Return the patterns stored in the MoDISco result of one run.

    Args:
        pdir: MoDISco run sub-directory, relative to ``model_dir / "modisco"``.

    Returns:
        Whatever ``ModiscoResult.patterns()`` returns for that run's
        ``modisco.h5`` file.
    """
    modisco_pdir = model_dir / f"modisco/{pdir}"
    mr = ModiscoResult(modisco_pdir / "modisco.h5")
    mr.open()
    try:
        return mr.patterns()
    finally:
        # Close the result file even if reading the patterns fails.
        mr.close()
# Upload the motif plots of the per-task runs. Only tasks[2:] is processed,
# i.e. 'Oct4' and 'Sox2' for the `tasks` list defined in this file.
for task in tasks[2:]:
    task_jobs = (
        delayed(upload_pssm)(pattern, 'by_peak_tasks/weighted/' + task)
        for pattern in tqdm(get_patterns('by_peak_tasks/weighted/' + task))
    )
    # The generator is consumed lazily by the two worker threads.
    Parallel(n_jobs=2, backend='threading')(task_jobs)

# Upload the motif plots of the "valid" run.
valid_jobs = (
    delayed(upload_pssm)(pattern, "valid")
    for pattern in tqdm(get_patterns('valid'))
)
Parallel(n_jobs=2, backend='threading')(valid_jobs)