from basepair.imports import *
# Directory the pattern tables and footprints.pkl are written to.
output_dir = '/srv/www/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/profile'

# Inputs handed to ModiscoData.load: the modisco run directory and the
# scores file (presumably importance scores, going by the variable name).
modisco_dir = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/profile'
imp_scores = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/grad.all.h5'

# URL passed to write_modisco_table together with output_dir.
report_url = 'http://mitra.stanford.edu/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/profile/results.html'
from basepair.modisco.table import ModiscoData, modisco_table, write_modisco_table
from basepair.modisco.motif_clustering import hirearchically_reorder_table
# The output directory must already exist before any results are written.
assert os.path.exists(output_dir)

# Load the modisco results together with the scores file.
data = ModiscoData.load(modisco_dir, imp_scores)

# Bare expressions below are notebook-style inspection cells; they have no
# effect when the file is run as a plain script.
data
data.mr.fpath

# Show the kwargs.json stored in the modisco directory. Reading it from
# `modisco_dir` (rather than an IPython `!cat` on a hard-coded copy of the
# same path) keeps this valid Python and in sync with the configured path.
with open(os.path.join(modisco_dir, 'kwargs.json')) as kwargs_file:
    print(kwargs_file.read())

# Number of 'interval_from_task' entries, then how often each value occurs.
len(data.d['metadata']['interval_from_task'])
pd.Series(data.d['metadata']['interval_from_task']).value_counts()
# Build the pattern table from the loaded modisco data.
df = modisco_table(data)

# Notebook-style inspection of where the results go and which tasks exist.
output_dir
report_url
data.tasks
output_dir

# Table in its original row order.
print("Writing the results")
write_modisco_table(df, output_dir, report_url, 'pattern_table')

# Same table after hierarchical re-ordering of its rows by task.
print("Writing clustered table")
df_sorted = hirearchically_reorder_table(df, data.tasks)
write_modisco_table(df_sorted, output_dir, report_url, 'pattern_table.sorted')

print("Done!")
# Notebook-style inspection of the method object (it is not called here).
data.get_peak_task_idx

# For every pattern, map each task to its wide profile averaged over axis 0.
# Patterns keep the order in which data.mr.patterns() yields them.
profiles = OrderedDict(
    (
        pattern_name,
        {
            task_name: data.get_profile_wide(pattern_name, task_name).mean(axis=0)
            for task_name in data.tasks
        },
    )
    for pattern_name in data.mr.patterns()
)

# Store the averaged profiles next to the pattern tables.
write_pkl(profiles, Path(output_dir).joinpath('footprints.pkl'))