def collect_vdoms_pattern(mr):
    """Convert the plots of every pattern in *mr* to vdom elements.

    Walks each metacluster from ``mr.metaclusters()`` and each pattern from
    ``mr.patterns(metacluster=...)``, calls
    ``mr.plot_pattern(metacluster, pattern)`` and passes every figure it
    yields through ``fig2vdom``.

    Args:
        mr: Result object providing ``metaclusters()``,
            ``patterns(metacluster=...)`` and
            ``plot_pattern(metacluster, pattern)``.
            NOTE(review): presumably an opened ``ModiscoResult`` -- confirm
            against callers.

    Returns:
        OrderedDict mapping ``"<metacluster>/<pattern>"`` to the list of
        converted figures, in the order ``plot_pattern`` produced them.
    """
    vdom_to_figs = OrderedDict()
    for metacluster in mr.metaclusters():
        for pattern in mr.patterns(metacluster=metacluster):
            fullpattern_name = f"{metacluster}/{pattern}"
            figures = mr.plot_pattern(metacluster, pattern)
            # Only the converted elements are kept; the raw figures are not
            # needed once they have been turned into vdom.
            vdom_to_figs[fullpattern_name] = [fig2vdom(fig) for fig in figures]
    return vdom_to_figs
def get_data(
    task_idx,
    *,
    root="/mnt/lab_data/kundaje/msharmin/mouse_hem/with_tfd/full_mouse50/dlmodisco_old",
    tfd_root="/mnt/lab_data/kundaje/msharmin/mouse_hem/with_tfd",
    srv_root="/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/dlmodisco_old",
):
    """Load the modisco result and the two lookup pickles for one task.

    Args:
        task_idx: Task identifier; interpolated into the per-task directory
            name ``task_<task_idx>-naivegw``.
        root: Directory containing
            ``task_<task_idx>-naivegw/modisco_cisbp_match.p``.
        tfd_root: Directory containing ``cisbp_id_tf_map.p``.
        srv_root: Directory containing
            ``task_<task_idx>-naivegw/results.hdf5``.

    Returns:
        Tuple ``(mr, modisco_map, cisbp_map)`` where ``mr`` is a
        ``ModiscoResult`` on which ``open()`` has already been called (the
        caller is responsible for it from then on), and the two maps are the
        unpickled contents of ``modisco_cisbp_match.p`` and
        ``cisbp_id_tf_map.p`` respectively.

    Raises:
        OSError: If either pickle file cannot be opened.
    """
    task_dir = f"task_{task_idx}-naivegw"
    cisbpfile = f"{tfd_root}/cisbp_id_tf_map.p"
    mapfile = os.path.join(root, task_dir, "modisco_cisbp_match.p")

    # NOTE: pickle.load can execute arbitrary code embedded in the file; only
    # point these roots at data you trust.
    with open(cisbpfile, 'rb') as handle:
        cisbp_map = pickle.load(handle)
    with open(mapfile, 'rb') as handle:
        modisco_map = pickle.load(handle)

    # Open the result last so a missing/corrupt pickle cannot leave an opened
    # result object behind with nobody holding a reference to close it.
    mr = ModiscoResult(f"{srv_root}/{task_dir}/results.hdf5")
    mr.open()
    return mr, modisco_map, cisbp_map
def _matched_tf_names(matches, cisbp_map, q_threshold=0.01):
    """Return the TF names of the matches whose q-value is below *q_threshold*.

    Each match's ``'Target ID'`` is looked up in *cisbp_map*; the name is the
    part of that value before the first ``'_'``. If that part contains a
    parenthesis, the text between the first ``'('`` and the next ``')'`` is
    used instead.
    """
    tfnames = []
    for match in matches:
        if match['q-value'] >= q_threshold:
            continue
        tfname = cisbp_map[match['Target ID']].split('_')[0]
        if '(' in tfname:
            tfname = tfname.split('(')[1].split(')')[0]
        tfnames.append(tfname)
    return tfnames


def display_results(sample_name, mr, vdom_to_figs, modisco_map, cisbp_map, importance = True):
    """Display a collapsible summary of one metacluster's patterns.

    For every pattern of the selected metacluster this builds a ``details``
    element holding the pattern name, its seqlet count, the matched TF names,
    the trimmed PSSM logo and three nested sections ("Sequence",
    "Contrib Scores", "Hyp_Contrib Scores"). All pattern entries are wrapped
    in one outer ``details`` element, which is passed to ``display``.

    Args:
        sample_name: Label shown in bold after "important for:".
        mr: Result object providing ``patterns(metacluster=...)``,
            ``n_seqlets(metacluster, pattern)`` and ``get_pssm(...)``.
        vdom_to_figs: Mapping from ``"<metacluster>/<pattern>"`` to a
            sequence whose items 0, 1 and 2 are shown under "Sequence",
            "Contrib Scores" and "Hyp_Contrib Scores" respectively.
        modisco_map: Mapping from ``"<metacluster>/<pattern>"`` to an
            iterable of match records with ``'q-value'`` and ``'Target ID'``
            entries.
        cisbp_map: Mapping from a match's ``'Target ID'`` to a string from
            which the TF name is extracted.
        importance: When truthy, ``'metacluster_0'`` is shown; otherwise
            ``'metacluster_1'``.

    Returns:
        None. The result is rendered via ``display``.

    Raises:
        KeyError: If a pattern is missing from *modisco_map* or
            *vdom_to_figs*, or a ``'Target ID'`` is missing from *cisbp_map*.
    """
    metacluster = 'metacluster_0' if importance else 'metacluster_1'
    meta_patterns = mr.patterns(metacluster=metacluster)

    # Query each pattern's seqlet count once and reuse it for both the
    # per-pattern label and the metacluster total.
    seqlet_counts = [mr.n_seqlets(metacluster, pattern) for pattern in meta_patterns]
    n_patterns = len(meta_patterns)
    n_seqlets = sum(seqlet_counts)

    pattern_items = []
    for pattern, n in zip(meta_patterns, seqlet_counts):
        fullpattern_name = f"{metacluster}/{pattern}"
        tfnames = _matched_tf_names(modisco_map[fullpattern_name], cisbp_map)
        trimmed_motif = vdom_pssm(
            mr.get_pssm(metacluster, pattern, rc=False, trim_frac=0.08),
            letter_width=0.15,
            height=0.5,
        )
        figures = vdom_to_figs[fullpattern_name]
        pattern_items.append(
            details(
                summary(pattern, f": # seqlets: {n} {tfnames}", trimmed_motif),
                details(summary("Sequence"), figures[0]),
                details(summary("Contrib Scores"), figures[1]),
                details(summary("Hyp_Contrib Scores"), figures[2]),
                id=fullpattern_name,
            )
        )

    display(
        details(
            summary(
                b(metacluster),
                f", # patterns: {n_patterns},"
                f" # seqlets: {n_seqlets}, "
                "important for: ",
                b("{}".format(sample_name)),
            ),
            ul([li(item) for item in pattern_items], start=0),
            id=metacluster,
            open=True,
        )
    )