import sys
import os
sys.path.append(os.path.abspath("../../src/"))
import plot.viz_sequence as viz_sequence
import h5py
import numpy as np
import matplotlib.pyplot as plt
# Locate the TF-MoDISco results file (HDF5) for one TF / run.
# Root directory under which the per-TF result folders live.
base_path = "/users/amtseng/tfmodisco/results/tfmodisco/BPNet"
# Both values are required environment variables; a missing one raises
# KeyError here, before any file is opened.
tf_name = os.environ["TFM_TFNAME"]
key = os.environ["TFM_KEY"]
# The TF name appears in both the directory and the file name; the key only
# appears in the file name.
tfm_results_path = os.path.join(
    base_path,
    "BPNet_{0}_ChIPseq/BPNet_{0}_ChIPseq_{1}_tfm.h5".format(tf_name, key)
)
# Walk every metacluster in the TF-MoDISco results file and, for each pattern
# it contains, print a short summary and plot its sequence, hypothetical
# contribution, and contribution-score tracks (forward orientation, task 0).
with h5py.File(tfm_results_path, "r") as f:
    metaclusters = f["metacluster_idx_to_submetacluster_results"]
    num_metaclusters = len(metaclusters)
    for metacluster_i, metacluster_key in enumerate(metaclusters):
        metacluster = metaclusters[metacluster_key]
        print("Metacluster: %s (%d/%d)" % (metacluster_key, metacluster_i + 1, num_metaclusters))
        print("==========================================")
        # The header above is printed even when the metacluster has no
        # "patterns" group; in that case there is nothing further to show.
        if "patterns" not in metacluster["seqlets_to_patterns_result"]:
            continue
        patterns = metacluster["seqlets_to_patterns_result"]["patterns"]
        # Read the name list from the file once and reuse it for both the
        # count and the iteration.
        pattern_names = patterns["all_pattern_names"][:]
        num_patterns = len(pattern_names)
        for pattern_i, pattern_name in enumerate(pattern_names):
            # Entries are decoded to str before being used as a group key.
            pattern_name = pattern_name.decode()
            pattern = patterns[pattern_name]
            seqlets = pattern["seqlets_and_alnmts"]["seqlets"]
            print("Pattern: %s (%d/%d)" % (pattern_name, pattern_i + 1, num_patterns))
            print("--------------------------------------")
            print("%d seqlets" % len(seqlets))
            print("Sequence")
            viz_sequence.plot_weights(pattern["sequence"]["fwd"][:])
            print("Hypothetical contributions")
            viz_sequence.plot_weights(pattern["task0_hypothetical_contribs"]["fwd"][:])
            print("Contribution_scores")
            viz_sequence.plot_weights(pattern["task0_contrib_scores"]["fwd"][:])
            # Fresh in-memory copies of the forward sequence track and the
            # actual contribution scores for this pattern.
            # NOTE(review): neither name is used in the visible part of the
            # file; presumably consumed by code further down -- confirm, and
            # make sure any such code is indented into this loop.
            pwm = pattern["sequence"]["fwd"][:]
            act_contribs = pattern["task0_contrib_scores"]["fwd"][:]