In [1]:
import sys
import os
sys.path.append(os.path.abspath("../../src/"))
import plot.viz_sequence as viz_sequence
import h5py
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# Build the path to the TF-MoDISco results HDF5 file for one BPNet ChIP-seq run.
# The TF name and the run key are taken from the environment variables
# TFM_TFNAME and TFM_KEY; a missing variable raises KeyError.
base_path = "/users/amtseng/tfmodisco/results/tfmodisco/BPNet"
tf_name = os.environ["TFM_TFNAME"]
key = os.environ["TFM_KEY"]
# {0} is the TF name (used for both the directory and the file name), {1} is the key
tfm_results_path = os.path.join(
    base_path,
    "BPNet_{0}_ChIPseq/BPNet_{0}_ChIPseq_{1}_tfm.h5".format(tf_name, key),
)
In [3]:
# Walk every metacluster in the TF-MoDISco results file and, for each pattern
# it contains, print the seqlet count and plot three forward-strand tracks:
# the sequence, the task0 hypothetical contributions, and the task0
# contribution scores.
with h5py.File(tfm_results_path, "r") as f:
    metaclusters = f["metacluster_idx_to_submetacluster_results"]
    num_metaclusters = len(metaclusters.keys())
    for metacluster_i, metacluster_key in enumerate(metaclusters.keys()):
        metacluster = metaclusters[metacluster_key]
        print("Metacluster: %s (%d/%d)" % (metacluster_key, metacluster_i + 1, num_metaclusters))
        print("==========================================")
        # A metacluster without a "patterns" group has nothing to show;
        # only its header is printed.
        if "patterns" not in metacluster["seqlets_to_patterns_result"].keys():
            continue
        patterns = metacluster["seqlets_to_patterns_result"]["patterns"]
        # Read the list of names from the file once and reuse it for both
        # the count and the iteration.
        pattern_names = patterns["all_pattern_names"][:]
        num_patterns = len(pattern_names)
        for pattern_i, pattern_name in enumerate(pattern_names):
            # Names may be read back as bytes or as str; decode only when
            # needed so either form works as a group key.
            if isinstance(pattern_name, bytes):
                pattern_name = pattern_name.decode()
            pattern = patterns[pattern_name]
            seqlets = pattern["seqlets_and_alnmts"]["seqlets"]

            # Load each track from the file exactly once.
            # NOTE(review): assumes viz_sequence.plot_weights does not modify
            # the array it is given - confirm if pwm/act_contribs are reused.
            pwm = pattern["sequence"]["fwd"][:]
            hyp_contribs = pattern["task0_hypothetical_contribs"]["fwd"][:]
            act_contribs = pattern["task0_contrib_scores"]["fwd"][:]

            print("Pattern: %s (%d/%d)" % (pattern_name, pattern_i + 1, num_patterns))
            print("--------------------------------------")

            print("%d seqlets" % len(seqlets))
            print("Sequence")
            viz_sequence.plot_weights(pwm)
            print("Hypothetical contributions")
            viz_sequence.plot_weights(hyp_contribs)
            print("Contribution_scores")
            viz_sequence.plot_weights(act_contribs)
Metacluster: metacluster_0 (1/1)
==========================================
Pattern: pattern_0 (1/12)
--------------------------------------
8372 seqlets
Sequence
Hypothetical contributions
Contribution_scores
Pattern: pattern_1 (2/12)
--------------------------------------
844 seqlets
Sequence
Hypothetical contributions
Contribution_scores
Pattern: pattern_2 (3/12)
--------------------------------------
236 seqlets
Sequence
Hypothetical contributions
Contribution_scores
Pattern: pattern_3 (4/12)
--------------------------------------
236 seqlets
Sequence
Hypothetical contributions
Contribution_scores
Pattern: pattern_4 (5/12)
--------------------------------------
197 seqlets
Sequence
Hypothetical contributions
Contribution_scores
Pattern: pattern_5 (6/12)
--------------------------------------
182 seqlets
Sequence
Hypothetical contributions
Contribution_scores
Pattern: pattern_6 (7/12)
--------------------------------------
162 seqlets
Sequence
Hypothetical contributions
Contribution_scores
Pattern: pattern_7 (8/12)
--------------------------------------
136 seqlets
Sequence
Hypothetical contributions
Contribution_scores
Pattern: pattern_8 (9/12)
--------------------------------------
126 seqlets
Sequence
Hypothetical contributions
Contribution_scores
Pattern: pattern_9 (10/12)
--------------------------------------
65 seqlets
Sequence
Hypothetical contributions
Contribution_scores
Pattern: pattern_10 (11/12)
--------------------------------------
50 seqlets
Sequence
Hypothetical contributions
Contribution_scores
Pattern: pattern_11 (12/12)
--------------------------------------
36 seqlets
Sequence
Hypothetical contributions
Contribution_scores