In [1]:
import os
import sys
import pickle
import numpy as np
from math import exp
from scipy import stats
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from vizsequence.viz_sequence import plot_weights_given_ax
from scipy.special import softmax
import keras
import keras.losses
from keras.models import Model, Sequential, load_model
from keras import backend as K
import numpy.random as rng
import seaborn as sns
from collections import OrderedDict
from basepair.losses import twochannel_multinomial_nll
import modisco
import modisco.tfmodisco_workflow.workflow
import h5py
import modisco.util
from collections import Counter
from modisco.visualization import viz_sequence
import modisco.affinitymat.core
import modisco.cluster.phenograph.core
import modisco.cluster.phenograph.cluster
import modisco.cluster.core
import modisco.aggregator
%matplotlib inline
Using TensorFlow backend.
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/concise/utils/plot.py:115: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  min_coords = np.vstack(data.min(0) for data in polygons_data).min(0)
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/concise/utils/plot.py:116: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  max_coords = np.vstack(data.max(0) for data in polygons_data).max(0)
WARNING:tensorflow:From /users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

2020-11-28 10:31:40,689 [WARNING] From /users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

TF-MoDISco is using the TensorFlow backend.
In [2]:
counts_hdf5_results = h5py.File("/users/amr1/pho4/data/modisco/lncap_gr/task0_counts_results.hdf5","r")
activity_patterns = np.array(counts_hdf5_results['metaclustering_results']['attribute_vectors'])[
                    np.array(
        [x[0] for x in sorted(
                enumerate(counts_hdf5_results['metaclustering_results']['metacluster_indices']),
               key=lambda x: x[1])])]
plt.figure()
sns.heatmap(activity_patterns, center=0)
plt.show()
metacluster_names = [
    x.decode("utf-8") for x in 
    list(counts_hdf5_results["metaclustering_results"]
         ["all_metacluster_names"][:])]
all_patterns = []
for metacluster_name in metacluster_names:
    print("\n"+metacluster_name+"\n")
    metacluster_grp = (counts_hdf5_results["metacluster_idx_to_submetacluster_results"]
                                   [metacluster_name])
    all_pattern_names = [x.decode("utf-8") for x in 
                         list(metacluster_grp["seqlets_to_patterns_result"]
                                             ["patterns"]["all_pattern_names"][:])]
    for pattern_name in all_pattern_names:
        all_patterns.append((metacluster_name, pattern_name))
        pattern = metacluster_grp["seqlets_to_patterns_result"]["patterns"][pattern_name]
        print("\n"+pattern_name+": "+str(len(pattern["seqlets_and_alnmts"]["seqlets"]))+" seqlets\n")
        fig, ax = plt.subplots(4, 1, figsize=(30,20))
        background = np.array([0.27, 0.23, 0.23, 0.27])
        plot_weights_given_ax(ax[0], pattern["task0_hypothetical_contribs"]["fwd"])
        ax[0].set_title("hypothetical scores")
        plot_weights_given_ax(ax[1], pattern["task0_contrib_scores"]["fwd"])
        ax[1].set_title("actual importance scores")
        plot_weights_given_ax(ax[2], viz_sequence.ic_scale(np.array(pattern["sequence"]["fwd"]),
                                                        background=background))
        ax[2].set_title("onehot fwd")
        plot_weights_given_ax(ax[3], viz_sequence.ic_scale(np.array(pattern["sequence"]["rev"]),
                                                        background=background)) 
        ax[3].set_title("onehot rev")
        plt.show()
counts_hdf5_results.close()
metacluster_0


pattern_0: 994 seqlets

pattern_1: 378 seqlets

pattern_2: 139 seqlets

pattern_3: 113 seqlets

pattern_4: 110 seqlets

pattern_5: 95 seqlets

pattern_6: 88 seqlets

pattern_7: 131 seqlets

pattern_8: 82 seqlets

pattern_9: 120 seqlets

metacluster_1


pattern_0: 1210 seqlets

pattern_1: 688 seqlets

pattern_2: 490 seqlets

pattern_3: 125 seqlets

pattern_4: 120 seqlets

pattern_5: 89 seqlets

pattern_6: 88 seqlets

In [3]:
profile_hdf5_results = h5py.File("/users/amr1/pho4/data/modisco/lncap_gr/task0_profile_results.hdf5","r")
activity_patterns = np.array(profile_hdf5_results['metaclustering_results']['attribute_vectors'])[
                    np.array(
        [x[0] for x in sorted(
                enumerate(profile_hdf5_results['metaclustering_results']['metacluster_indices']),
               key=lambda x: x[1])])]
plt.figure()
sns.heatmap(activity_patterns, center=0)
plt.show()
metacluster_names = [
    x.decode("utf-8") for x in 
    list(profile_hdf5_results["metaclustering_results"]
         ["all_metacluster_names"][:])]
all_patterns = []
for metacluster_name in metacluster_names:
    print("\n"+metacluster_name+"\n")
    metacluster_grp = (profile_hdf5_results["metacluster_idx_to_submetacluster_results"]
                                   [metacluster_name])
    all_pattern_names = [x.decode("utf-8") for x in 
                         list(metacluster_grp["seqlets_to_patterns_result"]
                                             ["patterns"]["all_pattern_names"][:])]
    for pattern_name in all_pattern_names:
        all_patterns.append((metacluster_name, pattern_name))
        pattern = metacluster_grp["seqlets_to_patterns_result"]["patterns"][pattern_name]
        print("\n"+pattern_name+": "+str(len(pattern["seqlets_and_alnmts"]["seqlets"]))+" seqlets\n")
        fig, ax = plt.subplots(4, 1, figsize=(30,20))
        background = np.array([0.27, 0.23, 0.23, 0.27])
        plot_weights_given_ax(ax[0], pattern["task0_hypothetical_contribs"]["fwd"])
        ax[0].set_title("hypothetical scores")
        plot_weights_given_ax(ax[1], pattern["task0_contrib_scores"]["fwd"])
        ax[1].set_title("actual importance scores")
        plot_weights_given_ax(ax[2], viz_sequence.ic_scale(np.array(pattern["sequence"]["fwd"]),
                                                        background=background))
        ax[2].set_title("onehot fwd")
        plot_weights_given_ax(ax[3], viz_sequence.ic_scale(np.array(pattern["sequence"]["rev"]),
                                                        background=background)) 
        ax[3].set_title("onehot rev")
        plt.show()
profile_hdf5_results.close()
metacluster_0


pattern_0: 1287 seqlets

pattern_1: 155 seqlets

pattern_2: 149 seqlets

pattern_3: 118 seqlets

pattern_4: 83 seqlets

pattern_5: 95 seqlets

pattern_6: 64 seqlets

metacluster_1


pattern_0: 939 seqlets

pattern_1: 889 seqlets

pattern_2: 352 seqlets

pattern_3: 297 seqlets

pattern_4: 326 seqlets

pattern_5: 169 seqlets

pattern_6: 134 seqlets

pattern_7: 120 seqlets

pattern_8: 122 seqlets

pattern_9: 123 seqlets

pattern_10: 90 seqlets

In [4]:
print("DONE")
DONE