In [1]:
import os
import sys
import pickle
import numpy as np
from math import exp
from scipy import stats
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from vizsequence.viz_sequence import plot_weights_given_ax
from scipy.special import softmax
import keras
import keras.losses
from keras.models import Model, Sequential, load_model
from keras import backend as K
import numpy.random as rng
import seaborn as sns
from collections import OrderedDict
from basepair.losses import twochannel_multinomial_nll
import modisco
import modisco.tfmodisco_workflow.workflow
import h5py
import modisco.util
from collections import Counter
from modisco.visualization import viz_sequence
import modisco.affinitymat.core
import modisco.cluster.phenograph.core
import modisco.cluster.phenograph.cluster
import modisco.cluster.core
import modisco.aggregator
%matplotlib inline
Using TensorFlow backend.
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/concise/utils/plot.py:115: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  min_coords = np.vstack(data.min(0) for data in polygons_data).min(0)
/users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/concise/utils/plot.py:116: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  max_coords = np.vstack(data.max(0) for data in polygons_data).max(0)
WARNING:tensorflow:From /users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

2020-04-13 00:13:01,213 [WARNING] From /users/amr1/miniconda3/envs/basepair/lib/python3.6/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

TF-MoDISco is using the TensorFlow backend.
In [2]:
folder = "pho4_pbexo"
task_names = ["pho4"]

for task_id, task in enumerate(task_names):
    tfname = task
    print("\n\n"+tfname+" COUNTS\n\n")
    counts_hdf5_results = h5py.File("data/modisco/"+folder+"/"+tfname+"_counts/results.hdf5","r")
    activity_patterns = np.array(counts_hdf5_results['metaclustering_results']['attribute_vectors'])[
                        np.array(
            [x[0] for x in sorted(
                    enumerate(counts_hdf5_results['metaclustering_results']['metacluster_indices']),
                   key=lambda x: x[1])])]
    plt.figure()
    sns.heatmap(activity_patterns, center=0)
    plt.show()
#     plt.savefig("data/modisco/"+folder+'/counts/activity_patterns.png')
    metacluster_names = [
        x.decode("utf-8") for x in 
        list(counts_hdf5_results["metaclustering_results"]
             ["all_metacluster_names"][:])]
    all_patterns = []
    for metacluster_name in metacluster_names:
        print("\n"+metacluster_name+"\n")
        metacluster_grp = (counts_hdf5_results["metacluster_idx_to_submetacluster_results"]
                                       [metacluster_name])
        all_pattern_names = [x.decode("utf-8") for x in 
                             list(metacluster_grp["seqlets_to_patterns_result"]
                                                 ["patterns"]["all_pattern_names"][:])]
        for pattern_name in all_pattern_names:
            all_patterns.append((metacluster_name, pattern_name))
            pattern = metacluster_grp["seqlets_to_patterns_result"]["patterns"][pattern_name]
            print("\n"+pattern_name+": "+str(len(pattern["seqlets_and_alnmts"]["seqlets"]))+" seqlets\n")
            fig, ax = plt.subplots(4, 1, figsize=(30,20))
            background = np.array([0.27, 0.23, 0.23, 0.27])
            plot_weights_given_ax(ax[0], pattern[tfname+"_hypothetical_contribs"]["fwd"])
            ax[0].set_title("hypothetical scores")
            plot_weights_given_ax(ax[1], pattern[tfname+"_contrib_scores"]["fwd"])
            ax[1].set_title("actual importance scores")
            plot_weights_given_ax(ax[2], viz_sequence.ic_scale(np.array(pattern["sequence"]["fwd"]),
                                                            background=background))
            ax[2].set_title("onehot fwd")
            plot_weights_given_ax(ax[3], viz_sequence.ic_scale(np.array(pattern["sequence"]["rev"]),
                                                            background=background)) 
            ax[3].set_title("onehot rev")
            plt.show()
#             fig.savefig("data/modisco/"+folder+"/counts_"+\
#                         str(metacluster_name)+'_'+str(pattern_name)+'_activity_'+\
#                         str(metacluster_grp["activity_pattern"][:])+'_seqlets_'+\
#                         str(len(pattern["seqlets_and_alnmts"]["seqlets"]))+'.png')
    counts_hdf5_results.close()
    
    print("\n\n"+tfname+" PROFILE\n\n")
    profile_hdf5_results = h5py.File("data/modisco/"+folder+"/"+tfname+"_profile/results.hdf5","r")
    activity_patterns = np.array(profile_hdf5_results['metaclustering_results']['attribute_vectors'])[
                        np.array(
            [x[0] for x in sorted(
                    enumerate(profile_hdf5_results['metaclustering_results']['metacluster_indices']),
                   key=lambda x: x[1])])]
    plt.figure()
    sns.heatmap(activity_patterns, center=0)
    plt.show()
#     plt.savefig("data/modisco/"+folder+'/profile/activity_patterns.png')
    metacluster_names = [
        x.decode("utf-8") for x in 
        list(profile_hdf5_results["metaclustering_results"]
             ["all_metacluster_names"][:])]
    all_patterns = []
    for metacluster_name in metacluster_names:
        print("\n"+metacluster_name+"\n")
        metacluster_grp = (profile_hdf5_results["metacluster_idx_to_submetacluster_results"]
                                       [metacluster_name])
        all_pattern_names = [x.decode("utf-8") for x in 
                             list(metacluster_grp["seqlets_to_patterns_result"]
                                                 ["patterns"]["all_pattern_names"][:])]
        for pattern_name in all_pattern_names:
            all_patterns.append((metacluster_name, pattern_name))
            pattern = metacluster_grp["seqlets_to_patterns_result"]["patterns"][pattern_name]
            print("\n"+pattern_name+": "+str(len(pattern["seqlets_and_alnmts"]["seqlets"]))+" seqlets\n")
            fig, ax = plt.subplots(4, 1, figsize=(30,20))
            background = np.array([0.27, 0.23, 0.23, 0.27])
            plot_weights_given_ax(ax[0], pattern[tfname+"_hypothetical_contribs"]["fwd"])
            ax[0].set_title("hypothetical scores")
            plot_weights_given_ax(ax[1], pattern[tfname+"_contrib_scores"]["fwd"])
            ax[1].set_title("actual importance scores")
            plot_weights_given_ax(ax[2], viz_sequence.ic_scale(np.array(pattern["sequence"]["fwd"]),
                                                            background=background))
            ax[2].set_title("onehot fwd")
            plot_weights_given_ax(ax[3], viz_sequence.ic_scale(np.array(pattern["sequence"]["rev"]),
                                                            background=background)) 
            ax[3].set_title("onehot rev")
            plt.show()
#             fig.savefig("data/modisco/"+folder+"/profile_"+\
#                         str(metacluster_name)+'_'+str(pattern_name)+'_activity_'+\
#                         str(metacluster_grp["activity_pattern"][:])+'_seqlets_'+\
#                         str(len(pattern["seqlets_and_alnmts"]["seqlets"]))+'.png')
    profile_hdf5_results.close()

pho4 COUNTS


metacluster_0


pattern_0: 149 seqlets

pattern_1: 133 seqlets

pattern_2: 365 seqlets

metacluster_1


pattern_0: 2763 seqlets

pattern_1: 1111 seqlets

pattern_2: 823 seqlets

pattern_3: 741 seqlets

pattern_4: 720 seqlets

pattern_5: 455 seqlets

pattern_6: 96 seqlets

pattern_7: 82 seqlets


pho4 PROFILE


metacluster_0


pattern_0: 575 seqlets

pattern_1: 454 seqlets

pattern_2: 150 seqlets

pattern_3: 89 seqlets

pattern_4: 115 seqlets

metacluster_1


pattern_0: 3240 seqlets

pattern_1: 408 seqlets

pattern_2: 194 seqlets

pattern_3: 154 seqlets

pattern_4: 147 seqlets

pattern_5: 130 seqlets

pattern_6: 108 seqlets

pattern_7: 94 seqlets

pattern_8: 104 seqlets

pattern_9: 79 seqlets

In [3]:
print("DONE")
DONE