Goal

  • Train a classifier to map sub-sequences to the right modisco classes #28

Input: DNA sequence Output: track along the DNA sequence with classes: 0 (not a modisco motif), 1-seqlet 1, 2-seqlet 2, ... Model: Trained BPNet

Evaluation:

  • check that you get high accuracy on the held-out portion of the training set

Open questions

  • how to align the vector of the output variables?
    • is this alignment important?
  • Use a Conv1D on the final activation map to consider a wider context

TODO

  • [x] load the recent oct-sox-nanog-klf4 dataset and train the model on it
  • [x] Create the training set
  • [x] Create a classifier
  • [x] train and evaluate
    • how well can you map the existing seqlets?
      • what's the confusion matrix?
    • what does the conv-filter look like?

Load the required data

In [2]:
import basepair
import numpy as np
sess = basepair.config.create_tf_session(0)
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
In [3]:
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Model, load_model
from pathlib import Path
import scikitplot as skplt
In [4]:
ls /users/avsec/workspace/basepair-workflow/models/0/
data.pkl       eval/        hparams.yaml  modisco/          tracks/
dataspec.yaml  history.csv  model.h5      preprocessor.pkl
In [5]:
mdir = Path("/users/avsec/workspace/basepair-workflow/models/0/")
In [6]:
ls {mdir}/modisco/test/Sox2/weighted
distances.npy  included_samples.npy  kwargs.json  modisco.h5  profile/
In [7]:
from basepair.utils import read_pkl
train,valid,test = read_pkl(mdir / "data.pkl")
In [9]:
from basepair.BPNet import BPNetPredictor

bpnet = BPNetPredictor.from_mdir(mdir)
In [10]:
# Which gradient variant to request from BPNetPredictor.input_grad
seq_grad = 'weighted'
# Input gradients of task 0 w.r.t. the sequences in test[0], one call per strand
grads_pos = bpnet.input_grad(test[0], strand='pos', task_id=0, seq_grad=seq_grad, batch_size=512)
grads_neg = bpnet.input_grad(test[0], strand='neg', task_id=0, seq_grad=seq_grad, batch_size=512)
# Gradient * input, element-wise (presumably test[0] is one-hot, so this keeps
# only the gradient at the observed base — TODO confirm)
igrads_pos = grads_pos * test[0]
igrads_neg = grads_neg * test[0]

# Flatten everything but the first (example) axis: one gradient vector per sequence
grads_pos_ext = grads_pos.reshape((grads_pos.shape[0], -1))
grads_neg_ext = grads_neg.reshape((grads_neg.shape[0], -1))

# Setup different scores
# Sum both strands, then subtract the mean over the last axis at every position
hyp_scores = grads_pos + grads_neg
hyp_scores = hyp_scores - hyp_scores.mean(-1, keepdims=True)
# Mean-centered scores multiplied element-wise with the input sequence
scores = hyp_scores * test[0]
In [11]:
from scipy.spatial.distance import correlation

# Correlation distance between the flattened negative- and positive-strand
# gradients, one value per sequence (rows are paired in lockstep)
distances = np.array([correlation(neg, pos)
                      for neg, pos in zip(grads_neg_ext, grads_pos_ext)])
In [12]:
# `distances` comes from scipy.spatial.distance.correlation, i.e. the
# correlation distance, so the axis is labelled accordingly (not cosine)
plt.hist(distances, bins=30);
plt.ylabel("Frequency");
plt.xlabel("Correlation distance between positive and negative strand");
In [13]:
scores.shape
Out[13]:
(18086, 200, 4)

Create the training set

  • given all the seqlet instance locations, label the training set with a positive and a negative set
    • positive class = center of the seqlet
In [14]:
from basepair.modisco import ModiscoResult
modisco_dir = mdir / "modisco/test/Sox2/weighted"
mr = ModiscoResult(modisco_dir / "modisco.h5")
mr.open()
In [15]:
incl = np.load(modisco_dir / "included_samples.npy")
In [16]:
test[0].shape
Out[16]:
(18086, 200, 4)
In [17]:
# Old
def label_seqlets(seq_shape, seqlets):
    """Build a per-position label array from seqlet centers.

    Every seqlet sets a 1 at [sequence index, seqlet center] in the channel
    of its pattern; the trailing channel is filled with 1 wherever no
    pattern channel is set (background).

    Args:
      seq_shape: tuple giving the leading dimensions of the label array,
        indexed as [sequence, position]
      seqlets: dictionary whose values are iterables of seqlets
        (returned by ModiscoResult.seqlets()); channels follow the
        dictionary iteration order. Each seqlet must expose `seqname`
        (convertible to int) and `center()`

    Returns:
      array of shape seq_shape + (len(seqlets) + 1, )
    """
    n_classes = len(seqlets)
    labels = np.zeros(shape=seq_shape + (n_classes + 1, ))

    for class_idx, members in enumerate(seqlets.values()):
        for s in members:
            labels[int(s.seqname), s.center(), class_idx] = 1

    # Background channel: 1 exactly where none of the pattern channels fired
    any_pattern = labels[:, :, :n_classes].max(axis=-1)
    labels[:, :, -1] = 1 - any_pattern

    # Each position must carry exactly one label (one-hot encoding)
    assert np.all(labels.sum(axis=-1) == 1)
    return labels

def valid_seqlets(seq_shape, seqlets):
    """Mark the center position of every seqlet in a 0/1 array.

    Args:
      seq_shape: shape of the array to allocate, indexed as
        [sequence, position]
      seqlets: iterable of seqlets, each exposing `seqname`
        (convertible to int) and `center()`

    Returns:
      array of shape seq_shape with 1 at each seqlet center and 0 elsewhere
    """
    mask = np.zeros(shape=seq_shape)
    for s in seqlets:
        row, col = int(s.seqname), s.center()
        mask[row, col] = 1
    return mask
In [23]:
def label_seqlets(seq_shape, seqlets, n_patterns):
    """Build a per-position label array from seqlet centers.

    Args:
      seq_shape: tuple giving the leading dimensions of the label array,
        indexed as [sequence, position]
      seqlets: iterable of seqlets, each exposing `seqname` (convertible
        to int), `center()` and `name` ("pattern_<i>" or None)
      n_patterns: number of patterns; channel i holds "pattern_<i>" and
        the extra channel n_patterns holds seqlets whose name is None

    Returns:
      array of shape seq_shape + (n_patterns + 1, ) with a 1 at
      [sequence, center, channel] for every seqlet and 0 elsewhere
    """
    def channel_of(name):
        # Seqlets without a pattern name go to the extra, last channel
        return n_patterns if name is None else int(name.replace("pattern_", ""))

    labels = np.zeros(shape=seq_shape + (n_patterns + 1, ))
    for s in seqlets:
        labels[int(s.seqname), s.center(), channel_of(s.name)] = 1
    return labels
In [19]:
seq = test[0]
In [104]:
y_seqlets = label_seqlets(seq.shape[:2], mr.all_seqlets(True), len(mr.patterns()))
In [105]:
seqlet_mask = y_seqlets.max(axis=-1)
In [31]:
seqlet_mask.shape
Out[31]:
(18086, 200)
In [32]:
len(mr.all_seqlets())
Out[32]:
19193
In [33]:
(seqlet_mask[..., np.newaxis]*y_seqlets).sum(axis=0).sum(axis=0)
Out[33]:
array([ 2258.,  2019.,   652.,   252.,   191.,   180.,   128.,   111.,
          72.,   101.,    57.,    33.,    54.,    58.,    36., 12969.])
In [34]:
{k: len(v) for k,v in mr.seqlets().items()}
Out[34]:
{'pattern_0': 2258,
 'pattern_1': 2020,
 'pattern_2': 652,
 'pattern_3': 252,
 'pattern_4': 198,
 'pattern_5': 187,
 'pattern_6': 139,
 'pattern_7': 112,
 'pattern_8': 104,
 'pattern_9': 105,
 'pattern_10': 60,
 'pattern_11': 49,
 'pattern_12': 54,
 'pattern_13': 58,
 'pattern_14': 46}
In [584]:
pd.Series([x.strand for x in mr.seqlets()['pattern_0']]).value_counts()
Out[584]:
+    1249
-    1009
dtype: int64
In [127]:
bpnet.model.summary()
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
seq (InputLayer)                (None, 200, 4)       0                                            
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 200, 21)      1785        seq[0][0]                        
__________________________________________________________________________________________________
conv1d_2 (Conv1D)               (None, 200, 21)      1344        conv1d_1[0][0]                   
__________________________________________________________________________________________________
add_1 (Add)                     (None, 200, 21)      0           conv1d_1[0][0]                   
                                                                 conv1d_2[0][0]                   
__________________________________________________________________________________________________
conv1d_3 (Conv1D)               (None, 200, 21)      1344        add_1[0][0]                      
__________________________________________________________________________________________________
add_2 (Add)                     (None, 200, 21)      0           conv1d_1[0][0]                   
                                                                 conv1d_2[0][0]                   
                                                                 conv1d_3[0][0]                   
__________________________________________________________________________________________________
conv1d_4 (Conv1D)               (None, 200, 21)      1344        add_2[0][0]                      
__________________________________________________________________________________________________
add_3 (Add)                     (None, 200, 21)      0           conv1d_1[0][0]                   
                                                                 conv1d_2[0][0]                   
                                                                 conv1d_3[0][0]                   
                                                                 conv1d_4[0][0]                   
__________________________________________________________________________________________________
conv1d_5 (Conv1D)               (None, 200, 21)      1344        add_3[0][0]                      
__________________________________________________________________________________________________
add_4 (Add)                     (None, 200, 21)      0           conv1d_1[0][0]                   
                                                                 conv1d_2[0][0]                   
                                                                 conv1d_3[0][0]                   
                                                                 conv1d_4[0][0]                   
                                                                 conv1d_5[0][0]                   
__________________________________________________________________________________________________
conv1d_6 (Conv1D)               (None, 200, 21)      1344        add_4[0][0]                      
__________________________________________________________________________________________________
add_5 (Add)                     (None, 200, 21)      0           conv1d_1[0][0]                   
                                                                 conv1d_2[0][0]                   
                                                                 conv1d_3[0][0]                   
                                                                 conv1d_4[0][0]                   
                                                                 conv1d_5[0][0]                   
                                                                 conv1d_6[0][0]                   
__________________________________________________________________________________________________
conv1d_7 (Conv1D)               (None, 200, 21)      1344        add_5[0][0]                      
__________________________________________________________________________________________________
add_6 (Add)                     (None, 200, 21)      0           conv1d_1[0][0]                   
                                                                 conv1d_2[0][0]                   
                                                                 conv1d_3[0][0]                   
                                                                 conv1d_4[0][0]                   
                                                                 conv1d_5[0][0]                   
                                                                 conv1d_6[0][0]                   
                                                                 conv1d_7[0][0]                   
__________________________________________________________________________________________________
reshape_1 (Reshape)             (None, 200, 1, 21)   0           add_6[0][0]                      
__________________________________________________________________________________________________
conv2d_transpose_1 (Conv2DTrans (None, 200, 1, 8)    4208        reshape_1[0][0]                  
__________________________________________________________________________________________________
reshape_2 (Reshape)             (None, 200, 8)       0           conv2d_transpose_1[0][0]         
__________________________________________________________________________________________________
global_average_pooling1d_1 (Glo (None, 21)           0           add_6[0][0]                      
__________________________________________________________________________________________________
profile/Oct4 (Lambda)           (None, 200, 2)       0           reshape_2[0][0]                  
__________________________________________________________________________________________________
profile/Sox2 (Lambda)           (None, 200, 2)       0           reshape_2[0][0]                  
__________________________________________________________________________________________________
profile/Klf4 (Lambda)           (None, 200, 2)       0           reshape_2[0][0]                  
__________________________________________________________________________________________________
profile/Nanog (Lambda)          (None, 200, 2)       0           reshape_2[0][0]                  
__________________________________________________________________________________________________
counts/Oct4 (Dense)             (None, 2)            44          global_average_pooling1d_1[0][0] 
__________________________________________________________________________________________________
counts/Sox2 (Dense)             (None, 2)            44          global_average_pooling1d_1[0][0] 
__________________________________________________________________________________________________
counts/Klf4 (Dense)             (None, 2)            44          global_average_pooling1d_1[0][0] 
__________________________________________________________________________________________________
counts/Nanog (Dense)            (None, 2)            44          global_average_pooling1d_1[0][0] 
==================================================================================================
Total params: 14,233
Trainable params: 14,233
Non-trainable params: 0
__________________________________________________________________________________________________
In [70]:
base = Model(bpnet.model.inputs[0],[bpnet.model.get_layer("add_6").output])
In [71]:
import keras.layers as kl
from keras.models import Sequential
from keras.callbacks import EarlyStopping
In [72]:
# Activations of `base` (the BPNet graph cut at the "add_6" layer) for every sequence
base_features = base.predict(seq)#[:,50:150]

# Collapse all leading axes so that each row is one position's feature vector
base_features_l = base_features.reshape((-1, base_features.shape[-1]))

# Same flattening for the labels: one row of class indicators per position
y_seqlets_l = y_seqlets.reshape((-1, y_seqlets.shape[-1]))

# Flat boolean selector of the positions that carry a seqlet label
seqlet_mask_l = np.ravel(seqlet_mask).astype(bool)
In [73]:
seqlet_mask_l.shape
Out[73]:
(3617200,)
In [74]:
seqlet_mask_l.sum()
Out[74]:
19171
In [75]:
# TODO - plot class-imbalance after subsetting
In [76]:
# Per-sequence count of labelled positions for each class (sum over axis 1),
# repeated at every position by multiplying with an all-ones array
# NOTE(review): these counts are computed from the labels `y_seqlets` and are
# later concatenated into the feature matrix `x`, so the features carry
# information about the target — confirm this is intended and not label leakage.
total_counts = y_seqlets.sum(axis=1, keepdims=True) * np.ones_like(y_seqlets)
# Flatten to one row per position, matching the flattened feature/label arrays
total_counts_l = total_counts.reshape((-1, total_counts.shape[-1]))
In [77]:
total_counts.shape
Out[77]:
(18086, 200, 16)
In [78]:
x = np.concatenate([base_features_l, total_counts_l], axis=1)
In [79]:
x.shape
Out[79]:
(3617200, 37)
In [80]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

from sklearn.model_selection import cross_val_predict
In [48]:
rf = RandomForestClassifier(n_estimators=200, n_jobs=20)
In [49]:
seqlet_mask_l.sum()
Out[49]:
19171
In [50]:
#rf.fit(x[seqlet_mask_l], y_seqlets_l.argmax(-1)[seqlet_mask_l])
In [52]:
cvpred = cross_val_predict(rf, x[seqlet_mask_l],  y_seqlets_l.argmax(-1)[seqlet_mask_l], cv=5)
In [53]:
cvpred
Out[53]:
array([ 0, 15,  1, ...,  1, 15,  0])
In [58]:
skplt.metrics.plot_confusion_matrix(y_seqlets_l.argmax(-1)[seqlet_mask_l], 
                                    cvpred, figsize=(8,8),
                                    normalize=True)
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/matplotlib/cbook/deprecation.py:107: MatplotlibDeprecationWarning: Passing one of 'on', 'true', 'off', 'false' as a boolean is deprecated; use an actual boolean (True/False) instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)
Out[58]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f61300dc6d8>
In [125]:
modisco_model = Sequential([base, kl.Conv1D(y_seqlets.shape[-1], 1, activation='softmax', padding='same')])

modisco_model.compile("adam", "categorical_crossentropy", ['acc'], sample_weight_mode='temporal')
In [126]:
seqlet_mask.max()
Out[126]:
1.0
In [127]:
# Positions where seqlet_mask is 0 get a 1 in the last channel, so no position
# keeps an all-zero label vector. The last channel is also the one label_seqlets
# uses for seqlets whose name is None. Mutates y_seqlets in place.
y_seqlets[(1-seqlet_mask).astype(bool), -1] = 1
In [128]:
(1-seqlet_mask).astype(bool).sum()
Out[128]:
3598029
In [129]:
# Fit with per-position weights: seqlet_mask is passed as sample_weight and the
# model was compiled with sample_weight_mode='temporal', so positions whose
# mask is 0 should not contribute to the loss.
# NOTE(review): the log below reports val_loss = nan in every epoch, and early
# stopping monitors val_acc instead. Presumably `acc` is not weighted by
# sample_weight (Keras `metrics` vs `weighted_metrics`) and is therefore
# dominated by background positions — verify before trusting the ~0.99 values.
modisco_model.fit(seq, y_seqlets, epochs=50, 
                  batch_size=256,
                  validation_split=0.2,
                  sample_weight=seqlet_mask,
                  callbacks=[EarlyStopping('val_acc', patience=3)])
Train on 14468 samples, validate on 3618 samples
Epoch 1/50
14468/14468 [==============================] - 4s 303us/step - loss: 4.2427 - acc: 1.8247e-04 - val_loss: nan - val_acc: 4.5191e-04
Epoch 2/50
14468/14468 [==============================] - 1s 72us/step - loss: 1.5674 - acc: 0.8581 - val_loss: nan - val_acc: 0.9998
Epoch 3/50
14468/14468 [==============================] - 1s 71us/step - loss: 1.2215 - acc: 0.9976 - val_loss: nan - val_acc: 0.9998
Epoch 4/50
14468/14468 [==============================] - 1s 75us/step - loss: 1.1751 - acc: 0.9975 - val_loss: nan - val_acc: 0.9997
Epoch 5/50
14468/14468 [==============================] - 1s 75us/step - loss: 1.1486 - acc: 0.9975 - val_loss: nan - val_acc: 0.9995
Epoch 6/50
14468/14468 [==============================] - 1s 82us/step - loss: 1.1315 - acc: 0.9975 - val_loss: nan - val_acc: 0.9997
Out[129]:
<keras.callbacks.History at 0x7f6054658748>
In [130]:
y_pred = modisco_model.predict(seq)
In [131]:
#fig = plt.subplot(figsize=(10,10))
# Conv-width = 15
skplt.metrics.plot_confusion_matrix(np.ravel(y_seqlets.argmax(axis=-1)), 
                                    np.ravel(y_pred.argmax(axis=-1)), 
                                    normalize=True)
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/matplotlib/cbook/deprecation.py:107: MatplotlibDeprecationWarning: Passing one of 'on', 'true', 'off', 'false' as a boolean is deprecated; use an actual boolean (True/False) instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)
Out[131]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f60631b31d0>

Old

In [389]:
W, b= modisco_model.layers[-1].get_weights()
In [408]:
# Number of output filters (last axis of the kernel) — one panel per filter
nplt = W.shape[-1]
fig, axs = plt.subplots(nrows=4, ncols=4, sharex=True, sharey=True, figsize=(7,10))

for i, ax in enumerate(axs.reshape(-1)):
    if i >= nplt:
        # Fewer filters than panels: leave the remaining axes empty
        ax.set_axis_off()
        continue
    # Show the i-th filter (previously every panel displayed filter 0);
    # transposed so that the kernel-width axis runs along x
    ax.imshow(W[:, :, i].T);
plt.xlabel("Filter width idx");
In [393]:
W.shape
Out[393]:
(15, 21, 16)
In [390]:
W
Out[390]:
(15, 21, 16)
In [374]:
#fig = plt.subplot(figsize=(10,10))
# Conv-width = 1
skplt.metrics.plot_confusion_matrix(np.ravel(y_seqlets.argmax(axis=-1)), 
                                    np.ravel(y_pred.argmax(axis=-1)), 
                                    normalize=True)
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/matplotlib/cbook/deprecation.py:107: MatplotlibDeprecationWarning: Passing one of 'on', 'true', 'off', 'false' as a boolean is deprecated; use an actual boolean (True/False) instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)
Out[374]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc602ae3278>
In [367]:
#fig = plt.subplot(figsize=(10,10))
# Conv-width = 9
skplt.metrics.plot_confusion_matrix(np.ravel(y_seqlets.argmax(axis=-1)), 
                                    np.ravel(y_pred.argmax(axis=-1)), 
                                    normalize=True)
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/matplotlib/cbook/deprecation.py:107: MatplotlibDeprecationWarning: Passing one of 'on', 'true', 'off', 'false' as a boolean is deprecated; use an actual boolean (True/False) instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)
Out[367]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc6043b1f60>
In [ ]:
# NOTE(review): stale cell that was never executed. `seqlets` is not defined in
# the visible cells, and the current label_seqlets expects
# (seq_shape, seqlets, n_patterns) — this call passes the sequence array itself
# and omits n_patterns. Compare with the call that builds `y_seqlets`.
seqlet_locations = label_seqlets(seq, seqlets)
In [63]:
mr.patterns()
Out[63]:
['pattern_0',
 'pattern_1',
 'pattern_2',
 'pattern_3',
 'pattern_4',
 'pattern_5',
 'pattern_6',
 'pattern_7',
 'pattern_8',
 'pattern_9',
 'pattern_10',
 'pattern_11',
 'pattern_12',
 'pattern_13',
 'pattern_14']

Get all seqlets (including unmapped)

In [565]:
a=1
In [566]:
mr.f.f['/metacluster_idx_to_submetacluster_results/metacluster0/seqlets'][:5]
Out[566]:
array([b'example:0,start:94,end:135,rc:False',
       b'example:1,start:67,end:108,rc:False',
       b'example:2,start:80,end:121,rc:False',
       b'example:2,start:15,end:56,rc:False',
       b'example:3,start:51,end:92,rc:False'], dtype=object)
In [409]:
mr.f.ls()
Out[409]:
[('/metacluster_idx_to_submetacluster_results/metacluster0/activity_pattern',
  <HDF5 dataset "activity_pattern": shape (1,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets',
  <HDF5 dataset "seqlets": shape (19193,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/affmat',
  <HDF5 dataset "affmat": shape (7781, 7781), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/cluster_results/cluster_indices',
  <HDF5 dataset "cluster_indices": shape (7781,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/all_pattern_names',
  <HDF5 dataset "all_pattern_names": shape (15,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_0/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (2258,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_0/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (2258,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_0/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_0/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_0/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_0/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_0/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_0/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_1/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (2020,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_1/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (2020,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_1/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_1/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_1/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_1/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_1/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_1/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_10/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (60,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_10/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (60,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_10/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_10/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_10/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_10/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_10/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_10/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_11/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (49,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_11/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (49,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_11/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_11/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_11/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_11/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_11/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_11/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_12/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (54,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_12/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (54,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_12/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_12/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_12/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_12/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_12/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_12/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_13/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (58,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_13/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (58,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_13/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_13/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_13/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_13/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_13/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_13/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_14/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (46,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_14/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (46,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_14/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_14/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_14/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_14/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_14/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_14/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_2/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (652,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_2/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (652,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_2/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_2/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_2/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_2/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_2/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_2/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_3/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (252,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_3/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (252,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_3/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_3/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_3/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_3/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_3/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_3/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_4/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (198,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_4/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (198,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_4/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_4/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_4/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_4/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_4/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_4/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_5/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (187,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_5/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (187,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_5/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_5/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_5/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_5/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_5/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_5/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_6/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (139,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_6/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (139,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_6/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_6/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_6/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_6/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_6/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_6/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_7/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (112,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_7/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (112,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_7/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_7/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_7/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_7/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_7/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_7/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_8/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (104,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_8/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (104,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_8/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_8/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_8/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_8/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_8/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_8/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_9/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (105,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_9/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (105,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_9/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_9/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_9/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_9/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_9/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster0/seqlets_to_patterns_result/patterns/pattern_9/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/activity_pattern',
  <HDF5 dataset "activity_pattern": shape (1,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets',
  <HDF5 dataset "seqlets": shape (807,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets_to_patterns_result/affmat',
  <HDF5 dataset "affmat": shape (445, 445), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets_to_patterns_result/cluster_results/cluster_indices',
  <HDF5 dataset "cluster_indices": shape (445,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets_to_patterns_result/patterns/all_pattern_names',
  <HDF5 dataset "all_pattern_names": shape (1,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets_to_patterns_result/patterns/pattern_0/seqlets_and_alnmts/alnmts',
  <HDF5 dataset "alnmts": shape (190,), type "<i8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets_to_patterns_result/patterns/pattern_0/seqlets_and_alnmts/seqlets',
  <HDF5 dataset "seqlets": shape (190,), type "|O">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets_to_patterns_result/patterns/pattern_0/sequence/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets_to_patterns_result/patterns/pattern_0/sequence/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets_to_patterns_result/patterns/pattern_0/task0_contrib_scores/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets_to_patterns_result/patterns/pattern_0/task0_contrib_scores/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets_to_patterns_result/patterns/pattern_0/task0_hypothetical_contribs/fwd',
  <HDF5 dataset "fwd": shape (70, 4), type "<f8">),
 ('/metacluster_idx_to_submetacluster_results/metacluster1/seqlets_to_patterns_result/patterns/pattern_0/task0_hypothetical_contribs/rev',
  <HDF5 dataset "rev": shape (70, 4), type "<f8">),
 ('/metaclustering_results/all_metacluster_names',
  <HDF5 dataset "all_metacluster_names": shape (2,), type "|O">),
 ('/metaclustering_results/metacluster_indices',
  <HDF5 dataset "metacluster_indices": shape (20000,), type "<i8">),
 ('/multitask_seqlet_creation_results/final_seqlets',
  <HDF5 dataset "final_seqlets": shape (20000,), type "|O">),
 ('/multitask_seqlet_creation_results/task_name_to_coord_producer_results/task0/coords',
  <HDF5 dataset "coords": shape (20000,), type "|O">),
 ('/multitask_seqlet_creation_results/task_name_to_coord_producer_results/task0/thresholding_results/densities',
  <HDF5 dataset "densities": shape (100,), type "<f8">),
 ('/multitask_seqlet_creation_results/task_name_to_coord_producer_results/task0/vals_to_threshold',
  <HDF5 dataset "vals_to_threshold": shape (75019,), type "<f8">),
 ('/task_names', <HDF5 dataset "task_names": shape (1,), type "|O">)]
In [134]:
# Read every entry of the stored metacluster-name dataset and convert the values to str.
mr.f.f['/metaclustering_results/all_metacluster_names'][:].astype(str)
Out[134]:
array(['metacluster0', 'metacluster1'], dtype='<U12')
In [412]:
# Peek at the first four entries of the stored final-seqlet dataset.
# In the output shown for this cell, each entry is a byte string of the form
# b'example:<idx>,start:<start>,end:<end>,rc:<bool>'.
mr.f.f['/multitask_seqlet_creation_results/final_seqlets'][:4]
Out[412]:
array([b'example:4032,start:64,end:105,rc:False',
       b'example:4032,start:40,end:81,rc:False',
       b'example:10468,start:88,end:129,rc:False',
       b'example:10468,start:37,end:78,rc:False'], dtype=object)

Re-plot patterns

In [65]:
# Plot the pattern profiles via `mr`, restricted to the examples selected by `incl`.
# Second argument: for each task in bpnet.tasks, the "profile/<task>" entry of test[1],
# indexed by the same `incl` selection as the first argument.
# NOTE(review): `incl` is defined outside this cell — presumably the samples modisco
# was run on; confirm against the cell that creates it.
mr.plot_profiles(test[0][incl], 
                 {task: test[1][f"profile/{task}"][incl] for task in bpnet.tasks},
                 legend=False,
                 seq_height=1.5,
                 n_bootstrap=None)

Create a classifier

Train and evaluate

In [16]:
# Draw the first two entries of `scores` with `seqlogo`, one per row.
# NOTE(review): `scores` is defined outside this cell — confirm it holds at
# least two arrays that `seqlogo` can draw.
from concise.utils.plot import seqlogo_fig, seqlogo
import matplotlib.pyplot as plt

# Two stacked panels that share the x-axis, so positions line up between them.
fig, (ax0, ax1) = plt.subplots(2, 1, sharex=True, figsize=(20, 6))

# Distinct titles so the two panels can be told apart.
ax0.set_title("scores[0]")
seqlogo(scores[0], ax=ax0)

ax1.set_title("scores[1]")
seqlogo(scores[1], ax=ax1)