Goal

  • cluster the motif sites using modisco

Resources

Modisco videos

TODO

  • get the importance profiles
    • add up the gradients for the two strands
In [2]:
# Imports — stdlib, third-party, project-local.
import os
import numpy as np
import pandas as pd  # used below for pd.Series-based index selection
import matplotlib.pyplot as plt  # NOTE(review): may also come from `basepair.plots import *`; explicit import is safer
from keras.models import load_model
from keras import backend as K  # used below to build the gradient functions
from basepair.config import create_tf_session, get_data_dir
from basepair import models, datasets
import basepair as bp
from basepair.losses import twochannel_multinomial_nll
from basepair.layers import SpatialLifetimeSparsity
from basepair.math import softmax
from basepair.plots import *  # provides seqlogo used in the plotting loop
from tqdm import tqdm
from basepair.data import numpy_minibatch

# Root data directory for all checkpoint / output paths below
ddir = get_data_dir()

Setup the model and the data

In [3]:
create_tf_session(1)
Out[3]:
<tensorflow.python.client.session.Session at 0x7ff15a5607f0>
In [4]:
ckp_file = f"{ddir}/processed/chipnexus/exp/models/resnest_allconnect_nconv=7_filters=32_lr=0.004_dilated=True,out=25.h5"
In [5]:
model = load_model(ckp_file)
WARNING:tensorflow:From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:497: calling conv1d (from tensorflow.python.ops.nn_ops) with data_format=NHWC is deprecated and will be removed in a future version.
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
2018-05-16 17:33:31,151 [WARNING] From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py:497: calling conv1d (from tensorflow.python.ops.nn_ops) with data_format=NHWC is deprecated and will be removed in a future version.
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
WARNING:tensorflow:From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:198: retry (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Use the retry module or similar alternatives.
2018-05-16 17:33:37,301 [WARNING] From /users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:198: retry (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Use the retry module or similar alternatives.
In [6]:
train, valid, test = datasets.seq_inp_exo_out()

Plot

In [33]:
# Functions computing the gradient of a reduced model output w.r.t. the input.
from keras import backend as K  # local import: K is not imported in the setup cell

out = model.outputs[0]
inp = model.inputs[0]


def _make_grad_fn(strand, reduce_fn):
    """Build a K.function computing d reduce_fn(out[:, :, strand]) / d inp.

    strand: output channel — 0 = positive strand, 1 = negative strand.
    reduce_fn: K.mean or K.max, applied over the positional axis.
    """
    return K.function([inp], K.gradients(reduce_fn(out[:, :, strand], axis=-1), inp))


pos_strand_ginp_avg = _make_grad_fn(0, K.mean)
neg_strand_ginp_avg = _make_grad_fn(1, K.mean)
pos_strand_ginp_max = _make_grad_fn(0, K.max)
neg_strand_ginp_max = _make_grad_fn(1, K.max)

Get predictions and gradients

In [36]:
# Pre-compute predictions and max-output input gradients on the validation set.
x, y_true = valid[0], valid[1]
y_pred = softmax(model.predict(x))

batch_size = 512
grads_pos = np.concatenate(
    [pos_strand_ginp_max([xb])[0] for xb in numpy_minibatch(x, batch_size)]
)
grads_neg = np.concatenate(
    [neg_strand_ginp_max([xb])[0] for xb in numpy_minibatch(x, batch_size)]
)

# Input-times-gradient attribution scores, per strand
igrads_pos = grads_pos * x
igrads_neg = grads_neg * x

Correlate pos and neg-strand gradients

In [13]:
from scipy.spatial.distance import cosine, correlation
In [38]:
# Flatten the per-example gradients to 1-D vectors of length seqlen * 4.
n_examples = grads_pos.shape[0]
grads_pos_ext = grads_pos.reshape((n_examples, -1))
grads_neg_ext = grads_neg.reshape((grads_neg.shape[0], -1))
In [39]:
distances = np.array([correlation(grads_neg_ext[i], grads_pos_ext[i]) for i in range(len(grads_neg_ext))])
In [40]:
import numpy as np
import seaborn as sns
In [41]:
# Histogram and cumulative histogram of the pos/neg gradient correlation distances.
fig, (ax_hist, ax_cum) = plt.subplots(2, 1, figsize=(6, 6))

ax_hist.hist(distances, bins=50)

counts, edges = np.histogram(distances, bins=40)
ax_cum.plot(edges[:-1], np.cumsum(counts))
ax_cum.grid()
ax_cum.set_xlabel("Correlation Distance")
ax_cum.set_ylabel("Fraction of data points")
In [ ]:
# We get roughly 1/3 of the points with high-correlation
In [42]:
top10_idx = pd.Series(np.where(distances<0.5)[0]).sample(10)
In [119]:
top10_idx = pd.Series(distances).sort_values(ascending=True).index[:10]
In [68]:
distances
Out[68]:
array([0., 0.])

Plot some

In [49]:
# Top maxcount indicies
top10_idx = pd.Series(y_true.max(1).sum(1)).sort_values(ascending=False).index[10:30]
In [46]:
# Top count indicies
top10_idx = pd.Series(y_true.sum(1).sum(1)).sort_values(ascending=False).index[:10]
In [36]:
# Random indicies
top10_idx = pd.Series(np.arange(len(y_true))).sample(10)
In [44]:
# For each selected example: observed counts, predicted profile and the
# input*gradient importance logos for both strands, sharing one x-axis.
positions = np.arange(1, 202)  # 1-based positions; assumes seqlen == 201 — TODO confirm

for i in top10_idx:
    fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True, figsize=(20, 6))

    # Observed ChIP-nexus counts per strand
    ax1.plot(positions, y_true[i, :, 0], label="pos")
    ax1.plot(positions, y_true[i, :, 1], label="neg")
    ax1.set_ylabel("Observed\ncounts")
    ax1.legend()

    # Model-predicted profile per strand
    ax2.plot(positions, y_pred[i, :, 0], label="pos")
    ax2.plot(positions, y_pred[i, :, 1], label="neg")
    ax2.set_ylabel("Predicted\n")
    ax2.legend()

    # Importance (input * gradient) sequence logos, one per strand
    ax3.set_ylabel("Pos. strand")
    seqlogo(igrads_pos[i], ax=ax3)
    ax4.set_ylabel("Neg. strand")
    seqlogo(igrads_neg[i], ax=ax4)

    # Tick every 5 bp on the shared x-axis (removed unused `x_range` local)
    ax4.set_xticks(list(range(0, 201, 5)))

Run modisco

In [53]:
top_distances = distances<0.2
In [54]:
hyp_scores = grads_pos + grads_neg
In [55]:
hyp_scores = hyp_scores[top_distances]
In [56]:
hyp_scores = hyp_scores - hyp_scores.mean(-1, keepdims=True)
In [61]:
onehot_data = valid[0][top_distances]
In [59]:
scores = hyp_scores * one_hot_data
In [58]:
len(hyp_scores)
Out[58]:
836
In [62]:
# Visualize the first example: contribution scores, hypothetical scores and
# the one-hot sequence, as modisco sequence-logo plots.
import modisco.visualization
from modisco.visualization import viz_sequence

viz_sequence.plot_weights(scores[0])
viz_sequence.plot_weights(hyp_scores[0])
viz_sequence.plot_weights(onehot_data[0])
In [63]:
from imp import reload
In [64]:
%env MKL_THREADING_LAYER=GNU
env: MKL_THREADING_LAYER=GNU
In [65]:
import theano
Can not use cuDNN on context None: cannot compile with cuDNN. We got this error:
b'/tmp/try_flags_txoqtk0m.c:4:19: fatal error: cudnn.h: No such file or directory\ncompilation terminated.\n'
Mapped name None to device cuda: GeForce GTX TITAN X (0000:05:00.0)
In [66]:
import h5py
import numpy as np
%matplotlib inline
# Reload every modisco submodule so local code edits are picked up without a
# kernel restart (development-time idiom; harmless but redundant otherwise).
import modisco
reload(modisco)
import modisco.backend
reload(modisco.backend.theano_backend)
reload(modisco.backend)
import modisco.nearest_neighbors
reload(modisco.nearest_neighbors)
import modisco.affinitymat
reload(modisco.affinitymat.core)
reload(modisco.affinitymat.transformers)
import modisco.tfmodisco_workflow.seqlets_to_patterns
reload(modisco.tfmodisco_workflow.seqlets_to_patterns)
import modisco.tfmodisco_workflow.workflow
reload(modisco.tfmodisco_workflow.workflow)
import modisco.aggregator
reload(modisco.aggregator)
import modisco.cluster
reload(modisco.cluster.core)
reload(modisco.cluster.phenograph.core)
reload(modisco.cluster.phenograph.cluster)
import modisco.core
reload(modisco.core)
import modisco.coordproducers
reload(modisco.coordproducers)
import modisco.metaclusterers
reload(modisco.metaclusterers)

# Run the TF-MoDISco workflow on a single task ("task0"):
#  - contrib_scores: actual (sequence-masked) contribution scores
#  - hypothetical_contribs: per-base hypothetical contributions
#  - sliding_window_size/flank_size: seqlet extraction geometry
#  - min_cluster_size=200: minimum seqlets per activity pattern
# See the log output below for the clustering/merging trace.
tfmodisco_results = modisco.tfmodisco_workflow.workflow.TfModiscoWorkflow()(
                task_names=["task0"],
                contrib_scores={'task0': scores},
                hypothetical_contribs={'task0': hyp_scores},
                sliding_window_size=21, 
                flank_size=10,
                histogram_bins=100, 
                percentiles_in_bandwidth=10,
                overlap_portion=0.5,     
                min_cluster_size=200,    
                threshold_for_counting_sign=1.0,
                weak_threshold_for_counting_sign=0.7, 
                one_hot=onehot_data)
On task task0
Done 0
Done 0
Done 0
Done 0
Done 0
Done 0
Done 0
Done 0
Done 0
Done 0
Done 0
Got 5243 coords
Computing thresholds
Bandwidth calculated: 0.2830379083752632
Computed threshold 0.4280355059043814
2640 coords remaining after thresholding
After resolving overlaps, got 2640 seqlets
1 activity patterns with support >= 200 out of 3 possible patterns
Metacluster sizes:  [2574]
Idx to activities:  {0: '1'}
On metacluster 0
Metacluster size 2574
Relevant tasks:  ('task0',)
Relevant signs:  (1,)
(Round 1) num seqlets: 2574
(Round 1) Computing coarse affmat
Beginning embedding computation
Computing embeddings
Finished embedding computation in 4.07 s
Starting affinity matrix computations
Normalization computed in 0.92 s
Cosine similarity mat computed in 1.63 s
Normalization computed in 0.96 s
Cosine similarity mat computed in 1.59 s
Finished affinity matrix computations in 3.35 s
(Round 1) Compute nearest neighbors from coarse affmat
Computed nearest neighbors in 0.26 s
(Round 1) Computing affinity matrix on nearest neighbors
Launching nearest neighbors affmat calculation job
Job completed in: 42.36 s
Launching nearest neighbors affmat calculation job
Job completed in: 44.08 s
(Round 1) Computed affinity matrix on nearest neighbors in 92.38 s
Filtered down to 1640 of 2574
(Round 1) Retained 1640 rows out of 2574 after filtering
(Round 1) Computing density adapted affmat
[t-SNE] Computing 31 nearest neighbors...
[t-SNE] Indexed 1640 samples in 0.003s...
[t-SNE] Computed neighbors for 1640 samples in 0.041s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1640
[t-SNE] Computed conditional probabilities for sample 1640 / 1640
[t-SNE] Mean sigma: 0.213453
(Round 1) Computing clustering
Beginning preprocessing + Louvain
Wrote graph to binary file in 0.34949421882629395 seconds
Running Louvain modularity optimization
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.7s
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed:    5.5s
[Parallel(n_jobs=20)]: Done 200 out of 200 | elapsed:    6.8s finished
Louvain completed 200 runs in 11.856637954711914 seconds
Wrote graph to binary file in 3.137890100479126 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.710238
After 10 runs, maximum modularity is Q = 0.711879
After 14 runs, maximum modularity is Q = 0.721035
After 19 runs, maximum modularity is Q = 0.721281
Louvain completed 69 runs in 26.8468234539032 seconds
Preproc + Louvain took 42.562561988830566 s
Got 11 clusters after round 1
Counts:
{1: 274, 2: 259, 3: 219, 0: 342, 6: 103, 4: 124, 10: 25, 5: 122, 9: 36, 7: 93, 8: 43}
(Round 1) Aggregating seqlets in each cluster
Aggregating for cluster 0 with 342 seqlets
Trimmed 25 out of 342
Skipped 20 seqlets
Aggregating for cluster 1 with 274 seqlets
Trimmed 21 out of 274
Skipped 19 seqlets
Aggregating for cluster 2 with 259 seqlets
Trimmed 18 out of 259
Skipped 26 seqlets
Aggregating for cluster 3 with 219 seqlets
Trimmed 33 out of 219
Skipped 18 seqlets
Aggregating for cluster 4 with 124 seqlets
Trimmed 17 out of 124
Skipped 9 seqlets
Aggregating for cluster 5 with 122 seqlets
Trimmed 15 out of 122
Skipped 10 seqlets
Aggregating for cluster 6 with 103 seqlets
Trimmed 1 out of 103
Skipped 30 seqlets
Aggregating for cluster 7 with 93 seqlets
Trimmed 0 out of 93
Skipped 7 seqlets
Aggregating for cluster 8 with 43 seqlets
Trimmed 0 out of 43
Skipped 2 seqlets
Aggregating for cluster 9 with 36 seqlets
Trimmed 1 out of 36
Skipped 2 seqlets
Skipped 1 seqlets
Aggregating for cluster 10 with 25 seqlets
Trimmed 9 out of 25
Skipped 2 seqlets
(Round 2) num seqlets: 1353
(Round 2) Computing coarse affmat
Beginning embedding computation
Computing embeddings
Finished embedding computation in 2.05 s
Starting affinity matrix computations
Normalization computed in 0.24 s
Cosine similarity mat computed in 0.43 s
Normalization computed in 0.17 s
Cosine similarity mat computed in 0.31 s
Finished affinity matrix computations in 0.74 s
(Round 2) Compute nearest neighbors from coarse affmat
Computed nearest neighbors in 0.09 s
(Round 2) Computing affinity matrix on nearest neighbors
Launching nearest neighbors affmat calculation job
Job completed in: 38.95 s
Launching nearest neighbors affmat calculation job
Job completed in: 37.83 s
(Round 2) Computed affinity matrix on nearest neighbors in 80.33 s
Not applying filtering for rounds above first round
(Round 2) Computing density adapted affmat
[t-SNE] Computing 31 nearest neighbors...
[t-SNE] Indexed 1353 samples in 0.001s...
[t-SNE] Computed neighbors for 1353 samples in 0.031s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1353
[t-SNE] Computed conditional probabilities for sample 1353 / 1353
[t-SNE] Mean sigma: 0.241569
(Round 2) Computing clustering
Beginning preprocessing + Louvain
Wrote graph to binary file in 0.3338606357574463 seconds
Running Louvain modularity optimization
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.8s
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed:    5.8s
[Parallel(n_jobs=20)]: Done 200 out of 200 | elapsed:    7.1s finished
Louvain completed 200 runs in 11.60930347442627 seconds
Wrote graph to binary file in 1.7511029243469238 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.733718
After 2 runs, maximum modularity is Q = 0.735291
After 17 runs, maximum modularity is Q = 0.73733
After 21 runs, maximum modularity is Q = 0.741903
After 38 runs, maximum modularity is Q = 0.742407
After 45 runs, maximum modularity is Q = 0.745822
Louvain completed 95 runs in 34.77601408958435 seconds
Preproc + Louvain took 49.02497124671936 s
Got 11 clusters after round 2
Counts:
{4: 146, 2: 179, 6: 136, 7: 58, 0: 273, 1: 195, 3: 164, 10: 14, 8: 26, 5: 141, 9: 21}
(Round 2) Aggregating seqlets in each cluster
Aggregating for cluster 0 with 273 seqlets
Trimmed 97 out of 273
Aggregating for cluster 1 with 195 seqlets
Trimmed 70 out of 195
Aggregating for cluster 2 with 179 seqlets
Trimmed 48 out of 179
Aggregating for cluster 3 with 164 seqlets
Trimmed 41 out of 164
Skipped 2 seqlets
Aggregating for cluster 4 with 146 seqlets
Trimmed 52 out of 146
Aggregating for cluster 5 with 141 seqlets
Trimmed 47 out of 141
Skipped 1 seqlets
Aggregating for cluster 6 with 136 seqlets
Trimmed 34 out of 136
Aggregating for cluster 7 with 58 seqlets
Trimmed 16 out of 58
Aggregating for cluster 8 with 26 seqlets
Trimmed 0 out of 26
Aggregating for cluster 9 with 21 seqlets
Trimmed 6 out of 21
Aggregating for cluster 10 with 14 seqlets
Trimmed 0 out of 14
Got 11 clusters
Splitting into subclusters...
Inspecting for spurious merging
Wrote graph to binary file in 0.45883607864379883 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.00864175
Louvain completed 21 runs in 7.418791055679321 seconds
Similarity is 0.9915791189242122; is_dissimilar is False
Inspecting for spurious merging
Wrote graph to binary file in 0.12370824813842773 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.00743437
After 11 runs, maximum modularity is Q = 0.00743438
Louvain completed 31 runs in 11.087598323822021 seconds
Similarity is 0.9666125719064538; is_dissimilar is False
Inspecting for spurious merging
Wrote graph to binary file in 0.08389449119567871 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.00616416
Louvain completed 21 runs in 7.396540403366089 seconds
Similarity is 0.9903041471106122; is_dissimilar is False
Inspecting for spurious merging
Wrote graph to binary file in 0.08758378028869629 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.00719687
Louvain completed 21 runs in 7.5061869621276855 seconds
Similarity is 0.9578167963191903; is_dissimilar is False
Inspecting for spurious merging
Wrote graph to binary file in 0.06248664855957031 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.0097135
After 2 runs, maximum modularity is Q = 0.00977209
Louvain completed 22 runs in 7.86962628364563 seconds
Similarity is 0.8867047796960423; is_dissimilar is False
Inspecting for spurious merging
Wrote graph to binary file in 0.05991816520690918 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.00710173
After 5 runs, maximum modularity is Q = 0.0076013
After 6 runs, maximum modularity is Q = 0.00789837
After 8 runs, maximum modularity is Q = 0.00789838
After 9 runs, maximum modularity is Q = 0.00794415
Louvain completed 29 runs in 11.200222253799438 seconds
Similarity is 0.9198125255059679; is_dissimilar is False
Inspecting for spurious merging
Wrote graph to binary file in 0.05480694770812988 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.006039
Louvain completed 21 runs in 7.259416103363037 seconds
Similarity is 0.9937839139031905; is_dissimilar is False
Inspecting for spurious merging
Wrote graph to binary file in 0.025652408599853516 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.00441783
Louvain completed 21 runs in 7.356609344482422 seconds
Similarity is 0.9832766474267841; is_dissimilar is False
Merging on 11 clusters
On merging iteration 1
Computing pattern to seqlet distances
Computing pattern to pattern distances
Collapsing 6 & 7 with prob 0.00010065432997607053 and sim 0.9894994146333557
Collapsing 2 & 6 with prob 0.005728751376606339 and sim 0.9868118427080703
Collapsing 2 & 7 with prob 0.00010593184585352336 and sim 0.9857639068728644
Collapsing 2 & 10 with prob 2.9183996996251507e-06 and sim 0.9830554306110789
Collapsing 6 & 10 with prob 2.379262370520105e-06 and sim 0.9819719271041318
Collapsing 0 & 1 with prob 3.128729146280328e-05 and sim 0.9402389252344008
Collapsing 3 & 4 with prob 4.8130819060371496e-05 and sim 0.9336769942767751
Collapsing 0 & 7 with prob 1.847346375318706e-05 and sim 0.9287706695517916
Collapsing 1 & 9 with prob 6.716315477693031e-06 and sim 0.9246051107174198
Aborting collapse as 0 & 9 have prob 1.5455855618930305e-06 and sim 0.8199880251332117
Aborting collapse as 2 & 9 have prob 3.6882856479444346e-07 and sim 0.7691103145251695
Aborting collapse as 6 & 9 have prob 1.182731202922759e-07 and sim 0.7699732523148903
Aborting collapse as 7 & 9 have prob 1.0815510486824477e-10 and sim 0.7971829914482823
Aborting collapse as 9 & 10 have prob 4.0957811655596546e-16 and sim 0.819591474043107
Collapsing 1 & 4 with prob 0.00010998612545030581 and sim 0.9227358962796253
Aborting collapse as 0 & 3 have prob 1.761913465653645e-07 and sim 0.6614065946412593
Aborting collapse as 2 & 3 have prob 9.493716483181483e-08 and sim 0.6838231747008559
Aborting collapse as 3 & 6 have prob 1.2069873969196685e-08 and sim 0.6595007359553019
Aborting collapse as 4 & 7 have prob 1.9923411616574536e-09 and sim 0.8537701345332559
Collapsing 0 & 2 with prob 0.004722759771534372 and sim 0.9201543929262275
Collapsing 0 & 6 with prob 0.0033287369228242464 and sim 0.9131641148605492
Collapsing 1 & 2 with prob 4.864625057202676e-05 and sim 0.9037125163222294
Collapsing 4 & 9 with prob 1.4609203919623138e-05 and sim 0.8962335342080402
Collapsing 3 & 9 with prob 9.932020349658885e-05 and sim 0.8929559002724741
Collapsing 1 & 6 with prob 1.0330548110699781e-05 and sim 0.888845811842996
Collapsing 3 & 5 with prob 0.000754306357182347 and sim 0.885553933815357
Aborting collapse as 5 & 9 have prob 6.371658492874202e-06 and sim 0.7880732989660111
Trimmed 0 out of 131
Trimmed 0 out of 262
Trimmed 0 out of 269
Trimmed 0 out of 301
Trimmed 0 out of 201
Trimmed 1 out of 566
Trimmed 15 out of 216
On merging iteration 2
Computing pattern to seqlet distances
Computing pattern to pattern distances
Collapsing 1 & 2 with prob 0.0021072503551748216 and sim 0.8867061282702535
Trimmed 2 out of 294
On merging iteration 3
Computing pattern to seqlet distances
Computing pattern to pattern distances
Got 3 patterns after merging
Performing seqlet reassignment
Cross contin jaccard time taken: 3.21 s
Cross contin jaccard time taken: 3.51 s
Discarded 3 seqlets
Skipped 68 seqlets
Skipped 13 seqlets
Got 2 patterns after reassignment
Total time taken is 414.38s
In [74]:
mkdir -p {ddir}/processed/chipnexus/motifs/sox2/modisco
In [76]:
!du -sh {ddir}/processed/chipnexus/motifs/sox2/modisco
15M	/users/avsec/workspace/basepair/basepair/../data/processed/chipnexus/motifs/sox2/modisco
In [77]:
modisco_file = f"{ddir}/processed/chipnexus/motifs/sox2/modisco/defaults.hdf5"
In [75]:
import h5py
import modisco.util
reload(modisco.util)

# Serialize the modisco results.  Open with an explicit mode and close the
# handle afterwards: `h5py.File(path)` with no mode relied on the old default
# ('a') and leaked the file handle, which can leave the HDF5 file corrupt.
# NOTE(review): execution counts show this cell (In[75]) originally ran
# before `modisco_file` was defined (In[77]) — keep cells in definition order.
with h5py.File(modisco_file, "a") as grp:
    tfmodisco_results.save_hdf5(grp)
In [78]:
from collections import Counter
from modisco.visualization import viz_sequence
reload(viz_sequence)
from matplotlib import pyplot as plt

import modisco.affinitymat.core
reload(modisco.affinitymat.core)
import modisco.cluster.phenograph.core
reload(modisco.cluster.phenograph.core)
import modisco.cluster.phenograph.cluster
reload(modisco.cluster.phenograph.cluster)
import modisco.cluster.core
reload(modisco.cluster.core)
import modisco.aggregator
reload(modisco.aggregator)

import sklearn.decomposition
import sklearn.manifold

hdf5_results = h5py.File(modisco_file)

#patterns = (tfmodisco_results
#            .metacluster_idx_to_submetacluster_results[0]
#            .seqlets_to_patterns_result.patterns);
patterns = (list(hdf5_results
                 ["metacluster_idx_to_submetacluster_results"]
                 ["metacluster0"]
                 ["seqlets_to_patterns_result"]
                 ["patterns"]["all_pattern_names"]))
print(len(patterns))
pattern_grp = (hdf5_results
                 ["metacluster_idx_to_submetacluster_results"]
                 ["metacluster0"]
                 ["seqlets_to_patterns_result"]
                 ["patterns"])

for pattern_name in patterns:
    pattern = pattern_grp[pattern_name]
    print(pattern_name)
    print("total seqlets:",len(pattern["seqlets_and_alnmts"]["seqlets"]))
    #pattern.plot_counts(counts=aggregated_seqlet.get_per_position_seqlet_center_counts())
    background = np.array([0.27, 0.23, 0.23, 0.27])
    print("fwd:")
    viz_sequence.plot_weights(pattern["task0_contrib_scores"]["fwd"])
    viz_sequence.plot_weights(pattern["task0_hypothetical_contribs"]["fwd"])
    viz_sequence.plot_weights(viz_sequence.ic_scale(np.array(pattern["sequence"]["fwd"]),
                                                    background=background))

    print("reverse:")
    viz_sequence.plot_weights(pattern["task0_contrib_scores"]["rev"])
    viz_sequence.plot_weights(pattern["task0_hypothetical_contribs"]["rev"])
    viz_sequence.plot_weights(viz_sequence.ic_scale(np.array(pattern["sequence"]["rev"]),
                                                    background=background))
2
b'pattern_0'
total seqlets: 501
fwd:
reverse:
b'pattern_1'
total seqlets: 282
fwd:
reverse: