In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from collections import OrderedDict
In [3]:
from matlas.pwms import ic_scale, adjust_for_ic_scale
from matlas.matches import vdom_pssm
from modisco.visualization import viz_sequence
from matlas.matches import vdom_pssm
In [5]:
# For each of the ten folds, load the per-filter influence scores stored in
# filt_infl_from_loss.npy and keep the indices of the 15 highest-scoring filters.
motif_name = 'CEBPB'

filt_infl_from_loss = OrderedDict()
top_k_filters = OrderedDict()
for fold_no in range(1, 11):
    aitacdir = "/mnt/lab_data2/msharmin/oc-atlas/DanSkinData/alex_bpnets/{0}/{1}".format(motif_name, fold_no)
    influence = np.load(aitacdir+"/filt_infl_from_loss.npy")
    filt_infl_from_loss[fold_no] = influence
    # argsort is ascending; reversing it puts the largest score first.
    ranked_filters = np.argsort(influence)[::-1]
    top_k_filters[fold_no] = ranked_filters[:15]
    print(fold_no, top_k_filters[fold_no])
1 [47 41 60 33 56 38  8  0 53  5 43 63 13  2 48]
2 [14 29  1  5  9 40 58 63 46 60 22 55 39 25 24]
3 [10 32 51 54 15 29  6 39  7 11 38 35 46 18 55]
4 [47  3 43 21 44 42 11  4 52  6  9 50 20 39 31]
5 [36  3  2 53 14 49 47 27 38 11 26 17 30 42 50]
6 [49 35 32 11 34 58 46 24  3  1 57 54 38 26 12]
7 [54  8 32  0  7 45 16 57  4 60  3 15 50 40 43]
8 [17 31 56 11  4 26 63 62  7 50  0 14 19 18 44]
9 [25 30 21 12  4 55 61  7 36 40 23  8 11 37 51]
10 [25 26  6  2 48 51 11 43 63  7 28 54 42 47 18]
In [6]:
import numpy as np
import seaborn as sns

# Load the saved filter-weight array of every fold, keyed by fold number.
motif_name = 'CEBPB'
filter_weights = OrderedDict()
for fold_no in range(1, 11):
    weights_path = "/mnt/lab_data2/msharmin/oc-atlas/DanSkinData/alex_bpnets/filters/{0}/{0}_fold{1}_filters.npy".format(
        motif_name, fold_no)
    filter_weights[fold_no] = np.load(weights_path)
In [15]:
from matplotlib import pyplot as plt

from matlas.pwms import ic_scale, adjust_for_ic_scale
from matlas.matches import vdom_pssm
from modisco.visualization import viz_sequence
from matlas.matches import vdom_pssm


def draw_filter_weights(start_filter, end_filter, weights=None):
    """Draw heatmaps of filters ``start_filter`` .. ``end_filter - 1`` on a 4x4 grid.

    Each panel shows ``weights[:, :, i].T`` with rows labelled A/C/G/T and every
    column labelled with the letter whose weight is largest in that column.

    Parameters
    ----------
    start_filter, end_filter : int
        Half-open range of filter indices along the last axis of ``weights``.
        At most 16 filters fit on the grid.
    weights : array, optional
        Three-dimensional array indexed as ``weights[:, :, filter]``; assumed to
        be (positions, 4, filters) -- TODO confirm against the saved
        ``*_filters.npy`` files. Defaults to the notebook-level
        ``filter_weights``.

    Raises
    ------
    TypeError
        If ``weights`` is a per-fold mapping rather than a single array.
    ValueError
        If more than 16 filters are requested.

    Returns
    -------
    None
    """
    grid_rows, grid_cols = 4, 4

    if weights is None:
        weights = filter_weights
    if isinstance(weights, dict):
        # The notebook keeps one array per fold in a dict; slicing the dict
        # itself with [:, :, i] cannot work, so ask for a single fold's array.
        raise TypeError("weights must be one fold's array, e.g. "
                        "draw_filter_weights(0, 16, weights=filter_weights[fold_no])")

    if end_filter - start_filter > grid_rows * grid_cols:
        raise ValueError("at most {} filters fit on the grid, got {}".format(
            grid_rows * grid_cols, end_filter - start_filter))

    alphabets = np.array(['A', 'C', 'G', 'T'])

    fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(30,10))

    for i in range(start_filter, end_filter):
        a = np.copy(weights[:, :, i].T)
        # Row and column are both taken relative to start_filter so that panels
        # fill the grid in reading order for any starting index.
        r, c = divmod(i - start_filter, grid_cols)
        ax = sns.heatmap(a, yticklabels=alphabets,
                         xticklabels=alphabets[np.argmax(a, axis=0)],
                         ax=axes[r, c])
        ax.set_ylabel("filter{}".format(i))

    return None

Raw and mean normalized filter weights

In [16]:
from modisco.visualization import viz_sequence

# Preview a single filter per fold -- the first entry of top_k_filters[fold_no] --
# plotting its raw weights and then the same weights with the axis-1 mean removed.
for fold_no in range(1, 11):
    fold_weights = filter_weights[fold_no]
    # Only the first ranked filter is shown, hence the one-element slice.
    for i in top_k_filters[fold_no][:1]:
        print('fold_no', fold_no, 'filter', i)
        mean_norm_weights = np.copy(fold_weights[:,:,i])
        row_means = mean_norm_weights.mean(1, keepdims=True)
        mean_norm_weights = mean_norm_weights - row_means
        print('raw weights')
        viz_sequence.plot_weights(fold_weights[:,:,i])
        print('mean normalized weights')
        viz_sequence.plot_weights(mean_norm_weights)
fold_no 1 filter 47
raw weights
mean normalized weights
fold_no 2 filter 14
raw weights
mean normalized weights
fold_no 3 filter 10
raw weights
mean normalized weights
fold_no 4 filter 47
raw weights
mean normalized weights
fold_no 5 filter 36
raw weights
mean normalized weights
fold_no 6 filter 49
raw weights
mean normalized weights
fold_no 7 filter 54
raw weights
mean normalized weights
fold_no 8 filter 17
raw weights
mean normalized weights
fold_no 9 filter 25
raw weights
mean normalized weights
fold_no 10 filter 25
raw weights
mean normalized weights
In [17]:
from matlas.matches import vdom_pssm
from IPython.display import display

from vdom.helpers import (h1, p, li, img, div, b, br, ul, img, a, 
                          details, summary,
                          table, thead, th, tr, tbody, td, ol)

# For every fold, render the raw and mean-normalised weights of each selected
# filter with vdom_pssm and show them inside one collapsible <details> element.
for fold_no in range(1, 11):
    fold_weights = filter_weights[fold_no]
    items_to_display = []
    for i in top_k_filters[fold_no]:
        filter_name = 'filter{}'.format(i)
        mean_norm_weights = np.copy(fold_weights[:,:,i])
        mean_norm_weights = mean_norm_weights - mean_norm_weights.mean(1, keepdims=True)
        un_query = vdom_pssm(fold_weights[:,:,i])
        n_query = vdom_pssm(mean_norm_weights)
        entry = summary(b(filter_name), p('raw weights'), un_query,
                        'mean normalized', n_query)
        items_to_display.append(entry)
    heading = summary(b("weights of selected filters in fold {}".format(fold_no)))
    display(details(heading, summary(items_to_display)))
weights of selected filters in fold 1filter47

raw weights

mean normalized
filter41

raw weights

mean normalized
filter60

raw weights

mean normalized
filter33

raw weights

mean normalized
filter56

raw weights

mean normalized
filter38

raw weights

mean normalized
filter8

raw weights

mean normalized
filter0

raw weights

mean normalized
filter53

raw weights

mean normalized
filter5

raw weights

mean normalized
filter43

raw weights

mean normalized
filter63

raw weights

mean normalized
filter13

raw weights

mean normalized
filter2

raw weights

mean normalized
filter48

raw weights

mean normalized
weights of selected filters in fold 2filter14

raw weights

mean normalized
filter29

raw weights

mean normalized
filter1

raw weights

mean normalized
filter5

raw weights

mean normalized
filter9

raw weights

mean normalized
filter40

raw weights

mean normalized
filter58

raw weights

mean normalized
filter63

raw weights

mean normalized
filter46

raw weights

mean normalized
filter60

raw weights

mean normalized
filter22

raw weights

mean normalized
filter55

raw weights

mean normalized
filter39

raw weights

mean normalized
filter25

raw weights

mean normalized
filter24

raw weights

mean normalized
weights of selected filters in fold 3filter10

raw weights

mean normalized
filter32

raw weights

mean normalized
filter51

raw weights

mean normalized
filter54

raw weights

mean normalized
filter15

raw weights

mean normalized
filter29

raw weights

mean normalized
filter6

raw weights

mean normalized
filter39

raw weights

mean normalized
filter7

raw weights

mean normalized
filter11

raw weights

mean normalized
filter38

raw weights

mean normalized
filter35

raw weights

mean normalized
filter46

raw weights

mean normalized
filter18

raw weights

mean normalized
filter55

raw weights

mean normalized
weights of selected filters in fold 4filter47

raw weights

mean normalized
filter3

raw weights

mean normalized
filter43

raw weights

mean normalized
filter21

raw weights

mean normalized
filter44

raw weights

mean normalized
filter42

raw weights

mean normalized
filter11

raw weights

mean normalized
filter4

raw weights

mean normalized
filter52

raw weights

mean normalized
filter6

raw weights

mean normalized
filter9

raw weights

mean normalized
filter50

raw weights

mean normalized
filter20

raw weights

mean normalized
filter39

raw weights

mean normalized
filter31

raw weights

mean normalized
weights of selected filters in fold 5filter36

raw weights

mean normalized
filter3

raw weights

mean normalized
filter2

raw weights

mean normalized
filter53

raw weights

mean normalized
filter14

raw weights

mean normalized
filter49

raw weights

mean normalized
filter47

raw weights

mean normalized
filter27

raw weights

mean normalized
filter38

raw weights

mean normalized
filter11

raw weights

mean normalized
filter26

raw weights

mean normalized
filter17

raw weights

mean normalized
filter30

raw weights

mean normalized
filter42

raw weights

mean normalized
filter50

raw weights

mean normalized
weights of selected filters in fold 6filter49

raw weights

mean normalized
filter35

raw weights

mean normalized
filter32

raw weights

mean normalized
filter11

raw weights

mean normalized
filter34

raw weights

mean normalized
filter58

raw weights

mean normalized
filter46

raw weights

mean normalized
filter24

raw weights

mean normalized
filter3

raw weights

mean normalized
filter1

raw weights

mean normalized
filter57

raw weights

mean normalized
filter54

raw weights

mean normalized
filter38

raw weights

mean normalized
filter26

raw weights

mean normalized
filter12

raw weights

mean normalized
weights of selected filters in fold 7filter54

raw weights

mean normalized
filter8

raw weights

mean normalized
filter32

raw weights

mean normalized
filter0

raw weights

mean normalized
filter7

raw weights

mean normalized
filter45

raw weights

mean normalized
filter16

raw weights

mean normalized
filter57

raw weights

mean normalized
filter4

raw weights

mean normalized
filter60

raw weights

mean normalized
filter3

raw weights

mean normalized
filter15

raw weights

mean normalized
filter50

raw weights

mean normalized
filter40

raw weights

mean normalized
filter43

raw weights

mean normalized
weights of selected filters in fold 8filter17

raw weights

mean normalized
filter31

raw weights

mean normalized
filter56

raw weights

mean normalized
filter11

raw weights

mean normalized
filter4

raw weights

mean normalized
filter26

raw weights

mean normalized
filter63

raw weights

mean normalized
filter62

raw weights

mean normalized
filter7

raw weights

mean normalized
filter50

raw weights

mean normalized
filter0

raw weights

mean normalized
filter14

raw weights

mean normalized
filter19

raw weights

mean normalized
filter18

raw weights

mean normalized
filter44

raw weights

mean normalized
weights of selected filters in fold 9filter25

raw weights

mean normalized
filter30

raw weights

mean normalized
filter21

raw weights

mean normalized
filter12

raw weights

mean normalized
filter4

raw weights

mean normalized
filter55

raw weights

mean normalized
filter61

raw weights

mean normalized
filter7

raw weights

mean normalized
filter36

raw weights

mean normalized
filter40

raw weights

mean normalized
filter23

raw weights

mean normalized
filter8

raw weights

mean normalized
filter11

raw weights

mean normalized
filter37

raw weights

mean normalized
filter51

raw weights

mean normalized
weights of selected filters in fold 10filter25

raw weights

mean normalized
filter26

raw weights

mean normalized
filter6

raw weights

mean normalized
filter2

raw weights

mean normalized
filter48

raw weights

mean normalized
filter51

raw weights

mean normalized
filter11

raw weights

mean normalized
filter43

raw weights

mean normalized
filter63

raw weights

mean normalized
filter7

raw weights

mean normalized
filter28

raw weights

mean normalized
filter54

raw weights

mean normalized
filter42

raw weights

mean normalized
filter47

raw weights

mean normalized
filter18

raw weights

mean normalized
In [18]:
from matlas.pwms import ic_scale, adjust_for_ic_scale
from matlas.matches import vdom_pssm
from sklearn.preprocessing import MinMaxScaler
from collections import OrderedDict

def ic_scale_filter_weights(filter_weights, filter_count=64):
    """Convert filter weights to row-wise probabilities and pass them to ``ic_scale``.

    Steps:
      1. Standardise the whole array with its global mean and standard
         deviation, then clip the result to [-2, 2].
      2. For each filter ``i`` (slice ``[:, :, i]``), rescale with
         ``MinMaxScaler`` (which scales each column independently).
      3. Normalise each row to sum to one, add a 0.01 pseudo-count and
         normalise again.
      4. Pass the probabilities to ``ic_scale``.

    Parameters
    ----------
    filter_weights : array
        Three-dimensional weight array indexed as ``[:, :, filter]``; assumed to
        be (positions, 4, filters) -- TODO confirm.
    filter_count : int, optional
        Number of filters (along the last axis) to process. Default 64.

    Returns
    -------
    OrderedDict
        Maps ``'filter<i>'`` to the value returned by ``ic_scale``.

    Raises
    ------
    AssertionError
        If ``ic_scale`` rejects a filter's probabilities. The message names the
        filter. (Previously the raw probabilities were returned instead of the
        dict, which only failed later in the caller.)
    """
    weights = np.asarray(filter_weights)
    norm_weights = (weights - np.mean(weights)) / np.std(weights)
    norm_weights = np.clip(norm_weights, -2.0, 2.0)

    ic_scaled_weights = OrderedDict()
    for i in range(filter_count):
        filter_name = 'filter{0}'.format(i)
        scaler = MinMaxScaler()
        arr = scaler.fit_transform(np.copy(norm_weights[:,:,i]))

        # Convert to probabilities. A row in which every column sits at its
        # minimum is all zeros after scaling; dividing by its sum would give
        # NaN, so such rows are left at zero and become uniform once the
        # pseudo-count is added.
        row_sums = arr.sum(1, keepdims=True)
        row_sums[row_sums == 0] = 1.0
        arr = arr / row_sums
        arr = arr + 0.01  # add pseudo-counts
        probs = arr / arr.sum(1, keepdims=True)

        try:
            ic_scaled_weights[filter_name] = ic_scale(probs)
        except AssertionError as err:
            raise AssertionError(
                "ic_scale rejected the probabilities of {0}".format(filter_name)
            ) from err

    return ic_scaled_weights


# IC-scale the filter weights of every fold; results are keyed by fold number.
ic_scaled_weights = OrderedDict()
for fold_no in range(1, 11):
    fold_weights = filter_weights[fold_no]
    ic_scaled_weights[fold_no] = ic_scale_filter_weights(fold_weights)

filter weights in IC scale

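All filter weights of a fold are standardised together (global mean and standard deviation) and clipped to ±2; each filter's weights are then min-max scaled to 0–1 and converted to probabilities (with a 0.01 pseudo-count) before IC scaling.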

In [10]:
# from IPython.display import display
# from matlas.pwms import ic_scale

# from vdom.helpers import (h1, p, li, img, div, b, br, ul, img, a, 
#                           details, summary,
#                           table, thead, th, tr, tbody, td, ol)

# items_to_display = []
# for i in range(64):
#     filter_name = 'filter{}'.format(i)
#     query = vdom_pssm(ic_scaled_weights[filter_name])
#     items_to_display.append(summary(filter_name, query))

# display(details(summary(b("filter weights in IC scale")), summary(items_to_display)))
In [24]:
#4,7,57

# Show, per fold, the IC-scaled weights of the selected filters inside a
# collapsible <details> element.
for fold_no in range(1, 11):
    selected_items_to_display = []
    for i in top_k_filters[fold_no]:
        filter_name = 'filter{}'.format(i)
        query = vdom_pssm(ic_scaled_weights[fold_no][filter_name])
        selected_items_to_display.append(summary(filter_name, query))

    heading = summary(b("Click here to see the filter weights in IC scale for fold {}".format(fold_no)))
    display(details(heading, summary(selected_items_to_display)))
Click here to see the filter weights in IC scale for fold 1filter47filter41filter60filter33filter56filter38filter8filter0filter53filter5filter43filter63filter13filter2filter48
Click here to see the filter weights in IC scale for fold 2filter14filter29filter1filter5filter9filter40filter58filter63filter46filter60filter22filter55filter39filter25filter24
Click here to see the filter weights in IC scale for fold 3filter10filter32filter51filter54filter15filter29filter6filter39filter7filter11filter38filter35filter46filter18filter55
Click here to see the filter weights in IC scale for fold 4filter47filter3filter43filter21filter44filter42filter11filter4filter52filter6filter9filter50filter20filter39filter31
Click here to see the filter weights in IC scale for fold 5filter36filter3filter2filter53filter14filter49filter47filter27filter38filter11filter26filter17filter30filter42filter50
Click here to see the filter weights in IC scale for fold 6filter49filter35filter32filter11filter34filter58filter46filter24filter3filter1filter57filter54filter38filter26filter12
Click here to see the filter weights in IC scale for fold 7filter54filter8filter32filter0filter7filter45filter16filter57filter4filter60filter3filter15filter50filter40filter43
Click here to see the filter weights in IC scale for fold 8filter17filter31filter56filter11filter4filter26filter63filter62filter7filter50filter0filter14filter19filter18filter44
Click here to see the filter weights in IC scale for fold 9filter25filter30filter21filter12filter4filter55filter61filter7filter36filter40filter23filter8filter11filter37filter51
Click here to see the filter weights in IC scale for fold 10filter25filter26filter6filter2filter48filter51filter11filter43filter63filter7filter28filter54filter42filter47filter18

importance score of the filtered sequences

In [13]:
# Inspect the importance scores of one fold. top_k_filters is keyed by fold, so a
# fold has to be chosen explicitly: iterating the dict itself would yield fold
# numbers, not filter indices. Fold 7 is pinned because the recorded output
# (filter54) is the top-ranked filter of fold 7.
fold_no = 7
aitacdir = "/mnt/lab_data2/msharmin/oc-atlas/DanSkinData/alex_bpnets/{0}/{1}".format(motif_name, fold_no)

imp_scores = np.load(aitacdir+"/imp_scores.npy")
rev_imp_scores = np.load(aitacdir+"/rev_imp_scores.npy")
OCR_matrix = np.load(aitacdir+"/OCR_matrix_0.npy")
activations2 = np.load(aitacdir+"/activations2_0.npy")
seq_indices_of_activation = np.load(aitacdir+"/seq_indices_of_activation_0.npy")

already_visited = []
for filter_no in top_k_filters[fold_no]:
    filter_name = "filter{}".format(filter_no)
    print(filter_name)
    # Indices of the entries of this filter's OCR_matrix row that equal 1.
    activated_seqs = np.argwhere(OCR_matrix[filter_no,:]==1)[:,0]
    print(activated_seqs.shape)
    activated_scores = imp_scores[activated_seqs]
    print(activated_scores.shape)
    # Plot the first not-yet-visited sequence; the bound keeps the index valid
    # when fewer than 20 sequences are activated.
    for i in range(min(20, len(activated_seqs))):
        if i in already_visited: continue
        print('seq', activated_seqs[i])
        viz_sequence.plot_weights(activated_scores[i, 600:746,:],
                                  subticks_frequency=10, figsize=(20,1))
        break
    # Only the first filter is inspected.
    break
filter54
(8464,)
(8464, 1346, 4)
seq 0
In [15]:
from matlas.sliding_similarities import aggregate_seqlets


# Aggregate importance scores around each filter's activation sites.
# top_k_filters is keyed by fold, so index it with fold_no; iterating the dict
# itself would yield fold numbers instead of filter indices. fold_no must be the
# fold whose arrays (imp_scores, rev_imp_scores, activations2,
# seq_indices_of_activation) are currently loaded.
seqlen = imp_scores.shape[1]
window = 21  # length of each extracted sub-sequence; presumably the filter width -- TODO confirm
for filter_no in top_k_filters[fold_no]:
    # (row, column) pairs of the non-zero entries for this filter.
    activated_subseqs = np.argwhere(seq_indices_of_activation[filter_no]!=0)
    activated_subscores = []
    rev_activated_subscores = []
    sub_activations = []
    for i,j,k in zip(activated_subseqs.T[0], activated_subseqs.T[1], activated_subseqs.T[1]+window):
        activated_subscores.append(imp_scores[i, j:k, :])
        # Same window, indexed from the other end of the sequence.
        rev_activated_subscores.append(rev_imp_scores[i, (seqlen-k):(seqlen-j), :])
        sub_activations.append(activations2[i, filter_no, j])

    activated_subscores = np.array(activated_subscores)
    rev_activated_subscores = np.array(rev_activated_subscores)
    sub_activations = np.array(sub_activations)
    # Order the windows by decreasing activation.
    sub_act_ind = np.flip(np.argsort(sub_activations))
    activated_subscores = activated_subscores[sub_act_ind]
    rev_activated_subscores = rev_activated_subscores[sub_act_ind]

    # Aggregate the 100 most strongly activated windows.
    avg_activated_subscores = aggregate_seqlets(activated_subscores[:100],
                                                rev_activated_subscores[:100], 'sum')
    print('Aggregated importance scores using direct sum for filter-{}'.format(filter_no))
    viz_sequence.plot_weights(avg_activated_subscores)
#     print("filter{}".format(filter_no))
#     print('running average')
#     viz_sequence.plot_weights(avg_activated_subscores)
#     viz_sequence.plot_weights(aggregate_seqlets(activated_subscores, rev_activated_subscores, 'old'))
    #break         
Aggregated number of sequences is 68
Aggregated importance scores using direct sum for filter-54
Aggregated number of sequences is 70
Aggregated importance scores using direct sum for filter-8
Aggregated number of sequences is 68
Aggregated importance scores using direct sum for filter-32
Aggregated number of sequences is 75
Aggregated importance scores using direct sum for filter-0
Aggregated number of sequences is 82
Aggregated importance scores using direct sum for filter-7
Aggregated number of sequences is 66
Aggregated importance scores using direct sum for filter-45
Aggregated number of sequences is 66
Aggregated importance scores using direct sum for filter-16
Aggregated number of sequences is 67
Aggregated importance scores using direct sum for filter-57
Aggregated number of sequences is 77
Aggregated importance scores using direct sum for filter-4
Aggregated number of sequences is 72
Aggregated importance scores using direct sum for filter-60
Aggregated number of sequences is 69
Aggregated importance scores using direct sum for filter-3
Aggregated number of sequences is 71
Aggregated importance scores using direct sum for filter-15
Aggregated number of sequences is 74
Aggregated importance scores using direct sum for filter-50
Aggregated number of sequences is 71
Aggregated importance scores using direct sum for filter-40
Aggregated number of sequences is 70
Aggregated importance scores using direct sum for filter-43

aitac motifs

In [17]:
from matlas.matches import DenovoAitac
# Run the tomtom matching step for every fold's selected filters against the
# HOCOMOCO motif database, saving each report next to the fold's data.
motif_name = 'CEBPB'
obs = OrderedDict()
for fold_no in range(1, 11):
    aitacdir = "/mnt/lab_data2/msharmin/oc-atlas/DanSkinData/alex_bpnets/{0}/{1}".format(motif_name, fold_no)
    fold_filters = top_k_filters[fold_no]

    ob = DenovoAitac(aitacdir, filt_infl_from_loss[fold_no],
                     fold_filters, len(fold_filters))
    report_dir = "{0}/{1}_tomtomout".format(aitacdir, "HOCOMOCO.nonredundant.annotated")
    ob.fetch_tomtom_matches(
        meme_db="/mnt/lab_data/kundaje/users/msharmin/annotations/HOCOMOCOv11_core_pwms_HUMAN_mono.renamed.nonredundant.annotated.meme",
        database_name="HOCOMOCO.nonredundant.annotated",
        save_report=True,
        tomtom_dir=report_dir)
# #     ob.fetch_tomtom_matches(
# #                 meme_db="/mnt/lab_data/kundaje/users/msharmin/annotations/HOCOMOCOv11_core_pwms_HUMAN_mono.renamed.nonredundant.annotated.meme",
# #                 database_name="HOCOMOCO.nonredundant.annotated",
# #                 save_report=True, tomtom_dir= "{0}/{1}_tomtomout".format(aitacdir, "HOCOMOCO.nonredundant.annotated"))


#     ob.load_matched_motifs(database_name="HOCOMOCO.nonredundant.annotated")
#     ob.get_motif_per_celltype(match_threshold=0.02, match_criteria='p-value', database_name="HOCOMOCO.nonredundant.annotated")
#     pattern_tab, pattern_dict = ob.visualize_pattern_table()
#     tf_tab, tf_dict = ob.visualize_tf_table("Aitac")
    
#     display(details(summary('Click here for ', b('Denovo Patterns'), ' by ', b('{}'.format('Aitac')),
#                             ' in ', b(motif_name), 'for fold-', b(str(fold_no)),
#                             ": #{}".format(len(pattern_dict)),
#                            ), pattern_tab))
    
#     display(details(summary('Click here for ', b('Motifs'), ' by ', b('{}'.format('Aitac')),
#                         ' in ', b(motif_name), 'for fold-', b(str(fold_no)),
#                         ": #{}".format(len(tf_dict)),
#                        ), tf_tab))
#     break
    
#     obs[fold_no] = ob
In [19]:
from vdom.helpers import (b, summary, details)
from IPython.display import display

# Per-fold results, every container keyed by fold number so that later cells
# can look them up without recomputing anything.
ob_tofold = OrderedDict()
pattern_tab_tofold = OrderedDict()
pattern_dict_tofold = OrderedDict()
tf_tab_tofold = OrderedDict()
tf_dict_tofold = OrderedDict()

hocomoco_db_name = "HOCOMOCO.nonredundant.annotated"

for fold_no in range(1, 11):
    aitacdir = "/mnt/lab_data2/msharmin/oc-atlas/DanSkinData/alex_bpnets/{0}/{1}".format(motif_name, fold_no)

    ob = DenovoAitac(
        aitacdir,
        filt_infl_from_loss[fold_no],
        top_k_filters[fold_no],
        len(top_k_filters[fold_no]),
    )
    # Load the stored database matches, then select motifs with the p-value
    # criterion at a threshold of 0.02.
    ob.load_matched_motifs(database_name=hocomoco_db_name)
    ob.get_motif_per_celltype(
        match_threshold=0.02,
        match_criteria='p-value',
        database_name=hocomoco_db_name,
    )
    pattern_tab, pattern_dict = ob.visualize_pattern_table()
    tf_tab, tf_dict = ob.visualize_tf_table("Aitac")

    # Stash everything for the cells further down.
    ob_tofold[fold_no] = ob
    pattern_tab_tofold[fold_no] = pattern_tab
    pattern_dict_tofold[fold_no] = pattern_dict
    tf_tab_tofold[fold_no] = tf_tab
    tf_dict_tofold[fold_no] = tf_dict

    # Collapsible section whose header reports how many patterns the fold has.
    pattern_summary = summary(
        'Click here for ', b('Denovo Patterns'), ' by ', b('Aitac'),
        ' in ', b(motif_name), ' for fold-', b(str(fold_no)),
        ": #{}".format(len(pattern_dict)),
    )
    display(details(pattern_summary, pattern_tab))
Click here for Denovo Patterns by Aitac in CEBPB for fold-1: #13
Pattern NameTF Name(s)AitacInfluence
filter470.2141450319170016
filter410.026994974563086676
filter33HCLUST-176_CBFB.UNK.0.A, HCLUST-127_FOXJ2.UNK.0.A, HCLUST-51_ZIM3.UNK.0.A, HCLUST-121_AR.UNK.0.A, HCLUST-161_GLI3.UNK.0.A0.003863943019422465
filter56HCLUST-185_EGR1.UNK.0.A, HCLUST-139_SREBF1.UNK.0.A, HCLUST-176_CBFB.UNK.0.A, HCLUST-8_TBX21.UNK.0.A, HCLUST-116_KLF8.UNK.0.A, HCLUST-161_GLI3.UNK.0.A, HCLUST-184_KLF12.UNK.0.A0.002708672948704476
filter38HCLUST-176_CBFB.UNK.0.A, HCLUST-51_ZIM3.UNK.0.A, HCLUST-121_AR.UNK.0.A, HCLUST-127_FOXJ2.UNK.0.A, HCLUST-108_FOXH1.UNK.0.A, HCLUST-14_HOXA13.UNK.0.A0.0024906331847179687
filter8HCLUST-16_RFX1.UNK.0.A0.0016528585249671397
filter0HCLUST-116_KLF8.UNK.0.A, HCLUST-119_AHR.UNK.0.A, HCLUST-102_NKX2-1.UNK.0.A, HCLUST-139_SREBF1.UNK.0.A0.0014720499705049757
filter53HCLUST-14_HOXA13.UNK.0.A, HCLUST-74_SRF.UNK.0.A, HCLUST-120_AIRE.UNK.0.A, HCLUST-127_FOXJ2.UNK.0.A, HCLUST-123_CDX2.UNK.0.A, HCLUST-76_TBP.UNK.0.A, HCLUST-121_AR.UNK.0.A, HCLUST-85_ZNF354A.UNK.0.A, HCLUST-13_CDX1.UNK.0.A0.0009324956345416156
filter5HCLUST-134_INSM1.UNK.0.A, HCLUST-185_EGR1.UNK.0.A, HCLUST-122_TFAP2B.UNK.0.A, HCLUST-116_KLF8.UNK.0.A, HCLUST-27_TAL1.UNK.0.A, HCLUST-63_ZNF320.UNK.0.A, HCLUST-149_CTCFL.UNK.0.A, HCLUST-40_ZNF816.UNK.0.A, HCLUST-183_E2F1.UNK.0.A, HCLUST-1_GATA1.UNK.0.A,

HCLUST-68_ZNF341.UNK.0.A
0.0009066441587253029
filter43HCLUST-34_ZNF586.UNK.0.A, HCLUST-182_NFATC1.UNK.0.A, HCLUST-107_FEZF1.UNK.0.A, HCLUST-127_FOXJ2.UNK.0.A, HCLUST-25_SOX17.UNK.0.A, HCLUST-121_AR.UNK.0.A, HCLUST-24_SOX5.UNK.0.A, HCLUST-79_ZFP82.UNK.0.A, HCLUST-177_MEF2A.UNK.0.A0.000905770955833986
filter13HCLUST-4_ATOH1.UNK.0.A0.0006970092021308099
filter2HCLUST-41_ZNF41.UNK.0.A, HCLUST-152_SOX10.UNK.0.A, HCLUST-107_FEZF1.UNK.0.A, HCLUST-159_EHF.UNK.0.A, HCLUST-25_SOX17.UNK.0.A, HCLUST-21_ZNF350.UNK.0.A, HCLUST-40_ZNF816.UNK.0.A, HCLUST-76_TBP.UNK.0.A, HCLUST-98_EBF1.UNK.0.A0.00053401885547055
filter48HCLUST-158_BCL11A.UNK.0.A, HCLUST-72_PAX6.UNK.0.A, HCLUST-88_ZFP28.UNK.0.A, HCLUST-165_LEF1.UNK.0.A, HCLUST-151_HSF1.UNK.0.A, HCLUST-29_ZNF547.UNK.0.A0.00039986277388253343
Click here for Denovo Patterns by Aitac in CEBPB for fold-2: #13
Pattern NameTF Name(s)AitacInfluence
filter140.5303529192653758
filter29HCLUST-118_MTF1.UNK.0.A, HCLUST-52_ZKSCAN1.UNK.0.A0.04881478023585919
filter1HCLUST-176_CBFB.UNK.0.A, HCLUST-31_ZNF449.UNK.0.A, HCLUST-102_NKX2-1.UNK.0.A, HCLUST-121_AR.UNK.0.A0.0038786480474260663
filter5HCLUST-176_CBFB.UNK.0.A, HCLUST-161_GLI3.UNK.0.A, HCLUST-185_EGR1.UNK.0.A, HCLUST-116_KLF8.UNK.0.A, HCLUST-51_ZIM3.UNK.0.A, HCLUST-139_SREBF1.UNK.0.A0.00339283013944702
filter9HCLUST-106_ATF6.UNK.0.A0.002978532269916345
filter40HCLUST-74_SRF.UNK.0.A, HCLUST-13_CDX1.UNK.0.A, HCLUST-123_CDX2.UNK.0.A, HCLUST-46_PRDM14.UNK.0.A, HCLUST-87_ZBTB48.UNK.0.A, HCLUST-14_HOXA13.UNK.0.A, HCLUST-177_MEF2A.UNK.0.A0.0020161561346765744
filter58HCLUST-47_REST.UNK.0.A, HCLUST-90_ZBTB6.UNK.0.A0.0011998207688339757
filter63HCLUST-153_CRX.UNK.0.A, HCLUST-21_ZNF350.UNK.0.A, HCLUST-182_NFATC1.UNK.0.A, HCLUST-85_ZNF354A.UNK.0.A, HCLUST-6_PPARA.UNK.0.A, HCLUST-25_SOX17.UNK.0.A0.0009286532818935663
filter46HCLUST-14_HOXA13.UNK.0.A, HCLUST-120_AIRE.UNK.0.A, HCLUST-93_NR2E3.UNK.0.A0.0007539648678227104
filter60HCLUST-106_ATF6.UNK.0.A0.000728994850062196
filter39HCLUST-31_ZNF449.UNK.0.A0.0006705622938474953
filter25HCLUST-40_ZNF816.UNK.0.A, HCLUST-185_EGR1.UNK.0.A, HCLUST-159_EHF.UNK.0.A, HCLUST-68_ZNF341.UNK.0.A, HCLUST-135_MBD2.UNK.0.A, HCLUST-39_ZNF770.UNK.0.A, HCLUST-184_KLF12.UNK.0.A0.0006671001315200462
filter24HCLUST-36_ZNF680.UNK.0.A, HCLUST-34_ZNF586.UNK.0.A, HCLUST-25_SOX17.UNK.0.A, HCLUST-42_ZNF85.UNK.0.A, HCLUST-182_NFATC1.UNK.0.A, HCLUST-24_SOX5.UNK.0.A, HCLUST-121_AR.UNK.0.A, HCLUST-127_FOXJ2.UNK.0.A, HCLUST-73_SMARCA1.UNK.0.A, HCLUST-107_FEZF1.UNK.0.A,

HCLUST-177_MEF2A.UNK.0.A
0.0006218640672498633
Click here for Denovo Patterns by Aitac in CEBPB for fold-3: #4
Pattern NameTF Name(s)AitacInfluence
filter29HCLUST-176_CBFB.UNK.0.A, HCLUST-121_AR.UNK.0.A, HCLUST-161_GLI3.UNK.0.A, HCLUST-116_KLF8.UNK.0.A0.0015731521016156245
filter39HCLUST-185_EGR1.UNK.0.A, HCLUST-176_CBFB.UNK.0.A, HCLUST-139_SREBF1.UNK.0.A, HCLUST-161_GLI3.UNK.0.A, HCLUST-51_ZIM3.UNK.0.A0.0013639433753971403
filter18HCLUST-161_GLI3.UNK.0.A, HCLUST-81_THAP1.UNK.0.A, HCLUST-27_TAL1.UNK.0.A0.00022937509436457652
filter55HCLUST-132_ALX1.UNK.0.A, HCLUST-42_ZNF85.UNK.0.A, HCLUST-85_ZNF354A.UNK.0.A0.00020788976622628446
Click here for Denovo Patterns by Aitac in CEBPB for fold-4: #2
Pattern NameTF Name(s)AitacInfluence
filter3HCLUST-176_CBFB.UNK.0.A, HCLUST-161_GLI3.UNK.0.A, HCLUST-116_KLF8.UNK.0.A, HCLUST-127_FOXJ2.UNK.0.A, HCLUST-8_TBX21.UNK.0.A, HCLUST-185_EGR1.UNK.0.A0.005059514071722837
filter9HCLUST-151_HSF1.UNK.0.A0.0005420086359834736
Click here for Denovo Patterns by Aitac in CEBPB for fold-6: #14
Pattern NameTF Name(s)AitacInfluence
filter49HCLUST-106_ATF6.UNK.0.A, HCLUST-186_ARNTL.UNK.0.A, HCLUST-143_E2F2.UNK.0.A0.04511756147483279
filter350.03418056875743683
filter32HCLUST-174_CEBPA.UNK.0.A, HCLUST-105_ATF4.UNK.0.A, HCLUST-133_CEBPD.UNK.0.A0.009460713455206328
filter11HCLUST-121_AR.UNK.0.A, HCLUST-176_CBFB.UNK.0.A, HCLUST-31_ZNF449.UNK.0.A, HCLUST-51_ZIM3.UNK.0.A, HCLUST-127_FOXJ2.UNK.0.A, HCLUST-55_ZNF136.UNK.0.A, HCLUST-58_ZNF250.UNK.0.A0.0031363830354860503
filter34HCLUST-74_SRF.UNK.0.A0.0021727132975229786
filter58HCLUST-175_ATF1.UNK.0.A, HCLUST-142_TAF1.UNK.0.A0.0021525233683859675
filter46HCLUST-106_ATF6.UNK.0.A0.0018044556986303638
filter24HCLUST-185_EGR1.UNK.0.A, HCLUST-139_SREBF1.UNK.0.A, HCLUST-161_GLI3.UNK.0.A, HCLUST-8_TBX21.UNK.0.A, HCLUST-68_ZNF341.UNK.0.A, HCLUST-1_GATA1.UNK.0.A, HCLUST-116_KLF8.UNK.0.A, HCLUST-176_CBFB.UNK.0.A, HCLUST-31_ZNF449.UNK.0.A0.0009971555231064425
filter3HCLUST-8_TBX21.UNK.0.A, HCLUST-161_GLI3.UNK.0.A, HCLUST-27_TAL1.UNK.0.A, HCLUST-116_KLF8.UNK.0.A, HCLUST-108_FOXH1.UNK.0.A0.0008638738471203829
filter10.0007134999859777418
filter57HCLUST-8_TBX21.UNK.0.A, HCLUST-147_ESR1.UNK.0.A0.0006584498306860246
filter38HCLUST-149_CTCFL.UNK.0.A, HCLUST-122_TFAP2B.UNK.0.A, HCLUST-63_ZNF320.UNK.0.A0.0004119439564808684
filter26HCLUST-49_ZFX.UNK.0.A, HCLUST-122_TFAP2B.UNK.0.A, HCLUST-170_TP53.UNK.0.A, HCLUST-184_KLF12.UNK.0.A, HCLUST-87_ZBTB48.UNK.0.A, HCLUST-135_MBD2.UNK.0.A0.00034410567512111004
filter12HCLUST-36_ZNF680.UNK.0.A, HCLUST-121_AR.UNK.0.A0.00029056874058164104
Click here for Denovo Patterns by Aitac in CEBPB for fold-7: #6
Pattern NameTF Name(s)AitacInfluence
filter0HCLUST-176_CBFB.UNK.0.A, HCLUST-185_EGR1.UNK.0.A, HCLUST-51_ZIM3.UNK.0.A, HCLUST-161_GLI3.UNK.0.A0.0042318836019674625
filter70.0037567814326760693
filter570.001043428027546221
filter30.0006908377866463216
filter50HCLUST-182_NFATC1.UNK.0.A, HCLUST-82_TWIST1.UNK.0.A, HCLUST-177_MEF2A.UNK.0.A, HCLUST-34_ZNF586.UNK.0.A0.0006470899264955723
filter43HCLUST-46_PRDM14.UNK.0.A, HCLUST-44_ZSCAN22.UNK.0.A, HCLUST-142_TAF1.UNK.0.A, HCLUST-49_ZFX.UNK.0.A, HCLUST-162_ASCL1.UNK.0.A, HCLUST-87_ZBTB48.UNK.0.A0.0004818343618733898
In [21]:
# One collapsible TF-motif table per fold; the header shows how many TFs were
# recorded for that fold in the previous cell.
for fold_no in range(1, 11):
    n_fold_tfs = len(tf_dict_tofold[fold_no])
    motif_summary = summary(
        'Click here for ', b('Motifs'), ' by ', b('Aitac'),
        ' in ', b(motif_name), ' for fold-', b(str(fold_no)),
        ": #{}".format(n_fold_tfs),
    )
    display(details(motif_summary, tf_tab_tofold[fold_no]))
Click here for Motifs by Aitac in CEBPB for fold-1: #49
TF NamePattern(s)
HCLUST-176_CBFB.UNK.0.A
Pattern NameAitacSignificance
filter330.00114196
filter560.00341961
filter381.39984e-05
HCLUST-127_FOXJ2.UNK.0.A
Pattern NameAitacSignificance
filter330.00298517
filter380.00309883
filter530.00222649
filter430.00458319
HCLUST-51_ZIM3.UNK.0.A
Pattern NameAitacSignificance
filter330.0105799
filter380.00195923
HCLUST-121_AR.UNK.0.A
Pattern NameAitacSignificance
filter330.0131387
filter380.002738
filter530.00841898
filter430.00935206
HCLUST-161_GLI3.UNK.0.A
Pattern NameAitacSignificance
filter330.018082900000000002
filter560.00861012
HCLUST-185_EGR1.UNK.0.A
Pattern NameAitacSignificance
filter560.00143837
filter50.00422449
HCLUST-139_SREBF1.UNK.0.A
Pattern NameAitacSignificance
filter560.00242864
filter00.00983527
HCLUST-8_TBX21.UNK.0.A
Pattern NameAitacSignificance
filter560.00734112
HCLUST-116_KLF8.UNK.0.A
Pattern NameAitacSignificance
filter560.00794107
filter00.00146944
filter50.00851848
HCLUST-184_KLF12.UNK.0.A
Pattern NameAitacSignificance
filter560.0109888
HCLUST-108_FOXH1.UNK.0.A
Pattern NameAitacSignificance
filter380.00336734
HCLUST-14_HOXA13.UNK.0.A
Pattern NameAitacSignificance
filter380.00454262
filter530.000226877
HCLUST-16_RFX1.UNK.0.A
Pattern NameAitacSignificance
filter80.00741083
HCLUST-119_AHR.UNK.0.A
Pattern NameAitacSignificance
filter00.00660287
HCLUST-102_NKX2-1.UNK.0.A
Pattern NameAitacSignificance
filter00.00721064
HCLUST-74_SRF.UNK.0.A
Pattern NameAitacSignificance
filter530.00161101
HCLUST-120_AIRE.UNK.0.A
Pattern NameAitacSignificance
filter530.00188357
HCLUST-123_CDX2.UNK.0.A
Pattern NameAitacSignificance
filter530.0031780999999999997
HCLUST-76_TBP.UNK.0.A
Pattern NameAitacSignificance
filter530.00769932
filter20.0150348
HCLUST-85_ZNF354A.UNK.0.A
Pattern NameAitacSignificance
filter530.00867397
HCLUST-13_CDX1.UNK.0.A
Pattern NameAitacSignificance
filter530.010007700000000001
HCLUST-134_INSM1.UNK.0.A
Pattern NameAitacSignificance
filter50.00254386
HCLUST-122_TFAP2B.UNK.0.A
Pattern NameAitacSignificance
filter50.00660613
HCLUST-27_TAL1.UNK.0.A
Pattern NameAitacSignificance
filter50.00887524
HCLUST-63_ZNF320.UNK.0.A
Pattern NameAitacSignificance
filter50.0101367
HCLUST-149_CTCFL.UNK.0.A
Pattern NameAitacSignificance
filter50.0114511
HCLUST-40_ZNF816.UNK.0.A
Pattern NameAitacSignificance
filter50.0115197
filter20.0146738
HCLUST-183_E2F1.UNK.0.A
Pattern NameAitacSignificance
filter50.012294
HCLUST-1_GATA1.UNK.0.A
Pattern NameAitacSignificance
filter50.017697499999999998
HCLUST-68_ZNF341.UNK.0.A
Pattern NameAitacSignificance
filter50.0194294
HCLUST-34_ZNF586.UNK.0.A
Pattern NameAitacSignificance
filter430.00024408
HCLUST-182_NFATC1.UNK.0.A
Pattern NameAitacSignificance
filter430.00109917
HCLUST-107_FEZF1.UNK.0.A
Pattern NameAitacSignificance
filter430.00446339
filter20.00296081
HCLUST-25_SOX17.UNK.0.A
Pattern NameAitacSignificance
filter430.00746994
filter20.00391488
HCLUST-24_SOX5.UNK.0.A
Pattern NameAitacSignificance
filter430.0124433
HCLUST-79_ZFP82.UNK.0.A
Pattern NameAitacSignificance
filter430.0126759
HCLUST-177_MEF2A.UNK.0.A
Pattern NameAitacSignificance
filter430.0156811
HCLUST-4_ATOH1.UNK.0.A
Pattern NameAitacSignificance
filter130.0125657
HCLUST-41_ZNF41.UNK.0.A
Pattern NameAitacSignificance
filter20.000675823
HCLUST-152_SOX10.UNK.0.A
Pattern NameAitacSignificance
filter20.00290255
HCLUST-159_EHF.UNK.0.A
Pattern NameAitacSignificance
filter20.00315683
HCLUST-21_ZNF350.UNK.0.A
Pattern NameAitacSignificance
filter20.0115435
HCLUST-98_EBF1.UNK.0.A
Pattern NameAitacSignificance
filter20.0183264
HCLUST-158_BCL11A.UNK.0.A
Pattern NameAitacSignificance
filter480.0022541
HCLUST-72_PAX6.UNK.0.A
Pattern NameAitacSignificance
filter480.00658099
HCLUST-88_ZFP28.UNK.0.A
Pattern NameAitacSignificance
filter480.00914487
HCLUST-165_LEF1.UNK.0.A
Pattern NameAitacSignificance
filter480.013485200000000001
HCLUST-151_HSF1.UNK.0.A
Pattern NameAitacSignificance
filter480.0137476
HCLUST-29_ZNF547.UNK.0.A
Pattern NameAitacSignificance
filter480.0177301
Click here for Motifs by Aitac in CEBPB for fold-2: #42
TF NamePattern(s)
HCLUST-118_MTF1.UNK.0.A
Pattern NameAitacSignificance
filter290.00148742
HCLUST-52_ZKSCAN1.UNK.0.A
Pattern NameAitacSignificance
filter290.013703299999999998
HCLUST-176_CBFB.UNK.0.A
Pattern NameAitacSignificance
filter10.00057388
filter50.00358304
HCLUST-31_ZNF449.UNK.0.A
Pattern NameAitacSignificance
filter10.00871931
filter390.017662099999999997
HCLUST-102_NKX2-1.UNK.0.A
Pattern NameAitacSignificance
filter10.00953474
HCLUST-121_AR.UNK.0.A
Pattern NameAitacSignificance
filter10.0152695
filter240.00965208
HCLUST-161_GLI3.UNK.0.A
Pattern NameAitacSignificance
filter50.00691361
HCLUST-185_EGR1.UNK.0.A
Pattern NameAitacSignificance
filter50.00779053
filter250.00877238
HCLUST-116_KLF8.UNK.0.A
Pattern NameAitacSignificance
filter50.011242499999999999
HCLUST-51_ZIM3.UNK.0.A
Pattern NameAitacSignificance
filter50.013897999999999999
HCLUST-139_SREBF1.UNK.0.A
Pattern NameAitacSignificance
filter50.017823500000000003
HCLUST-106_ATF6.UNK.0.A
Pattern NameAitacSignificance
filter90.00176177
filter600.00913497
HCLUST-74_SRF.UNK.0.A
Pattern NameAitacSignificance
filter400.00572795
HCLUST-13_CDX1.UNK.0.A
Pattern NameAitacSignificance
filter400.00809763
HCLUST-123_CDX2.UNK.0.A
Pattern NameAitacSignificance
filter400.00894901
HCLUST-46_PRDM14.UNK.0.A
Pattern NameAitacSignificance
filter400.010017299999999998
HCLUST-87_ZBTB48.UNK.0.A
Pattern NameAitacSignificance
filter400.0124579
HCLUST-14_HOXA13.UNK.0.A
Pattern NameAitacSignificance
filter400.013550799999999998
filter460.00518853
HCLUST-177_MEF2A.UNK.0.A
Pattern NameAitacSignificance
filter400.015094299999999998
filter240.017965799999999997
HCLUST-47_REST.UNK.0.A
Pattern NameAitacSignificance
filter580.0165676
HCLUST-90_ZBTB6.UNK.0.A
Pattern NameAitacSignificance
filter580.019799900000000002
HCLUST-153_CRX.UNK.0.A
Pattern NameAitacSignificance
filter630.00527059
HCLUST-21_ZNF350.UNK.0.A
Pattern NameAitacSignificance
filter630.00827331
HCLUST-182_NFATC1.UNK.0.A
Pattern NameAitacSignificance
filter630.00928837
filter240.00683075
HCLUST-85_ZNF354A.UNK.0.A
Pattern NameAitacSignificance
filter630.014784
HCLUST-6_PPARA.UNK.0.A
Pattern NameAitacSignificance
filter630.0166315
HCLUST-25_SOX17.UNK.0.A
Pattern NameAitacSignificance
filter630.0169807
filter240.00270948
HCLUST-120_AIRE.UNK.0.A
Pattern NameAitacSignificance
filter460.018189
HCLUST-93_NR2E3.UNK.0.A
Pattern NameAitacSignificance
filter460.0192168
HCLUST-40_ZNF816.UNK.0.A
Pattern NameAitacSignificance
filter250.00408148
HCLUST-159_EHF.UNK.0.A
Pattern NameAitacSignificance
filter250.00907674
HCLUST-68_ZNF341.UNK.0.A
Pattern NameAitacSignificance
filter250.0103274
HCLUST-135_MBD2.UNK.0.A
Pattern NameAitacSignificance
filter250.0160457
HCLUST-39_ZNF770.UNK.0.A
Pattern NameAitacSignificance
filter250.017099200000000002
HCLUST-184_KLF12.UNK.0.A
Pattern NameAitacSignificance
filter250.018365799999999998
HCLUST-36_ZNF680.UNK.0.A
Pattern NameAitacSignificance
filter240.00127153
HCLUST-34_ZNF586.UNK.0.A
Pattern NameAitacSignificance
filter240.00140827
HCLUST-42_ZNF85.UNK.0.A
Pattern NameAitacSignificance
filter240.00466579
HCLUST-24_SOX5.UNK.0.A
Pattern NameAitacSignificance
filter240.00785916
HCLUST-127_FOXJ2.UNK.0.A
Pattern NameAitacSignificance
filter240.00990509
HCLUST-73_SMARCA1.UNK.0.A
Pattern NameAitacSignificance
filter240.011796700000000002
HCLUST-107_FEZF1.UNK.0.A
Pattern NameAitacSignificance
filter240.013212999999999999
Click here for Motifs by Aitac in CEBPB for fold-3: #12
TF NamePattern(s)
HCLUST-176_CBFB.UNK.0.A
Pattern NameAitacSignificance
filter290.00404881
filter390.00604352
HCLUST-121_AR.UNK.0.A
Pattern NameAitacSignificance
filter290.00408984
HCLUST-161_GLI3.UNK.0.A
Pattern NameAitacSignificance
filter290.00684492
filter390.013779900000000001
filter180.00452817
HCLUST-116_KLF8.UNK.0.A
Pattern NameAitacSignificance
filter290.0175791
HCLUST-185_EGR1.UNK.0.A
Pattern NameAitacSignificance
filter390.00323405
HCLUST-139_SREBF1.UNK.0.A
Pattern NameAitacSignificance
filter390.00930896
HCLUST-51_ZIM3.UNK.0.A
Pattern NameAitacSignificance
filter390.018614099999999998
HCLUST-81_THAP1.UNK.0.A
Pattern NameAitacSignificance
filter180.00841208
HCLUST-27_TAL1.UNK.0.A
Pattern NameAitacSignificance
filter180.0182643
HCLUST-132_ALX1.UNK.0.A
Pattern NameAitacSignificance
filter550.00374523
HCLUST-42_ZNF85.UNK.0.A
Pattern NameAitacSignificance
filter550.00541717
HCLUST-85_ZNF354A.UNK.0.A
Pattern NameAitacSignificance
filter550.0123962
Click here for Motifs by Aitac in CEBPB for fold-4: #7
TF NamePattern(s)
HCLUST-176_CBFB.UNK.0.A
Pattern NameAitacSignificance
filter30.00136042
HCLUST-161_GLI3.UNK.0.A
Pattern NameAitacSignificance
filter30.00155965
HCLUST-116_KLF8.UNK.0.A
Pattern NameAitacSignificance
filter30.00835159
HCLUST-127_FOXJ2.UNK.0.A
Pattern NameAitacSignificance
filter30.010131200000000002
HCLUST-8_TBX21.UNK.0.A
Pattern NameAitacSignificance
filter30.0135584
HCLUST-185_EGR1.UNK.0.A
Pattern NameAitacSignificance
filter30.0179921
HCLUST-151_HSF1.UNK.0.A
Pattern NameAitacSignificance
filter90.00774443
Click here for Motifs by Aitac in CEBPB for fold-5: #28
TF NamePattern(s)
HCLUST-161_GLI3.UNK.0.A
Pattern NameAitacSignificance
filter30.00764123
HCLUST-176_CBFB.UNK.0.A
Pattern NameAitacSignificance
filter30.00804318
filter470.00028838
HCLUST-31_ZNF449.UNK.0.A
Pattern NameAitacSignificance
filter30.0100085
HCLUST-116_KLF8.UNK.0.A
Pattern NameAitacSignificance
filter30.012540899999999999
filter500.0103107
HCLUST-58_ZNF250.UNK.0.A
Pattern NameAitacSignificance
filter30.0158532
HCLUST-120_AIRE.UNK.0.A
Pattern NameAitacSignificance
filter490.0012606
filter300.0194988
HCLUST-127_FOXJ2.UNK.0.A
Pattern NameAitacSignificance
filter470.00170176
HCLUST-51_ZIM3.UNK.0.A
Pattern NameAitacSignificance
filter470.00211728
HCLUST-121_AR.UNK.0.A
Pattern NameAitacSignificance
filter470.00578457
filter300.0123211
HCLUST-14_HOXA13.UNK.0.A
Pattern NameAitacSignificance
filter470.0132304
filter300.0008601310000000001
HCLUST-135_MBD2.UNK.0.A
Pattern NameAitacSignificance
filter110.0138523
HCLUST-117_LHX3.UNK.0.A
Pattern NameAitacSignificance
filter260.0140394
HCLUST-74_SRF.UNK.0.A
Pattern NameAitacSignificance
filter300.00180416
HCLUST-123_CDX2.UNK.0.A
Pattern NameAitacSignificance
filter300.006176
HCLUST-182_NFATC1.UNK.0.A
Pattern NameAitacSignificance
filter300.00975279
HCLUST-34_ZNF586.UNK.0.A
Pattern NameAitacSignificance
filter300.0103661
HCLUST-25_SOX17.UNK.0.A
Pattern NameAitacSignificance
filter300.014967900000000001
HCLUST-148_ZNF146.UNK.0.A
Pattern NameAitacSignificance
filter300.0154494
HCLUST-68_ZNF341.UNK.0.A
Pattern NameAitacSignificance
filter427.53722e-05
HCLUST-185_EGR1.UNK.0.A
Pattern NameAitacSignificance
filter420.000237989
HCLUST-40_ZNF816.UNK.0.A
Pattern NameAitacSignificance
filter420.00221636
HCLUST-134_INSM1.UNK.0.A
Pattern NameAitacSignificance
filter420.00595039
filter500.011028200000000002
HCLUST-159_EHF.UNK.0.A
Pattern NameAitacSignificance
filter420.00874726
HCLUST-44_ZSCAN22.UNK.0.A
Pattern NameAitacSignificance
filter420.0107006
HCLUST-184_KLF12.UNK.0.A
Pattern NameAitacSignificance
filter420.010977500000000001
HCLUST-39_ZNF770.UNK.0.A
Pattern NameAitacSignificance
filter420.0120393
HCLUST-122_TFAP2B.UNK.0.A
Pattern NameAitacSignificance
filter500.00407542
HCLUST-63_ZNF320.UNK.0.A
Pattern NameAitacSignificance
filter500.0195085
Click here for Motifs by Aitac in CEBPB for fold-6: #35
TF NamePattern(s)
HCLUST-106_ATF6.UNK.0.A
Pattern NameAitacSignificance
filter490.0101005
filter460.0139172
HCLUST-186_ARNTL.UNK.0.A
Pattern NameAitacSignificance
filter490.0122194
HCLUST-143_E2F2.UNK.0.A
Pattern NameAitacSignificance
filter490.018758
HCLUST-174_CEBPA.UNK.0.A
Pattern NameAitacSignificance
filter320.00501272
HCLUST-105_ATF4.UNK.0.A
Pattern NameAitacSignificance
filter320.00656933
HCLUST-133_CEBPD.UNK.0.A
Pattern NameAitacSignificance
filter320.0127479
HCLUST-121_AR.UNK.0.A
Pattern NameAitacSignificance
filter110.00124128
filter120.0105126
HCLUST-176_CBFB.UNK.0.A
Pattern NameAitacSignificance
filter110.00143004
filter240.0168027
HCLUST-31_ZNF449.UNK.0.A
Pattern NameAitacSignificance
filter110.00643217
filter240.0173297
HCLUST-51_ZIM3.UNK.0.A
Pattern NameAitacSignificance
filter110.012505500000000001
HCLUST-127_FOXJ2.UNK.0.A
Pattern NameAitacSignificance
filter110.0163241
HCLUST-55_ZNF136.UNK.0.A
Pattern NameAitacSignificance
filter110.016836
HCLUST-58_ZNF250.UNK.0.A
Pattern NameAitacSignificance
filter110.018568099999999997
HCLUST-74_SRF.UNK.0.A
Pattern NameAitacSignificance
filter340.0164243
HCLUST-175_ATF1.UNK.0.A
Pattern NameAitacSignificance
filter580.0144792
HCLUST-142_TAF1.UNK.0.A
Pattern NameAitacSignificance
filter580.0174049
HCLUST-185_EGR1.UNK.0.A
Pattern NameAitacSignificance
filter240.0007191060000000001
HCLUST-139_SREBF1.UNK.0.A
Pattern NameAitacSignificance
filter240.00427217
HCLUST-161_GLI3.UNK.0.A
Pattern NameAitacSignificance
filter240.0046786
filter30.00628541
HCLUST-8_TBX21.UNK.0.A
Pattern NameAitacSignificance
filter240.00931565
filter30.00272487
filter570.00535622
HCLUST-68_ZNF341.UNK.0.A
Pattern NameAitacSignificance
filter240.0106225
HCLUST-1_GATA1.UNK.0.A
Pattern NameAitacSignificance
filter240.013016
HCLUST-116_KLF8.UNK.0.A
Pattern NameAitacSignificance
filter240.0136178
filter30.00721607
HCLUST-27_TAL1.UNK.0.A
Pattern NameAitacSignificance
filter30.00630907
HCLUST-108_FOXH1.UNK.0.A
Pattern NameAitacSignificance
filter30.0124008
HCLUST-147_ESR1.UNK.0.A
Pattern NameAitacSignificance
filter570.0186358
HCLUST-149_CTCFL.UNK.0.A
Pattern NameAitacSignificance
filter380.00374466
HCLUST-122_TFAP2B.UNK.0.A
Pattern NameAitacSignificance
filter380.0078361
filter260.00779681
HCLUST-63_ZNF320.UNK.0.A
Pattern NameAitacSignificance
filter380.00851548
HCLUST-49_ZFX.UNK.0.A
Pattern NameAitacSignificance
filter260.0054521
HCLUST-170_TP53.UNK.0.A
Pattern NameAitacSignificance
filter260.009398299999999998
HCLUST-184_KLF12.UNK.0.A
Pattern NameAitacSignificance
filter260.016761900000000003
HCLUST-87_ZBTB48.UNK.0.A
Pattern NameAitacSignificance
filter260.018719
HCLUST-135_MBD2.UNK.0.A
Pattern NameAitacSignificance
filter260.019832299999999997
HCLUST-36_ZNF680.UNK.0.A
Pattern NameAitacSignificance
filter120.00464017
Click here for Motifs by Aitac in CEBPB for fold-7: #14
TF NamePattern(s)
HCLUST-176_CBFB.UNK.0.A
Pattern NameAitacSignificance
filter00.00117591
HCLUST-185_EGR1.UNK.0.A
Pattern NameAitacSignificance
filter00.00787468
HCLUST-51_ZIM3.UNK.0.A
Pattern NameAitacSignificance
filter00.00928966
HCLUST-161_GLI3.UNK.0.A
Pattern NameAitacSignificance
filter00.0115723
HCLUST-182_NFATC1.UNK.0.A
Pattern NameAitacSignificance
filter500.00298469
HCLUST-82_TWIST1.UNK.0.A
Pattern NameAitacSignificance
filter500.00924081
HCLUST-177_MEF2A.UNK.0.A
Pattern NameAitacSignificance
filter500.0145459
HCLUST-34_ZNF586.UNK.0.A
Pattern NameAitacSignificance
filter500.015291999999999998
HCLUST-46_PRDM14.UNK.0.A
Pattern NameAitacSignificance
filter430.00205284
HCLUST-44_ZSCAN22.UNK.0.A
Pattern NameAitacSignificance
filter430.00466208
HCLUST-142_TAF1.UNK.0.A
Pattern NameAitacSignificance
filter430.010445900000000001
HCLUST-49_ZFX.UNK.0.A
Pattern NameAitacSignificance
filter430.011812799999999998
HCLUST-162_ASCL1.UNK.0.A
Pattern NameAitacSignificance
filter430.0168685
HCLUST-87_ZBTB48.UNK.0.A
Pattern NameAitacSignificance
filter430.019620099999999998
Click here for Motifs by Aitac in CEBPB for fold-8: #30
TF NamePattern(s)
HCLUST-174_CEBPA.UNK.0.A
Pattern NameAitacSignificance
filter310.019802
HCLUST-84_ZNF324.UNK.0.A
Pattern NameAitacSignificance
filter40.00129002
HCLUST-127_FOXJ2.UNK.0.A
Pattern NameAitacSignificance
filter40.00274013
filter635.8078e-05
HCLUST-176_CBFB.UNK.0.A
Pattern NameAitacSignificance
filter40.00295592
filter630.00449346
HCLUST-24_SOX5.UNK.0.A
Pattern NameAitacSignificance
filter40.00816497
HCLUST-116_KLF8.UNK.0.A
Pattern NameAitacSignificance
filter40.0120348
filter260.011347
HCLUST-185_EGR1.UNK.0.A
Pattern NameAitacSignificance
filter263.78542e-05
HCLUST-68_ZNF341.UNK.0.A
Pattern NameAitacSignificance
filter260.00204454
HCLUST-161_GLI3.UNK.0.A
Pattern NameAitacSignificance
filter260.00434855
HCLUST-184_KLF12.UNK.0.A
Pattern NameAitacSignificance
filter260.00442791
filter180.0126811
HCLUST-27_TAL1.UNK.0.A
Pattern NameAitacSignificance
filter260.0057393999999999995
HCLUST-139_SREBF1.UNK.0.A
Pattern NameAitacSignificance
filter260.0063648
filter180.0197085
HCLUST-1_GATA1.UNK.0.A
Pattern NameAitacSignificance
filter260.0074348
filter180.0175671
HCLUST-121_AR.UNK.0.A
Pattern NameAitacSignificance
filter630.0007333639999999999
HCLUST-181_HOXA9.UNK.0.A
Pattern NameAitacSignificance
filter630.0122413
HCLUST-14_HOXA13.UNK.0.A
Pattern NameAitacSignificance
filter630.0142483
HCLUST-51_ZIM3.UNK.0.A
Pattern NameAitacSignificance
filter630.018616499999999998
HCLUST-60_ZNF264.UNK.0.A
Pattern NameAitacSignificance
filter620.00119008
HCLUST-153_CRX.UNK.0.A
Pattern NameAitacSignificance
filter620.00401627
HCLUST-108_FOXH1.UNK.0.A
Pattern NameAitacSignificance
filter620.00732952
HCLUST-145_RELA.UNK.0.A
Pattern NameAitacSignificance
filter620.0150756
HCLUST-158_BCL11A.UNK.0.A
Pattern NameAitacSignificance
filter500.00784503
HCLUST-146_NR1I2.UNK.0.A
Pattern NameAitacSignificance
filter500.00836541
HCLUST-135_MBD2.UNK.0.A
Pattern NameAitacSignificance
filter500.013683700000000002
HCLUST-57_ZNF214.UNK.0.A
Pattern NameAitacSignificance
filter500.014703200000000001
HCLUST-87_ZBTB48.UNK.0.A
Pattern NameAitacSignificance
filter500.015543200000000002
HCLUST-129_ELF1.UNK.0.A
Pattern NameAitacSignificance
filter500.015543200000000002
HCLUST-143_E2F2.UNK.0.A
Pattern NameAitacSignificance
filter500.0187167
HCLUST-39_ZNF770.UNK.0.A
Pattern NameAitacSignificance
filter180.006418200000000001
HCLUST-38_ZNF768.UNK.0.A
Pattern NameAitacSignificance
filter180.00959545
Click here for Motifs by Aitac in CEBPB for fold-9: #22
TF NamePattern(s)
HCLUST-176_CBFB.UNK.0.A
Pattern NameAitacSignificance
filter120.00183649
filter400.00855181
filter230.019142700000000002
HCLUST-116_KLF8.UNK.0.A
Pattern NameAitacSignificance
filter120.00417719
filter400.00690676
HCLUST-161_GLI3.UNK.0.A
Pattern NameAitacSignificance
filter120.00776732
filter400.00559045
HCLUST-7_T.UNK.0.A
Pattern NameAitacSignificance
filter120.0102342
HCLUST-25_SOX17.UNK.0.A
Pattern NameAitacSignificance
filter120.0191305
HCLUST-183_E2F1.UNK.0.A
Pattern NameAitacSignificance
filter550.00583723
HCLUST-14_HOXA13.UNK.0.A
Pattern NameAitacSignificance
filter70.00958302
HCLUST-76_TBP.UNK.0.A
Pattern NameAitacSignificance
filter70.00958302
HCLUST-185_EGR1.UNK.0.A
Pattern NameAitacSignificance
filter400.000199592
HCLUST-139_SREBF1.UNK.0.A
Pattern NameAitacSignificance
filter400.00680632
HCLUST-68_ZNF341.UNK.0.A
Pattern NameAitacSignificance
filter400.00762419
HCLUST-184_KLF12.UNK.0.A
Pattern NameAitacSignificance
filter400.00990661
filter80.00316969
HCLUST-27_TAL1.UNK.0.A
Pattern NameAitacSignificance
filter400.0116822
HCLUST-1_GATA1.UNK.0.A
Pattern NameAitacSignificance
filter400.0139732
HCLUST-51_ZIM3.UNK.0.A
Pattern NameAitacSignificance
filter230.00248005
filter518.7493e-05
HCLUST-13_CDX1.UNK.0.A
Pattern NameAitacSignificance
filter230.00357891
HCLUST-81_THAP1.UNK.0.A
Pattern NameAitacSignificance
filter80.00040977900000000004
HCLUST-39_ZNF770.UNK.0.A
Pattern NameAitacSignificance
filter80.000518808
HCLUST-170_TP53.UNK.0.A
Pattern NameAitacSignificance
filter80.00656712
HCLUST-59_ZNF257.UNK.0.A
Pattern NameAitacSignificance
filter80.0135409
HCLUST-53_ZNF121.UNK.0.A
Pattern NameAitacSignificance
filter510.00458367
HCLUST-3_ZBTB18.UNK.0.A
Pattern NameAitacSignificance
filter510.0173514
Click here for Motifs by Aitac in CEBPB for fold-10: #29
TF NamePattern(s)
HCLUST-118_MTF1.UNK.0.A
Pattern NameAitacSignificance
filter250.000701716
HCLUST-52_ZKSCAN1.UNK.0.A
Pattern NameAitacSignificance
filter250.0163444
HCLUST-31_ZNF449.UNK.0.A
Pattern NameAitacSignificance
filter260.0034907
HCLUST-161_GLI3.UNK.0.A
Pattern NameAitacSignificance
filter260.0158686
HCLUST-176_CBFB.UNK.0.A
Pattern NameAitacSignificance
filter20.00512302
filter540.0008345000000000001
filter470.00459118
HCLUST-121_AR.UNK.0.A
Pattern NameAitacSignificance
filter20.00914789
filter540.0194762
filter420.0138499
filter470.000310123
filter180.012368899999999999
HCLUST-51_ZIM3.UNK.0.A
Pattern NameAitacSignificance
filter20.011584899999999999
filter470.00843772
HCLUST-127_FOXJ2.UNK.0.A
Pattern NameAitacSignificance
filter20.013281200000000002
filter470.00018649200000000002
HCLUST-58_ZNF250.UNK.0.A
Pattern NameAitacSignificance
filter20.0162868
HCLUST-64_ZNF322.UNK.0.A
Pattern NameAitacSignificance
filter510.0186814
HCLUST-92_NR2C2.UNK.0.A
Pattern NameAitacSignificance
filter630.00263478
HCLUST-183_E2F1.UNK.0.A
Pattern NameAitacSignificance
filter630.00692389
HCLUST-6_PPARA.UNK.0.A
Pattern NameAitacSignificance
filter630.011173299999999999
HCLUST-38_ZNF768.UNK.0.A
Pattern NameAitacSignificance
filter630.0190272
HCLUST-41_ZNF41.UNK.0.A
Pattern NameAitacSignificance
filter540.00345682
HCLUST-108_FOXH1.UNK.0.A
Pattern NameAitacSignificance
filter540.0119316
HCLUST-128_HNF4A.UNK.0.A
Pattern NameAitacSignificance
filter540.014127299999999999
HCLUST-177_MEF2A.UNK.0.A
Pattern NameAitacSignificance
filter420.00737413
filter180.00112299
HCLUST-166_NANOG.UNK.0.A
Pattern NameAitacSignificance
filter420.015140700000000002
HCLUST-14_HOXA13.UNK.0.A
Pattern NameAitacSignificance
filter470.00782953
HCLUST-88_ZFP28.UNK.0.A
Pattern NameAitacSignificance
filter470.0108251
HCLUST-120_AIRE.UNK.0.A
Pattern NameAitacSignificance
filter470.0108804
HCLUST-24_SOX5.UNK.0.A
Pattern NameAitacSignificance
filter470.011841500000000001
HCLUST-126_FOXA1.UNK.0.A
Pattern NameAitacSignificance
filter470.0158024
HCLUST-34_ZNF586.UNK.0.A
Pattern NameAitacSignificance
filter180.00110665
HCLUST-182_NFATC1.UNK.0.A
Pattern NameAitacSignificance
filter180.00174801
HCLUST-107_FEZF1.UNK.0.A
Pattern NameAitacSignificance
filter180.00846184
HCLUST-40_ZNF816.UNK.0.A
Pattern NameAitacSignificance
filter180.00928746
HCLUST-76_TBP.UNK.0.A
Pattern NameAitacSignificance
filter180.013574200000000002
In [23]:
from matlas.matches import vdom_pssm
from matlas.genome_data import *
from modisco.visualization import viz_sequence
from matlas.pwms import load_motifDB_id_maps

def display_redundancy(min_fold_count=5):
    """Collect the TFs that recur across folds, with a rendered logo for each.

    For every fold 1..10 in the notebook-level ``ob_tofold``, each key of that
    fold's ``tf_to_pattern`` counts once towards the TF's fold count. TFs seen
    in at least ``min_fold_count`` folds are kept. Despite its name, this
    function displays nothing itself; the caller builds and shows the table.

    Args:
        min_fold_count: minimum number of folds a TF must appear in to be
            reported. Defaults to 5, the cutoff that used to be hard-coded.

    Returns:
        A ``(redundant_tfs, redundant_ppms)`` pair of OrderedDicts keyed by TF
        name, ordered by descending fold count (ties keep first-seen order):
        ``redundant_tfs`` maps to the fold count and ``redundant_ppms`` to the
        ``vdom_pssm`` rendering of the IC-scaled PPM from the motif database.
    """
    tf_to_fold_count = OrderedDict()
    for fold_no in range(1, 11):
        for tfname in ob_tofold[fold_no].tf_to_pattern.keys():
            tf_to_fold_count[tfname] = tf_to_fold_count.get(tfname, 0) + 1

    # sorted() is stable even with reverse=True, so equal counts stay in the
    # order in which the TFs were first encountered.
    sorted_tfs = sorted(tf_to_fold_count.items(), key=lambda x: x[1], reverse=True)
    motifDB_data = load_motifDB_id_maps(database_name='HOCOMOCO.nonredundant.annotated', method_to_load=None)

    redundant_tfs = OrderedDict()
    redundant_ppms = OrderedDict()
    for tfname, count in sorted_tfs:
        if count < min_fold_count:
            # Counts are in descending order: nothing further can qualify.
            break
        # NOTE(review): BP_BACKGROUND presumably comes from the star import of
        # matlas.genome_data -- confirm.
        pssm = viz_sequence.ic_scale(motifDB_data['maps'][tfname]['PPM'], np.array(BP_BACKGROUND))
        redundant_ppms[tfname] = vdom_pssm(pssm, letter_width=0.15, height=0.5)
        redundant_tfs[tfname] = count

    # TODO: include median significance, median influence
    return redundant_tfs, redundant_ppms

from vdom.helpers import (h1, p, li, img, div, b, br, ul, a,
                          details, summary,
                          table, thead, th, tr, tbody, td, ol)

# TFs that recur across folds (default cutoff of the helper) and their logos.
redundant_tfs, redundant_ppms = display_redundancy()

# One row per recurring TF: name, rendered motif logo, number of folds.
tab = table(thead(th('TF Name'), th('PPM'), th("Fold count")),
                  tbody([tr(td(b(name)), td(redundant_ppms[name]),
                            td(b(str(value))))
                             for name, value in redundant_tfs.items()]
                       )
                    )

# Show the table collapsed behind a summary line ("HOCOMOCO" spelling fixed).
display(details(summary('Click here for ', b('HOCOMOCO Merged Motifs'), ' sorted by ', b('Redundancy'),
                        ' in ', b(motif_name), ),
                tab))
Click here for HOCOMOCCO Merged Motifs sorted by Redundancy in CEBPB
TF NamePPMFold count
HCLUST-176_CBFB.UNK.0.A10
HCLUST-161_GLI3.UNK.0.A10
HCLUST-51_ZIM3.UNK.0.A9
HCLUST-185_EGR1.UNK.0.A9
HCLUST-116_KLF8.UNK.0.A8
HCLUST-127_FOXJ2.UNK.0.A7
HCLUST-121_AR.UNK.0.A7
HCLUST-139_SREBF1.UNK.0.A6
HCLUST-184_KLF12.UNK.0.A6
HCLUST-14_HOXA13.UNK.0.A6
HCLUST-68_ZNF341.UNK.0.A6
HCLUST-27_TAL1.UNK.0.A5
HCLUST-34_ZNF586.UNK.0.A5
HCLUST-182_NFATC1.UNK.0.A5

RSAT clustering on AI-TAC motifs

In [16]:
from matlas.pwms import get_motifDB_id_maps, reduce_pwm_redundancy
# NOTE(review): PSEUDOCOUNT and IC_THRESHOLD below presumably come from this
# wildcard import -- confirm, then import them by name.
from matlas.genome_data import *

motif_name = 'CEBPB'
aitacdir = "/mnt/lab_data2/msharmin/oc-atlas/DanSkinData/fold_a_alex/{}".format(motif_name)

# Filter-derived motifs in MEME format, loaded as the motif set to cluster.
memefile = "{0}/filter_motifs_pwm.meme".format(aitacdir)
motif_id_maps = get_motifDB_id_maps(database_name=memefile)

# All clustering outputs share this prefix.  It is derived from aitacdir so
# the directory is spelled out in one place only.
file_prefix = "{}/aitac".format(aitacdir)
reduce_pwm_redundancy(
    motif_id_maps,
    out_pwm_file="{}.hclust_pwms.tmp".format(file_prefix),
    pwmtype="probabilities",
    tmp_prefix=file_prefix,
    pseudocount=PSEUDOCOUNT,
    ic_thresh=IC_THRESHOLD,
    cor_thresh=0.6,
    ncor_thresh=0.4,
    num_threads=28)
filter4 2
filter8 0
filter15 0
filter16 0
filter32 0
filter40 1
filter54 2
/mnt/lab_data2/msharmin/oc-atlas/DanSkinData/fold_a_alex/CEBPB/aitac.cor.motifs.mat.txt /mnt/lab_data2/msharmin/oc-atlas/DanSkinData/fold_a_alex/CEBPB/aitac.ncor.motifs.mat.txt
0
4 filter5 (4, 12) 0.9999999999999996
7 filter12 (4, 21) 0.9999999999999998
(26, 26)
filter0;filter19 0.7078714398899135 1.0
filter2;filter41 0.7297810025907647 1.0
saving out filter34
saving out filter42
saving out filter57
filter43;filter61 0.6862553250824988 1.0
filter1;filter3 0.7906007147545048 0.8894258040988179
filter36;filter52 0.6600176379662774 1.0
saving out filter60
saving out filter11
filter44;filter50 0.6855557454701704 1.0
filter26;filter39 0.7108863479037282 1.0
saving out filter0;filter19
saving out filter35
saving out filter2;filter41
saving out filter45
saving out filter27
saving out filter5
saving out filter12
saving out filter7
saving out filter1;filter3
saving out filter26;filter39
saving out filter43;filter61
saving out filter36;filter52
saving out filter17
saving out filter44;filter50
/users/msharmin/code/mouse-atlas/matlas/pwms.py:449: FutureWarning:

read_table is deprecated, use read_csv instead, passing sep='\t'.

/users/msharmin/code/mouse-atlas/matlas/pwms.py:463: ClusterWarning:

scipy.cluster: The symmetric non-negative hollow observation matrix looks suspiciously like an uncondensed distance matrix

In [17]:
from matlas.reports import generate_merged_pwm_plots
from matlas.pwms import read_pwm_file, load_cisbp_maps

# Read back the merged PWM file written by the clustering cell.  This cell
# depends on `file_prefix` and `motif_id_maps` from that cell.
merged_pwm_path = "{}.hclust_pwms.tmp".format(file_prefix)
mergeddict = read_pwm_file(merged_pwm_path)

# NOTE: this rebinds `tab`, which the redundancy-table cell also assigns.
tab, tabledict = generate_merged_pwm_plots(mergeddict, motif_id_maps)
In [3]:
#display(tab)
In [ ]: