In [1]:
import h5py

# Load the arrays needed downstream from the HDF5 predictions file.
# The file is opened read-only ("r") inside a context manager so the handle is
# always released, even if one of the dataset reads raises, and so the file can
# never be modified or created by accident.
with h5py.File("/mnt/lab_data2/msharmin/oc-atlas/DanSkinData/fold_0_ggr/model_preds_early/ggr.predictions.h5", "r") as f:
    print(list(f))  # dataset names available in the file
    # `[:]` copies each dataset into memory so it stays usable after the file closes.
    unfold_logits = f['logits'][:]
    labels = f['labels'][:]
    cur_seqs = f['features'][:]
    # NOTE(review): the get_memes cell further down reads `activations`;
    # re-enable this line before running that cell.
    #activations = f['CONV1_ACTIVATION'][:]
['ATAC_LABELS', 'ATAC_SIGNALS', 'ATAC_SIGNALS.NORM', 'CONV1_ACTIVATION', 'CTCF_LABELS', 'CTCF_SIGNALS', 'CTCF_SIGNALS.NORM', 'DYNAMIC_MARK_LABELS', 'DYNAMIC_STATE_LABELS', 'H3K27ac_LABELS', 'H3K27ac_SIGNALS', 'H3K27ac_SIGNALS.NORM', 'H3K27me3_LABELS', 'H3K27me3_SIGNALS', 'H3K27me3_SIGNALS.NORM', 'H3K4me1_LABELS', 'H3K4me1_SIGNALS', 'H3K4me1_SIGNALS.NORM', 'KLF4_LABELS', 'POL2_LABELS', 'STABLE_MARK_LABELS', 'STABLE_STATE_LABELS', 'TP63_LABELS', 'TRAJ_LABELS', 'ZNF750_LABELS', 'example_metadata', 'features', 'labels', 'logits', 'logits.ci', 'logits.ci.thresh', 'logits.multimodel.norm', 'logits.norm', 'positive_importance_bp_sum', 'probs', 'pwm-scores.null.idx', 'sequence-weighted.thresholds', 'sequence.active.gc_fract', 'sequence.active.string']
In [4]:
from matlas.aitac_motifs import get_task_cors
import numpy as np

# Correlation between labels and logits, one value per example
# (the recorded run reports 35840 values, 816 of them NaN).
correlations = get_task_cors(labels, unfold_logits, verbose=True)

# Keep examples whose correlation exceeds 0.75. NaN entries compare as False,
# so they are dropped; errstate silences the "invalid value encountered in
# greater" RuntimeWarning that this comparison otherwise emits.
with np.errstate(invalid="ignore"):
    passes_threshold = np.asarray(correlations) > 0.75

# flatnonzero always yields a 1-D index array. The previous
# argwhere(...).squeeze() collapsed to a 0-d array when exactly one example
# passed, which broke len(idx) and the fancy indexing in later cells.
# Assumes `correlations` is 1-D, as the recorded (18358,) shape suggests.
idx = np.flatnonzero(passes_threshold)
print(len(idx), len(correlations))
weighted_cor is nan
number of NaN values: 816
18358 35840
/users/msharmin/anaconda2/envs/basepair13/lib/python3.6/site-packages/ipykernel_launcher.py:4: RuntimeWarning: invalid value encountered in greater
  after removing the cwd from sys.path.
In [6]:
# Restrict sequences, labels and logits to the examples selected in `idx`.
x2 = cur_seqs[idx, :, :]
y2 = labels[idx, :]
pred_full_model2 = unfold_logits[idx,:]
# Later cells read `correlations2` (np.array(correlations2), get_filt_corr), so
# this call must be live; left commented out, the notebook only worked from
# leftover kernel state and failed with NameError on Restart & Run All.
correlations2 = get_task_cors(y2, pred_full_model2, verbose=True)
weighted_cor is 0.8942837543332051
number of NaN values: 0
In [7]:
import numpy as np

# Read the saved filter_predictions array from disk; the bare last expression
# displays its shape (the recorded run shows (18358, 300, 19)).
filter_predictions = np.load(
    file="/mnt/lab_data2/msharmin/oc-atlas/DanSkinData/fold_0_ggr/result_early/filter_predictions.npy"
)
filter_predictions.shape
Out[7]:
(18358, 300, 19)
In [11]:
# Sanity-check that the arrays passed to get_filt_corr agree on their first axis.
print(filter_predictions.shape, y2.shape, sep="\n")
# Convert the correlations to an ndarray so they expose `.shape`.
correlations2 = np.array(correlations2)
print(correlations2.shape)
(18358, 300, 19)
(18358, 19)
(18358,)
In [12]:
from matlas.aitac_motifs import get_filt_corr

# Returns three values; per the verbose output the first two have shape
# (n_examples, n_filters). `ave_filt_infl` is passed to DenovoAitac later on.
filt_corr, filt_infl, ave_filt_infl = get_filt_corr(
    filter_predictions,
    y2,
    correlations2,
    verbose=True,
)
Shape of filter-wise correlations:
(18358, 300)
Shape of filter influence:
(18358, 300)
In [ ]:
from matlas.aitac_motifs import get_memes

# NOTE(review): `activations` is not defined by any live cell above; its load
# (f['CONV1_ACTIVATION'][:]) is commented out in the first cell. It presumably
# also needs the same `idx` subsetting as x2/y2 -- TODO confirm before running.
#
# The output path previously ended in `.format(i)`: the string has no
# placeholders and `i` is never defined, so it raised NameError while adding
# nothing to the path. The call is removed; the path value is unchanged.
pwm, act_ind, nseqs, activated_OCRs, n_activated_OCRs, OCR_matrix = get_memes(
        activations.squeeze(), x2.squeeze(), y2,
        output_file_path="/mnt/lab_data2/msharmin/oc-atlas/DanSkinData/fold_0_ggr/result_early/")
In [ ]:
from matlas.matches import DenovoAitac

# Result directory for this run; `motif_name` is reused by the display cells below.
motif_name = 'result_early'
aitacdir = "/mnt/lab_data2/msharmin/oc-atlas/DanSkinData/fold_0_ggr/{}".format(motif_name)

ob = DenovoAitac(aitacdir, influence=ave_filt_infl)

# The Tomtom matching step is kept disabled; presumably its report already
# exists on disk and is what load_matched_motifs reads -- TODO confirm.
# ob.fetch_tomtom_matches(
#     meme_db="/mnt/lab_data/kundaje/users/msharmin/annotations/HOCOMOCOv11_core_pwms_HUMAN_mono.renamed.nonredundant.annotated.meme",
#     database_name="HOCOMOCO.nonredundant.annotated",
#     save_report=True,
#     tomtom_dir="{0}/{1}_tomtomout".format(aitacdir, "HOCOMOCO.nonredundant.annotated"))
ob.load_matched_motifs(
    database_name="HOCOMOCO.nonredundant.annotated",
)
ob.get_motif_per_celltype(
    database_name="HOCOMOCO.nonredundant.annotated",
    match_threshold=0.05,
)

# Build the two tables (and their backing dicts) rendered by the next cells.
pattern_tab, pattern_dict = ob.visualize_pattern_table()
tf_tab, tf_dict = ob.visualize_tf_table("Aitac")
In [14]:
from vdom.helpers import (b, summary, details)
from IPython.display import display

# Collapsible section: the summary line shows the number of de novo patterns,
# and the pattern table is revealed on click.
pattern_summary = summary(
    'Click here for ',
    b('Denovo Patterns'),
    ' by ',
    b('{}'.format('Aitac')),
    ' in ',
    b(motif_name),
    ": #{}".format(len(pattern_dict)),
)
display(details(pattern_summary, pattern_tab))
Click here for Denovo Patterns by Aitac in result_early: #300
Pattern NameTF Name(s)AitacInfluence
filter650.39692069297982674
filter1130.3965293181993984
filter35HCLUST-133_CEBPD.UNK.0.A, HCLUST-105_ATF4.UNK.0.A0.39603974264792463
filter1210.3959032731597305
filter1620.3957356375856513
filter30.3956807838134664
filter740.39518833289155064
filter263HCLUST-127_FOXJ2.UNK.0.A, HCLUST-121_AR.UNK.0.A0.3949540071448937
filter1970.39448879371553747
filter490.394321397262071
filter1960.39427190670075674
filter920.39413237475885016
filter99HCLUST-156_TEAD1.UNK.0.A0.3940573605387874
filter1080.39394536171029704
filter630.39386065434651407
filter1570.3937736116366374
filter2830.3937564852073185
filter2560.39374013838068206
filter2430.39364751067901843
filter860.3936418350592702
filter710.3934375644645762
filter2290.3931881377784955
filter1860.39316352521778775
filter830.3930043744115605
filter245HCLUST-96_NKX6-1.UNK.0.A0.3929561858701997
filter1260.39281909901057754
filter230.3926822742062069
filter2730.3926390850110359
filter1110.3926086597512222
filter840.3925237690622263
filter2090.3925232125541634
filter2410.3925171202183247
filter2850.3924242607421898
filter1430.3924205327028073
filter105HCLUST-159_EHF.UNK.0.A, HCLUST-129_ELF1.UNK.0.A, HCLUST-130_ERG.UNK.0.A, HCLUST-158_BCL11A.UNK.0.A0.39240820513800956
filter181HCLUST-10_IRF1.UNK.0.A0.39234579996080043
filter1900.39225339096233686
filter244HCLUST-48_SMAD3.UNK.0.A0.39216840411613935
filter510.3921541266411352
filter1820.3920779854088036
filter980.3920635768275929
filter2370.3920417681615961
filter2250.3919955346031689
filter130HCLUST-14_HOXA13.UNK.0.A0.39193931726678805
filter2330.39193651353886777
filter2030.39185488247305694
filter1360.3918506181529674
filter25HCLUST-85_ZNF354A.UNK.0.A, HCLUST-19_ZNF418.UNK.0.A, HCLUST-160_ZNF394.UNK.0.A0.39179523403286876
filter1830.3917940759153802
filter720.39172402978427373
filter370.3917125772745606
filter820.391589953239118
filter570.3915790039067157
filter1230.39156736451829693
filter2530.3915658399152111
filter2930.39152874263921206
filter2310.39149265986296083
filter125HCLUST-184_KLF12.UNK.0.A0.3914600336603481
filter1990.3914350651276341
filter5HCLUST-167_NFKB1.UNK.0.A, HCLUST-145_RELA.UNK.0.A0.39139010647875777
filter1280.39136275934517883
filter620.39132642472491425
filter140.39115581921115605
filter480.3911531449230461
filter1490.39112408729867315
filter2620.3910874304786279
filter1550.3910195211574546
filter1330.39100598419485355
filter2690.39100510771733343
filter2280.39100390537878915
filter107HCLUST-9_IRF7.UNK.0.A0.39100030113635253
filter970.39099467471070676
filter1760.3909388894006432
filter1150.3909368370399745
filter2910.39090610280623994
filter930.39089186638466494
filter850.39086717570496216
filter2170.39086693708010994
filter1410.3908475771435071
filter2790.3908131706317856
filter870.3908038515380383
filter270.3907133440297913
filter1530.3907073770825064
filter45HCLUST-12_NR2C1.UNK.0.A0.3906732773966461
filter2840.3906709646451218
filter70.390669461852012
filter2750.3905450847972845
filter110.3904958771608854
filter1730.39048757509915893
filter1180.3904630074412727
filter1090.39043477742760546
filter940.39043390437441
filter1020.3904212873848708
filter1510.3903734229477917
filter90.3903161888885746
filter1120.3903121854000979
filter700.39029783031786014
filter1420.39028920783767623
filter1580.39019355843339587
filter190.39018544881493383
filter1950.3901770940417199
filter2590.3901752045143702
filter1010.39014360463201936
filter2150.39011667094357233
filter1140.3900932283909286
filter1500.3900825312852405
filter1800.39007216376740705
filter170.3898940160006222
filter239HCLUST-186_ARNTL.UNK.0.A0.38987843877014877
filter1440.38986842325405663
filter2200.389845154649772
filter69HCLUST-179_BACH1.UNK.0.A, HCLUST-101_NFE2.UNK.0.A0.38983088418708073
filter300.38978991836967286
filter31HCLUST-185_EGR1.UNK.0.A, HCLUST-68_ZNF341.UNK.0.A0.3897844389406357
filter42HCLUST-180_BCL6.UNK.0.A0.3897674762578256
filter180.3897419503315691
filter1160.3896871107297401
filter430.3895742257248692
filter1000.3895456489939432
filter1270.3894343814791384
filter1840.38936161522240853
filter2970.3893530021972201
filter470.38935163490982566
filter2870.38930852689040707
filter2100.38928697235210086
filter2980.3892471706291152
filter880.38924515981255153
filter39HCLUST-137_NFIA.UNK.0.A0.38919163682200797
filter2350.3891671967641547
filter00.38913519047886924
filter1320.38911208864751945
filter1790.3891036413949393
filter110HCLUST-176_CBFB.UNK.0.A0.3890363361063424
filter2210.3890025097896467
filter2010.3889953668410628
filter1880.3889746528102923
filter1610.3889689071282116
filter185HCLUST-39_ZNF770.UNK.0.A0.38895348925235956
filter2260.38894438189600555
filter138HCLUST-124_FOSB.UNK.0.A0.388942452090794
filter890.38891497760524635
filter1770.3888803846418666
filter2120.38878805756129203
filter1030.3887707676082266
filter1040.38874696123584174
filter1710.3887405227962985
filter20.3887395263327615
filter2140.3886974966609536
filter280.3886588032579796
filter152HCLUST-175_ATF1.UNK.0.A, HCLUST-42_ZNF85.UNK.0.A0.3886548844884746
filter1400.3886489944212345
filter2460.38860580672793626
filter1390.388583750409215
filter1170.3885261719382409
filter2060.38851148482873693
filter2760.3885071890769255
filter2050.38846868079023633
filter281HCLUST-165_LEF1.UNK.0.A0.3883507428809463
filter160.3882697695370934
filter2230.3882464177095983
filter2130.38818148367984423
filter560.3881695757605902
filter950.38815537976769615
filter2680.3880994387527551
filter2470.3880958432612733
filter500.3880847517676034
filter520.38808021880836446
filter2890.3880577038387872
filter2270.3879801360952187
filter2160.3879662138665029
filter1980.3879413727893404
filter1450.38786870840799065
filter1780.3878585743525434
filter670.3878394625072568
filter2300.38780854250286656
filter320.3878032294313606
filter1190.3877726626339709
filter2820.38768557050441577
filter410.3876460559268405
filter2020.38763772894898624
filter2180.38762097178579275
filter2940.38758944920365995
filter2510.38756418416475363
filter2600.38756180282411057
filter340.38754762994036995
filter460.3875472214098172
filter1630.3875024856514646
filter550.3875006528068425
filter211HCLUST-149_CTCFL.UNK.0.A, HCLUST-142_TAF1.UNK.0.A0.38748238750923797
filter2480.3874448460445272
filter208HCLUST-170_TP53.UNK.0.A0.3874418668670133
filter2880.38736077680165054
filter2040.38733426807147153
filter750.3872390298485174
filter2650.38722398374628947
filter2710.387205599338412
filter590.3871713159604729
filter2550.38716809212906544
filter610.3871591655225438
filter2610.387149178767747
filter2220.3871402983412252
filter1650.3871227854200592
filter2240.3871205740572246
filter1940.3870575744395499
filter1470.38700596558796335
filter68HCLUST-169_TFAP2A.UNK.0.A0.3869781112672157
filter1660.3869243591789378
filter2400.38688191164110114
filter2660.38687735744150686
filter960.3868735710444238
filter330.3867354708631155
filter210.3867205946101058
filter2700.38666020733531126
filter2570.3866564001492899
filter187HCLUST-152_SOX10.UNK.0.A, HCLUST-25_SOX17.UNK.0.A0.38654836096806366
filter290.386513036234612
filter6HCLUST-82_TWIST1.UNK.0.A0.38647928826755207
filter2490.38646140511563365
filter780.3864504329435867
filter800.3864494422193028
filter790.3864245022399947
filter1560.3863876232614762
filter1370.38636050473731987
filter2950.3863154948566527
filter2190.3862117347699711
filter2360.3860893819896614
filter73HCLUST-117_LHX3.UNK.0.A0.38604802390894694
filter2720.38597505344204114
filter1310.3859727700345629
filter160HCLUST-77_TCF7.UNK.0.A0.3859094071889212
filter200.38590435993466654
filter150.38588033980276903
filter1670.38586415044180006
filter2540.38584971430482606
filter260.3857520037241293
filter53HCLUST-177_MEF2A.UNK.0.A, HCLUST-76_TBP.UNK.0.A0.3857235241689698
filter134HCLUST-75_STAT6.UNK.0.A0.38569632488476707
filter2800.3856942615313162
filter80.3856381821444396
filter106HCLUST-134_INSM1.UNK.0.A0.3855573789322181
filter640.3855302209610852
filter2770.3855233153925706
filter540.38549926125731093
filter1540.3854525119733026
filter1690.38543570760431
filter1920.38537205400365593
filter760.3853529987898851
filter240.3853321944531809
filter2780.3852202133558304
filter900.3851583251309637
filter1220.3851000905140902
filter130.38508591172012996
filter193HCLUST-110_GRHL2.UNK.0.A0.38504717004968675
filter1680.3850106177588502
filter1590.38499355835509064
filter2920.3849760768804153
filter1720.38493307516486613
filter910.384843941304926
filter360.3848010582658278
filter10.38475343614169794
filter400.3847500664088574
filter1240.38472770060482475
filter1910.3847027305968971
filter1700.384542247059855
filter220.38443967300240983
filter2340.3843624223902204
filter2500.38419738920133656
filter770.38418651771030127
filter600.38415819451457184
filter2420.3841528806295955
filter2380.3840790128961105
filter40.3840659697346514
filter2070.384004532175154
filter2740.38396250679450833
filter380.38395448967216794
filter2900.38390469422244977
filter2670.38376207808579493
filter1740.3836099813108334
filter2990.38359955721546396
filter2960.38343739624472184
filter258HCLUST-38_ZNF768.UNK.0.A0.38341534210036426
filter440.38325247789631683
filter1290.38321697868073956
filter120.3830320884370176
filter148HCLUST-171_THAP11.UNK.0.A0.38302594649658833
filter660.382841039192939
filter189HCLUST-150_HNF1A.UNK.0.A0.3824491515883446
filter2520.38227404619856664
filter146HCLUST-57_ZNF214.UNK.0.A0.38212424293896663
filter81HCLUST-126_FOXA1.UNK.0.A0.3820372531088922
filter1200.38193088557101673
filter1350.3818233677083494
filter2860.381666116084951
filter2640.38130385347546253
filter2320.38119167005746124
filter2000.3810277154057643
filter580.38095485163246445
filter1640.38079362286296425
filter1750.38072489969614326
filter100.3805020655303455
In [15]:
# Collapsible section: the summary line shows the number of matched motifs,
# and the per-TF table is revealed on click.
tf_summary = summary(
    'Click here for ',
    b('Motifs'),
    ' by ',
    b('{}'.format('Aitac')),
    ' in ',
    b(motif_name),
    ": #{}".format(len(tf_dict)),
)
display(details(tf_summary, tf_tab))
Click here for Motifs by Aitac in result_early: #53
TF NamePattern(s)
HCLUST-133_CEBPD.UNK.0.A
Pattern NameAitacSignificance
filter350.00099916
HCLUST-105_ATF4.UNK.0.A
Pattern NameAitacSignificance
filter350.00197511
HCLUST-127_FOXJ2.UNK.0.A
Pattern NameAitacSignificance
filter2630.00178197
HCLUST-121_AR.UNK.0.A
Pattern NameAitacSignificance
filter2630.0288961
HCLUST-156_TEAD1.UNK.0.A
Pattern NameAitacSignificance
filter990.0204899
filter1880.0344144
filter1650.0381344
filter2640.000107261
HCLUST-96_NKX6-1.UNK.0.A
Pattern NameAitacSignificance
filter2450.0212288
filter2930.022356099999999997
filter2310.0328245
filter1490.0233075
filter1950.04889280000000001
filter1880.0344144
filter240.041511400000000004
filter810.042475900000000004
filter2640.013073
HCLUST-159_EHF.UNK.0.A
Pattern NameAitacSignificance
filter1050.00613495
filter420.00625258
HCLUST-129_ELF1.UNK.0.A
Pattern NameAitacSignificance
filter1050.019844900000000002
filter420.015339700000000001
HCLUST-130_ERG.UNK.0.A
Pattern NameAitacSignificance
filter1050.019844900000000002
filter420.00625258
filter1200.0352403
HCLUST-158_BCL11A.UNK.0.A
Pattern NameAitacSignificance
filter1050.0394814
filter1070.00715939
filter420.00625258
HCLUST-10_IRF1.UNK.0.A
Pattern NameAitacSignificance
filter1810.024127799999999998
filter250.00353648
filter1070.00440042
filter580.0111455
HCLUST-48_SMAD3.UNK.0.A
Pattern NameAitacSignificance
filter2440.0299605
HCLUST-14_HOXA13.UNK.0.A
Pattern NameAitacSignificance
filter1300.00424252
HCLUST-85_ZNF354A.UNK.0.A
Pattern NameAitacSignificance
filter250.038788800000000005
HCLUST-19_ZNF418.UNK.0.A
Pattern NameAitacSignificance
filter250.038788800000000005
HCLUST-160_ZNF394.UNK.0.A
Pattern NameAitacSignificance
filter250.038788800000000005
HCLUST-184_KLF12.UNK.0.A
Pattern NameAitacSignificance
filter1250.00711753
filter2170.0487712
HCLUST-167_NFKB1.UNK.0.A
Pattern NameAitacSignificance
filter50.00032605
filter10.014251900000000001
HCLUST-145_RELA.UNK.0.A
Pattern NameAitacSignificance
filter50.000420126
filter10.00285722
HCLUST-9_IRF7.UNK.0.A
Pattern NameAitacSignificance
filter1070.0392788
filter580.00481139
HCLUST-12_NR2C1.UNK.0.A
Pattern NameAitacSignificance
filter450.0108969
HCLUST-186_ARNTL.UNK.0.A
Pattern NameAitacSignificance
filter2390.0104315
HCLUST-179_BACH1.UNK.0.A
Pattern NameAitacSignificance
filter690.00579088
filter2610.026868700000000002
HCLUST-101_NFE2.UNK.0.A
Pattern NameAitacSignificance
filter690.00849114
filter1380.0331521
filter2610.0251907
filter600.013607
filter120.0235375
HCLUST-185_EGR1.UNK.0.A
Pattern NameAitacSignificance
filter310.0054532
filter1060.0492526
HCLUST-68_ZNF341.UNK.0.A
Pattern NameAitacSignificance
filter310.0384089
HCLUST-180_BCL6.UNK.0.A
Pattern NameAitacSignificance
filter420.0293062
HCLUST-137_NFIA.UNK.0.A
Pattern NameAitacSignificance
filter394.59784e-05
HCLUST-176_CBFB.UNK.0.A
Pattern NameAitacSignificance
filter1100.0102431
HCLUST-39_ZNF770.UNK.0.A
Pattern NameAitacSignificance
filter1850.0336827
HCLUST-124_FOSB.UNK.0.A
Pattern NameAitacSignificance
filter1380.00882047
filter2610.000523858
filter2660.00814642
filter600.0055326
HCLUST-175_ATF1.UNK.0.A
Pattern NameAitacSignificance
filter1520.000676925
HCLUST-42_ZNF85.UNK.0.A
Pattern NameAitacSignificance
filter1520.0158119
HCLUST-165_LEF1.UNK.0.A
Pattern NameAitacSignificance
filter2810.0304423
HCLUST-149_CTCFL.UNK.0.A
Pattern NameAitacSignificance
filter2110.00303441
filter800.00101643
filter1060.040106499999999996
HCLUST-142_TAF1.UNK.0.A
Pattern NameAitacSignificance
filter2110.0206132
HCLUST-170_TP53.UNK.0.A
Pattern NameAitacSignificance
filter2082.2221e-05
filter760.00545122
HCLUST-169_TFAP2A.UNK.0.A
Pattern NameAitacSignificance
filter680.011057700000000002
HCLUST-152_SOX10.UNK.0.A
Pattern NameAitacSignificance
filter1870.000410376
HCLUST-25_SOX17.UNK.0.A
Pattern NameAitacSignificance
filter1870.00395551
HCLUST-82_TWIST1.UNK.0.A
Pattern NameAitacSignificance
filter60.000128355
HCLUST-117_LHX3.UNK.0.A
Pattern NameAitacSignificance
filter730.022159900000000003
HCLUST-77_TCF7.UNK.0.A
Pattern NameAitacSignificance
filter1600.0115061
HCLUST-177_MEF2A.UNK.0.A
Pattern NameAitacSignificance
filter530.00835318
HCLUST-76_TBP.UNK.0.A
Pattern NameAitacSignificance
filter530.0135976
HCLUST-75_STAT6.UNK.0.A
Pattern NameAitacSignificance
filter1340.0197229
HCLUST-134_INSM1.UNK.0.A
Pattern NameAitacSignificance
filter1060.013584700000000002
HCLUST-110_GRHL2.UNK.0.A
Pattern NameAitacSignificance
filter1930.000140106
HCLUST-38_ZNF768.UNK.0.A
Pattern NameAitacSignificance
filter2580.0419925
HCLUST-171_THAP11.UNK.0.A
Pattern NameAitacSignificance
filter1480.000137292
filter2640.048394400000000004
HCLUST-150_HNF1A.UNK.0.A
Pattern NameAitacSignificance
filter1893.4013199999999996e-06
HCLUST-57_ZNF214.UNK.0.A
Pattern NameAitacSignificance
filter1460.020849799999999998
HCLUST-126_FOXA1.UNK.0.A
Pattern NameAitacSignificance
filter810.00522988
In [ ]: