# Parameters
# Identifier of the sample this report is generated for.
sample_name = "NIH3T3;D;En"

# Root directory handed to the MoDISco reporting calls further down.
modisco_root = (
    "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50"
    "/Naive_modisco2019"
)

# Sub-directory and task folder passed to modisco_report_pipeline.
mitra_subdir = "report/version2"
task_dir = "task_275-naivegw"

# Motif database name passed to modisco_report_pipeline.
database_name = "CISBP"

# Performance file passed to display_metadata.
perf_file = (
    "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50"
    "/fineFactorized/task_275-naivegw/NaiveauPRC.txt"
)

# Root directory passed to display_comparative_motif_sets alongside
# modisco_root.
homer_root = (
    "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50"
    "/Naive_scans"
)
from matlas.modisco_report import modisco_report_pipeline, display_metadata
# Spreadsheet path and sheet name passed to display_metadata.
# NOTE: the file name contains a space; keep it exactly as written.
reportfile = (
    "/mnt/lab_data/kundaje/msharmin/mouse_hem/with_tfd/full_mouse50"
    "/filtering samples_MS2.xlsx"
)
sheetname = "filter23"
load data from labcluster
Using TensorFlow backend. 2019-07-23 05:12:17,896 [WARNING] git-lfs not installed
# Show the metadata for this sample, using the performance file and the
# given sheet of the report spreadsheet.
display_metadata(
    sample_name,
    perf_file,
    reportfile,
    sheetname,
)
| MetaData Name | Description |
|---|---|
| Cell type | NIH3T3 |
| Cell Group | Cancer or Immortalized cells |
| Experiment Name | DHS |
| Experiment Group | ENCODE |
| replicate | Naïve overlap peaks | IDR peaks | TSS enrichment (< 8 is very poor <10 is low) | Final number of unique mapping, dup-filtered, chrM filtered reads | Number of reads in called peak regions | Fraction of reads in called peak regions | Number of reads in promoter regions | Fraction of reads in promoter regions | Number of reads in enhancer regions | Fraction of reads in enhancer regions |
|---|---|---|---|---|---|---|---|---|---|---|
| rep1 | 173083 | 115773 | 30.7691 | 8955599 | 2675929 | 0.2994 | 1813611 | 0.2029 | 3852563 | 0.4311 |
| rep10 | 173083 | 115773 | 13.9453 | 2352061 | 279571 | 0.1195 | 249220 | 0.1065 | 1058183 | 0.4524 |
| rep11 | 173083 | 115773 | 31.4668 | 9560902 | 2922703 | 0.3063 | 1986175 | 0.2081 | 4115920 | 0.4313 |
| rep12 | 173083 | 115773 | 31.2051 | 9522679 | 2888108 | 0.3039 | 1966766 | 0.2069 | 4096370 | 0.431 |
| rep13 | 173083 | 115773 | 30.9627 | 9124653 | 2745172 | 0.3015 | 1878526 | 0.2063 | 3930133 | 0.4316 |
| rep14 | 173083 | 115773 | 30.8126 | 9749232 | 2999203 | 0.3082 | 2027871 | 0.2084 | 4223334 | 0.434 |
| rep15 | 173083 | 115773 | 31.4187 | 9547927 | 2903821 | 0.3047 | 1978383 | 0.2076 | 4108061 | 0.4311 |
| rep16 | 173083 | 115773 | 30.599 | 9229782 | 2755550 | 0.2991 | 1883501 | 0.2045 | 3972596 | 0.4313 |
| rep17 | 173083 | 115773 | 31.094 | 9558001 | 2915374 | 0.3056 | 1977231 | 0.2073 | 4111049 | 0.431 |
| rep18 | 173083 | 115773 | 31.1128 | 8588610 | 2533474 | 0.2956 | 1749377 | 0.2041 | 3698257 | 0.4315 |
| rep19 | 173083 | 115773 | 39.7467 | 333608 | 15724 | 0.0483 | 76854 | 0.2363 | 139118 | 0.4277 |
| rep2 | 173083 | 115773 | 31.1411 | 9219781 | 2767080 | 0.3007 | 1883958 | 0.2047 | 3970275 | 0.4315 |
| rep20 | 173083 | 115773 | 30.2899 | 8847307 | 2636965 | 0.2987 | 1804633 | 0.2044 | 3808074 | 0.4313 |
| rep21 | 173083 | 115773 | 31.0712 | 9426047 | 2869017 | 0.305 | 1944767 | 0.2067 | 4058549 | 0.4314 |
| rep22 | 173083 | 115773 | 31.1637 | 9441928 | 2870608 | 0.3046 | 1956225 | 0.2076 | 4063422 | 0.4312 |
| rep23 | 173083 | 115773 | 30.8915 | 9744515 | 3017130 | 0.3102 | 2035212 | 0.2093 | 4221042 | 0.434 |
| rep24 | 173083 | 115773 | 20.2332 | 3596764 | 826487 | 0.2308 | 518131 | 0.1447 | 1639437 | 0.4578 |
| rep25 | 173083 | 115773 | 16.2121 | 20385797 | 4614921 | 0.2266 | 2287940 | 0.1123 | 9087380 | 0.4462 |
| rep3 | 173083 | 115773 | 30.8067 | 9558704 | 2914903 | 0.3055 | 1977659 | 0.2073 | 4110448 | 0.4309 |
| rep4 | 173083 | 115773 | 31.0242 | 9349720 | 2830063 | 0.3033 | 1920223 | 0.2058 | 4029837 | 0.4319 |
| rep5 | 173083 | 115773 | 30.4008 | 9452616 | 2848586 | 0.3019 | 1947733 | 0.2065 | 4067785 | 0.4312 |
| rep6 | 173083 | 115773 | 31.0729 | 9509873 | 2898122 | 0.3053 | 1959179 | 0.2064 | 4093673 | 0.4313 |
| rep7 | 173083 | 115773 | 30.7313 | 8967548 | 2674525 | 0.2989 | 1821727 | 0.2036 | 3860845 | 0.4314 |
| rep8 | 173083 | 115773 | 30.8715 | 9247479 | 2779634 | 0.3012 | 1896445 | 0.2055 | 3981285 | 0.4314 |
| rep9 | 173083 | 115773 | 30.6215 | 8719936 | 2598088 | 0.2986 | 1768828 | 0.2033 | 3754374 | 0.4315 |
| Metric | Value |
|---|---|
| auPRC | 0.6002 |
| Calibrated Recall at 50% FDR | 0.219 |
| Number of Positive Examples in Test Data | 148620 |
| Number of Negative Examples in Test Data | 7922231 |
| Imbalance Ratio in Test Data | 0.0184 |
| Test Chromosomes | chr2, chr3, chr19 |
from matlas.modisco_report import display_comparative_motif_sets
# Show the motif comparison for this sample; the captured output tables
# have "Modisco" and "Homer" columns.
display_comparative_motif_sets(
    sample_name,
    homer_root,
    modisco_root,
)
TF-MoDISco is using the TensorFlow backend.
| Motif Name | Modisco | Homer |
|---|---|---|
| Nfic | ||
| Nfia | ||
| Ctcf | ||
| Fos | ||
| Rest | ||
| Mbtps2 | ||
| Motif Name | Modisco | Homer |
|---|---|---|
| Ets1 | absent | |
| Smarcc2 | absent | |
| Rfx1 | absent | |
| Cebpb | absent | |
| Sp3 | absent | |
| Ebf1 | absent | |
| Foxi1 | absent | |
| Mef2a | absent | |
| Irf1 | absent | |
| Hsf1 | absent | |
| Creb3 | absent | |
| Bhlha15 | absent | |
| Tcfec | absent | |
| Srf | absent | |
| Motif Name | Modisco | Homer |
|---|---|---|
| Zfp143 | absent | |
| Sp2 | absent | |
| E4f1 | absent | |
| Cebpg | absent | |
| Gabpa | absent | |
| Tbp | absent | |
| Pbx3 | absent | |
# Run the MoDISco report pipeline with importance=True; the logged output
# for this call reports motifs with positive importances for the cell type.
modisco_report_pipeline(
    sample_name,
    modisco_root,
    mitra_subdir,
    task_dir,
    database_name,
    importance=True,
    render=True,
)
rsync -t -av /srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_modisco2019/task_275-naivegw/cisbp_tomtomout /srv/www/kundaje/msharmin/report/version2/task_275-naivegw/ chmod -R +755 /srv/www/kundaje/msharmin/report/version2/task_275-naivegw Displaying motifs which has positive importances for the cell type
# Run the MoDISco report pipeline with importance=False; the logged output
# for this call reports motifs with negative importances for the cell type.
modisco_report_pipeline(
    sample_name,
    modisco_root,
    mitra_subdir,
    task_dir,
    database_name,
    importance=False,
    render=True,
)
rsync -t -av /srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_modisco2019/task_275-naivegw/cisbp_tomtomout /srv/www/kundaje/msharmin/report/version2/task_275-naivegw/ chmod -R +755 /srv/www/kundaje/msharmin/report/version2/task_275-naivegw Displaying motifs which has negative importances for the cell type