# Parameters
# Sample identifier; passed as the first argument to each report call below.
sample_name = "Mammary_GLND;A;GEO"
# Root directory handed to display_comparative_motif_sets and modisco_report_pipeline.
modisco_root = "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_modisco2019"
# Report sub-directory passed to modisco_report_pipeline
# (presumably the publish location under the web root -- TODO confirm).
mitra_subdir = "report/version2"
# Task directory name passed to modisco_report_pipeline.
task_dir = "task_103-naivegw"
# Motif database name passed to modisco_report_pipeline.
database_name = "CISBP"
# Performance file passed to display_metadata.
perf_file = "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/fineFactorized/task_103-naivegw/NaiveauPRC.txt"
# Root directory passed to display_comparative_motif_sets alongside modisco_root.
homer_root = "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_scans"
from matlas.modisco_report import modisco_report_pipeline, display_metadata
# Spreadsheet path and sheet name forwarded to display_metadata
# (presumably the per-sample QC workbook and its worksheet -- TODO confirm).
reportfile = "/mnt/lab_data/kundaje/msharmin/mouse_hem/with_tfd/full_mouse50/filtering samples_MS2.xlsx"
sheetname = "filter23"
load data from labcluster
Using TensorFlow backend.
2019-07-22 20:23:16,174 [WARNING] git-lfs not installed
# Show the metadata for this sample; the captured output that follows contains
# a metadata table, a per-replicate QC table and a performance-metric table.
display_metadata(
    sample_name,
    perf_file,
    reportfile,
    sheetname,
)
| MetaData Name | Description |
|---|---|
| Cell type | Mammary Gland(WT,Basal Mammary Epithelial Cells) |
| Cell Group | Other Tissues |
| Experiment Name | ATAC |
| Experiment Group | GEO |

| replicate | Naïve overlap peaks | IDR peaks | TSS enrichment (< 8 is very poor <10 is low) | Final number of unique mapping, dup-filtered, chrM filtered reads | Number of reads in called peak regions | Fraction of reads in called peak regions | Number of reads in promoter regions | Fraction of reads in promoter regions | Number of reads in enhancer regions | Fraction of reads in enhancer regions |
|---|---|---|---|---|---|---|---|---|---|---|
| rep1 | 191456 | 139863 | 22.0502 | 20992560 | 6635013 | 0.3163 | 3847879 | 0.1835 | 8752669 | 0.4173 |
| rep2 | 191456 | 139863 | 25.297 | 8698985 | 2815887 | 0.3243 | 1853096 | 0.2134 | 3593838 | 0.414 |
| rep3 | 191456 | 139863 | 16.2715 | 32838771 | 8096602 | 0.2467 | 4790272 | 0.146 | 12824706 | 0.3908 |
| rep4 | 191456 | 139863 | 15.6667 | 43058729 | 12014362 | 0.2791 | 6447698 | 0.1498 | 17518080 | 0.407 |
| rep5 | 191456 | 139863 | 10.3609 | 29442898 | 1832223 | 0.0623 | 1928621 | 0.0655 | 9464665 | 0.3217 |

| Metric | Value |
|---|---|
| auPRC | 0.6575 |
| Calibrated Recall at 50% FDR | 0.195 |
| Number of Positive Examples in Test Data | 176806 |
| Number of Negative Examples in Test Data | 7894045 |
| Imbalance Ratio in Test Data | 0.0219 |
| Test Chromosomes | chr2, chr3, chr19 |
from matlas.modisco_report import display_comparative_motif_sets
# Compare motif sets for this sample; the captured output that follows is a
# series of tables with "Motif Name", "Modisco" and "Homer" columns.
display_comparative_motif_sets(
    sample_name,
    homer_root,
    modisco_root,
)
TF-MoDISco is using the TensorFlow backend.
| Motif Name | Modisco | Homer |
|---|---|---|
| Gabpa | ||
| Rela | ||
| Fos | ||
| Ctcf | ||

| Motif Name | Modisco | Homer |
|---|---|---|
| Srf | absent | |
| Runx2 | absent | |
| Tcfap2a | absent | |
| Rfx1 | absent | |
| Cebpb | absent | |
| Sp3 | absent | |
| Batf3 | absent | |
| Trp73 | absent | |
| Smarcc2 | absent | |
| Mbtps2 | absent | |
| E2f1 | absent | |
| Sox17 | absent | |
| Creb1 | absent | |
| Nfia | absent | |
| Irf1 | absent | |
| Foxi1 | absent | |

| Motif Name | Modisco | Homer |
|---|---|---|
| Zfp143 | absent | |
| Creb3 | absent | |
| Pbx3 | absent | |
| Sp2 | absent | |
| Cebpg | absent | |
# Run the report pipeline with importance=True. The captured output after this
# call shows an rsync/chmod of the tomtom output into the web directory and
# reports motifs with positive importances for the cell type.
modisco_report_pipeline(
    sample_name,
    modisco_root,
    mitra_subdir,
    task_dir,
    database_name,
    importance=True,
    render=True,
)
rsync -t -av /srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_modisco2019/task_103-naivegw/cisbp_tomtomout /srv/www/kundaje/msharmin/report/version2/task_103-naivegw/
chmod -R +755 /srv/www/kundaje/msharmin/report/version2/task_103-naivegw
Displaying motifs which has positive importances for the cell type
# Run the report pipeline with importance=False. The captured output after this
# call shows an rsync/chmod of the tomtom output into the web directory and
# reports motifs with negative importances for the cell type.
modisco_report_pipeline(
    sample_name,
    modisco_root,
    mitra_subdir,
    task_dir,
    database_name,
    importance=False,
    render=True,
)
rsync -t -av /srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_modisco2019/task_103-naivegw/cisbp_tomtomout /srv/www/kundaje/msharmin/report/version2/task_103-naivegw/
chmod -R +755 /srv/www/kundaje/msharmin/report/version2/task_103-naivegw
Displaying motifs which has negative importances for the cell type