# Parameters
# Run configuration for this report. Each value is passed unchanged to one of
# the matlas.modisco_report calls further down.

# Sample identifier given to every report function.
# NOTE(review): looks like "<cell line>;<assay>;<source>" -- confirm the encoding.
sample_name = "MN1;D;En"
# Root directory of the TF-MoDISco results; the pipeline output below reads
# <modisco_root>/<task_dir>/cisbp_tomtomout from it.
modisco_root = "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_modisco2019"
# Sub-path of the web-served report tree that the results are rsynced into
# (appears as .../msharmin/report/version2/ in the pipeline output).
mitra_subdir = "report/version2"
# Per-task directory name, used under both modisco_root and the published tree.
task_dir = "task_277-naivegw"
# Motif database name passed to modisco_report_pipeline.
# NOTE(review): presumably selects the "cisbp_tomtomout" directory -- confirm.
database_name = "CISBP"
# Performance file passed to display_metadata.
# NOTE(review): presumably the source of the auPRC/recall table -- confirm.
perf_file = "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/fineFactorized/task_277-naivegw/NaiveauPRC.txt"
# Root directory passed to display_comparative_motif_sets alongside modisco_root.
# NOTE(review): presumably holds the HOMER scan results -- confirm.
homer_root = "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_scans"
from matlas.modisco_report import modisco_report_pipeline, display_metadata
# Worksheet name and workbook path handed to display_metadata together with
# sample_name and perf_file.
sheetname = "filter23"
reportfile = "/mnt/lab_data/kundaje/msharmin/mouse_hem/with_tfd/full_mouse50/filtering samples_MS2.xlsx"
load data from labcluster
Using TensorFlow backend. 2019-07-23 05:19:45,939 [WARNING] git-lfs not installed
# Render the sample's metadata, per-replicate QC and test-set performance
# tables (as seen in the captured output that follows this call).
display_metadata(sample_name, perf_file, reportfile, sheetname)
| MetaData Name | Description |
|---|---|
| Cell type | MN1 cell line |
| Cell Group | Cancer or Immortalized cells |
| Experiment Name | DHS |
| Experiment Group | ENCODE |
| replicate | Naïve overlap peaks | IDR peaks | TSS enrichment (< 8 is very poor <10 is low) | Final number of unique mapping, dup-filtered, chrM filtered reads | Number of reads in called peak regions | Fraction of reads in called peak regions | Number of reads in promoter regions | Fraction of reads in promoter regions | Number of reads in enhancer regions | Fraction of reads in enhancer regions |
|---|---|---|---|---|---|---|---|---|---|---|
| rep1 | 182850 | 134190 | 8.0448 | 132797234 | 21003613 | 0.1582 | 12701199 | 0.0957 | 48405821 | 0.3646 |
| rep2 | 182850 | 134190 | 20.8515 | 9177043 | 1806265 | 0.1972 | 1443158 | 0.1576 | 3415333 | 0.373 |
| rep3 | 182850 | 134190 | 12.7254 | 50639908 | 8695691 | 0.1718 | 6202610 | 0.1225 | 18694936 | 0.3693 |
| rep4 | 182850 | 134190 | 16.1059 | 49368768 | 14515866 | 0.2941 | 9008150 | 0.1825 | 19789715 | 0.401 |
| rep5 | 182850 | 134190 | 36.3251 | 2981833 | 916801 | 0.3095 | 820438 | 0.2769 | 1168459 | 0.3944 |
| rep6 | 182850 | 134190 | 36.6876 | 2113734 | 553459 | 0.2642 | 583356 | 0.2785 | 836972 | 0.3996 |
| rep7 | 182850 | 134190 | 21.235 | 25274277 | 7900036 | 0.3128 | 5458655 | 0.2161 | 10295161 | 0.4077 |
| rep8 | 182850 | 134190 | 25.4546 | 12721521 | 3690740 | 0.2906 | 2904236 | 0.2286 | 5145772 | 0.4051 |
| Metric | Value |
|---|---|
| auPRC | 0.5667 |
| Calibrated Recall at 50% FDR | 0.208 |
| Number of Positive Examples in Test Data | 152489 |
| Number of Negative Examples in Test Data | 7918362 |
| Imbalance Ratio in Test Data | 0.0189 |
| Test Chromosomes | chr2, chr3, chr19 |
from matlas.modisco_report import display_comparative_motif_sets
# Render the motif comparison tables for this sample; the captured output
# lists each motif name with a "Modisco" and a "Homer" column.
display_comparative_motif_sets(sample_name, homer_root, modisco_root)
TF-MoDISco is using the TensorFlow backend.
| Motif Name | Modisco | Homer |
|---|---|---|
| Nfia | ||
| Pbx3 | ||
| Rfx1 | ||
| Ctcf | ||
| Fos | ||
| Creb3 | ||
| Prrxl1 | ||
| Ebf1 | ||
| Motif Name | Modisco | Homer |
|---|---|---|
| Atoh1 | absent | |
| Irf1 | absent | |
| Mbtps2 | absent | |
| Sp3 | absent | |
| Gata2 | absent | |
| Smarcc2 | absent | |
| Mef2d | absent | |
| Erf | absent | |
| Hand1 | absent | |
| Hsf1 | absent | |
| Motif Name | Modisco | Homer |
|---|---|---|
| Zfp637 | absent | |
| Klf15 | absent | |
| Zfp143 | absent | |
| Klf5 | absent | |
| Gabpa | absent | |
# Run the report pipeline for motifs with positive importance
# (importance=True), rendering the result inline.
modisco_report_pipeline(
    sample_name,
    modisco_root,
    mitra_subdir,
    task_dir,
    database_name,
    importance=True,
    render=True,
)
rsync -t -av /srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_modisco2019/task_277-naivegw/cisbp_tomtomout /srv/www/kundaje/msharmin/report/version2/task_277-naivegw/ chmod -R +755 /srv/www/kundaje/msharmin/report/version2/task_277-naivegw Displaying motifs which has positive importances for the cell type
# Run the report pipeline for motifs with negative importance
# (importance=False), rendering the result inline.
modisco_report_pipeline(
    sample_name,
    modisco_root,
    mitra_subdir,
    task_dir,
    database_name,
    importance=False,
    render=True,
)
rsync -t -av /srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_modisco2019/task_277-naivegw/cisbp_tomtomout /srv/www/kundaje/msharmin/report/version2/task_277-naivegw/ chmod -R +755 /srv/www/kundaje/msharmin/report/version2/task_277-naivegw Displaying motifs which has negative importances for the cell type