In [1]:
# Parameters
# Run configuration for this report. Values are plain literals so they can be
# overridden per run; the remaining cells only read these names.

# --- What is being reported ---
sample_name = "3134_mammaryADNCRCNM;D;En"
task_dir = "task_267-naivegw"
database_name = "CISBP"

# --- Input locations ---
# TF-MoDISco results root and Homer scan root (both are handed to the motif comparison).
modisco_root = "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_modisco2019"
homer_root = "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_scans"
# Model performance file for this task (handed to the metadata display).
perf_file = "/srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/fineFactorized/task_267-naivegw/NaiveauPRC.txt"

# --- Output location ---
# Sub-directory passed to the report pipeline; presumably relative to the web
# root the report is published under -- TODO confirm.
mitra_subdir = "report/version2"
In [2]:
# All matlas.modisco_report helpers used by this notebook are imported here, in
# the first import cell, so a missing or renamed helper fails immediately on a
# fresh kernel instead of part-way through the report.
from matlas.modisco_report import (
    display_comparative_motif_sets,
    display_metadata,
    modisco_report_pipeline,
)

# Spreadsheet (and the sheet within it) handed to display_metadata.
# NOTE: the file name really does contain a space ("filtering samples_MS2.xlsx").
reportfile = "/mnt/lab_data/kundaje/msharmin/mouse_hem/with_tfd/full_mouse50/filtering samples_MS2.xlsx"
sheetname = "filter23"
load data from labcluster
Using TensorFlow backend.
2019-07-23 04:47:16,227 [WARNING] git-lfs not installed
In [3]:
# Show the metadata tables for `sample_name` (the output below contains sample
# information, pipeline QC per replicate, and modelling metrics).
# Inputs handed to the helper: the performance file plus the spreadsheet/sheet
# defined in the import cell. Presumably the modelling metrics come from
# perf_file and the sample/QC rows from the spreadsheet -- TODO confirm.
display_metadata(sample_name, perf_file, reportfile, sheetname)
    Sample Information
    MetaData Name | Description
    Cell type | murine mammary adenocarcinoma cell line (3134 cell line)
    Cell Group | Cancer or Immortalized cells
    Experiment Name | DHS
    Experiment Group | ENCODE
    Pipeline Output
    replicateNaïve overlap peaksIDR peaksTSS enrichment (< 8 is very poor <10 is low)Final number of unique mapping, dup-filtered, chrM filtered readsNumber of reads in called peak regionsFraction of reads in called peak regionsNumber of reads in promoter regionsFraction of reads in promoter regionsNumber of reads in enhancer regionsFraction of reads in enhancer regions
    rep11369478793039.7157702250931581820.450526945330.384324036680.3428
    rep101369478793051.924334425116603000.497615071260.451710074250.3019
    rep111369478793052.764116838107938900.47317326230.43665109510.3045
    rep121369478793048.1718478049722521460.47220521910.430115667380.3283
    rep131369478793046.5175283528112479330.441311419200.40388936890.3161
    rep141369478793044.3893497123722919350.461920767630.418516559710.3337
    rep151369478793035.2295779733532155860.41326444210.339727927960.3587
    rep161369478793046.6652510096823777390.46721572050.423716798450.3299
    rep171369478793024.90761915904368712500.358950428920.263472209830.3772
    rep181369478793047.8135285803012798840.44911685370.418964540.3145
    rep191369478793047.9708497191023478310.47321422690.431616303810.3285
    rep21369478793048.6307264820711728740.444110865520.41148287500.3138
    rep201369478793034.7946772237730847410.400125239560.327427775870.3602
    rep211369478793045.3599300147913249390.442611981670.40039530870.3184
    rep221369478793031.076454068613167070.290712231970.2715471990.3416
    rep231369478793038.5522710738532208000.453927304650.384824733630.3485
    rep241369478793042.1188583973825574500.438822709710.389619633870.3368
    rep251369478793047.5765293289713097900.447811885980.40639239030.3158
    rep261369478793048.4718507164324191800.477821885890.432316642410.3287
    rep271369478793037.4407722833727933650.387123901400.331225203600.3492
    rep281369478793013.538979410508904030.11237429790.093724692250.3113
    rep291369478793020.764211783371428850.12171578940.13453681510.3136
    rep31369478793045.1851485588122927230.47320846150.4316089710.3319
    rep41369478793035.8257744321329480310.396724397020.328326832730.3611
    rep51369478793034.7476758306829836570.394124505890.323727353880.3613
    rep61369478793046.623292789713037100.446511835460.40539255200.3169
    rep71369478793030.5213450622812731260.283211677040.259815365870.3418
    rep81369478793039.9602649664828064240.432824438160.376822157990.3417
    rep91369478793039.9044692310830609990.442926476500.383123541910.3406
    Modelling Metadata
    Metric | Value
    auPRC | 0.5918
    Calibrated Recall at 50% FDR | 0.226
    Number of Positive Examples in Test Data | 112426
    Number of Negative Examples in Test Data | 7958425
    Imbalance Ratio in Test Data | 0.0139
    Test Chromosomes | chr2, chr3, chr19
In [4]:
# Compare the motifs reported for this sample by TF-MoDISco against those from
# Homer de novo discovery; the output below lists shared motifs and the motifs
# unique to each method.
from matlas.modisco_report import display_comparative_motif_sets
# Takes the Homer scan root and the TF-MoDISco results root for the sample.
display_comparative_motif_sets(sample_name, homer_root, modisco_root)
TF-MoDISco is using the TensorFlow backend.
Number of CISBP motifs obtained by TF-MoDISco and Homer-denovo
Shared Motifs
Motif NameModiscoHomer
Fos
Runx2
Ctcf
Creb3
Pbx3
Mbtps2
Nfia
Unique TF-MoDISco Motifs
Motif NameModiscoHomer
Ebf1absent
Cebpbabsent
Mef2dabsent
Bach2absent
Sp3absent
Srfabsent
Erfabsent
Hsf1absent
Relaabsent
Creb1absent
Irf1absent
Ctcflabsent
Smarcc2absent
Rfx2absent
Unique Homer Motifs
Motif NameModiscoHomer
Gabpaabsent
Sp5absent
Zfp143absent
In [5]:
# Run the TF-MoDISco report with importance=True. In this run the output shows
# the motifs with positive importance for the cell type.
# The output also shows rsync/chmod commands targeting a web directory built
# from mitra_subdir and task_dir, so this call appears to publish files as a
# side effect -- confirm before re-running against a shared location.
modisco_report_pipeline(sample_name, modisco_root, mitra_subdir, task_dir, database_name, 
                        importance=True, render=True)
rsync -t -av /srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_modisco2019/task_267-naivegw/cisbp_tomtomout /srv/www/kundaje/msharmin/report/version2/task_267-naivegw/
chmod -R +755 /srv/www/kundaje/msharmin/report/version2/task_267-naivegw
Displaying motifs which has positive importances for the cell type
metacluster_0, # patterns: 28, # seqlets: 17937, Positive for: 3134_mammaryADNCRCNM;D;En
In [6]:
# Same report with importance=False. In this run the output states that no
# motifs with negative importance were found, so this appears to select the
# negative-importance motifs -- confirm against the helper's documentation.
# As with the importance=True call, the output shows rsync/chmod commands
# targeting the web directory for this task.
modisco_report_pipeline(sample_name, modisco_root, mitra_subdir, task_dir, database_name, 
                        importance=False, render=True)
rsync -t -av /srv/scratch/msharmin/mouse_hem/with_tfd/full_mouse50/Naive_modisco2019/task_267-naivegw/cisbp_tomtomout /srv/www/kundaje/msharmin/report/version2/task_267-naivegw/
chmod -R +755 /srv/www/kundaje/msharmin/report/version2/task_267-naivegw
No motifs with negative importance