Get the QC metrics from the pipeline

In [141]:
# Imports
from basepair.imports import *
from basepair.exp.paper.config import data_sheet
In [142]:
# Load the experiment data sheet and reduce it to the replicates that go into
# the QC summary.
reps = data_sheet()

# Keep rows that have a QC report and are not control samples. The explicit
# .copy() makes the column assignments below write to an independent frame
# instead of a filtered slice of the data sheet (avoids SettingWithCopyWarning
# and the risk of the assignment being silently lost).
has_qc_report = reps['QC report'].notnull()
is_not_control = reps['TF Name'] != 'control'
reps = reps[has_qc_report & is_not_control].copy()

reps['Rep Number'] = reps['Rep Number'].astype(int)

# Replicate id of the form "<Data Type>-<TF Name>-<Rep Number>".
# Zipping the three columns avoids materialising a full row Series per
# replicate, which is what iterrows() does.
reps['id'] = [
    f"{data_type}-{tf_name}-{rep_number}"
    for data_type, tf_name, rep_number in zip(
        reps['Data Type'], reps['TF Name'], reps['Rep Number']
    )
]

# Restrict to the four TFs of interest and drop the klf4 ChIP-seq replicates.
is_selected_tf = reps['TF Name'].isin(['oct4', 'sox2', 'nanog', 'klf4'])
is_klf4_chipseq = (reps['TF Name'] == 'klf4') & (reps['Data Type'] == 'chipseq')
# Copy again so that later cells can add columns to `reps` without operating
# on a slice.
reps = reps[is_selected_tf & ~is_klf4_chipseq].copy()
In [143]:
# Append QC columns to reps.
#
# Map each QC report URL to the directory that holds it on the local
# filesystem. regex=False makes both replacements literal: without it the "."
# characters in the URL prefix and in "/qc.html" may be interpreted as regex
# wildcards, depending on the pandas version's default for `regex`.
reps = reps.assign(
    QC_dir=(
        reps['QC report']
        .str.replace('http://mitra.stanford.edu', '/srv/www', regex=False)
        .str.replace("/qc.html", "", regex=False)
    )
)

# Read every replicate's qc.json once, in row order.
qc_reports = [read_json(f"{qc_dir}/qc.json") for qc_dir in reps['QC_dir']]

# Add the two metrics as whole columns. Unlike rebuilding the frame from
# mutated iterrows() rows, this keeps the existing index, columns and dtypes
# untouched and still yields a frame with the expected columns when `reps`
# is empty.
reps = reps.assign(**{
    'idr_reproducibility_qc/rescue_ratio': [
        qc['idr_reproducibility_qc']['rescue_ratio'] for qc in qc_reports
    ],
    'idr_frip_qc/ppr/FRiP': [
        qc['idr_frip_qc']['ppr']['FRiP'] for qc in qc_reports
    ],
})
In [144]:
# Columns shown in the QC summary table: sample identifiers first, then the
# read/peak counts from the data sheet, then the two metrics read from qc.json.
qc_columns = [
    'Mnemonic',
    'Data Type',
    'TF Name',
    'Rep Number',
    'Unique deduped reads',
    '#Rep-IDRpeaks (N1, N2, ..)',
    '#IDR-optimal peaks (Np)',
    'idr_reproducibility_qc/rescue_ratio',
    'idr_frip_qc/ppr/FRiP',
]

# Print the whole table as plain text (no row/column truncation).
qc_table = reps[qc_columns]
print(qc_table.to_string())
   Mnemonic  Data Type TF Name  Rep Number Unique deduped reads  #Rep-IDRpeaks (N1, N2, ..)  #IDR-optimal peaks (Np)  idr_reproducibility_qc/rescue_ratio  idr_frip_qc/ppr/FRiP
0       C01    chipseq    oct4           1                  27M                   10770.0                    19351.0                    1.2593                           0.0326
1       C02    chipseq    oct4           2                  18M                   14402.0                    19351.0                    1.2593                           0.0326
2       C03    chipseq    sox2           1                  19M                     255.0                     9497.0                    1.3098                           0.0085
3       C04    chipseq    sox2           2                  49M                    8847.0                     9497.0                    1.3098                           0.0085
4       C05    chipseq    sox2           3                  32M                    4202.0                     9497.0                    1.3098                           0.0085
5       C06    chipseq   nanog           1                  48M                   33023.0                    40691.0                    1.0955                           0.0835
6       C07    chipseq   nanog           2                  20M                   28619.0                    40691.0                    1.0955                           0.0835
16      N01  chipnexus    oct4           1                  31M                    8772.0                    25849.0                    1.7319                           0.0455
17      N02  chipnexus    oct4           2                  50M                   10548.0                    25849.0                    1.7319                           0.0455
18      N03  chipnexus    oct4           3                  48M                    5996.0                    25849.0                    1.7319                           0.0455
19      N04  chipnexus    oct4           4                  51M                    7572.0                    25849.0                    1.7319                           0.0455
20      N05  chipnexus    oct4           5                  30M                    8226.0                    25849.0                    1.7319                           0.0455
21      N06  chipnexus    oct4           6                  33M                    5265.0                    25849.0                    1.7319                           0.0455
22      N07  chipnexus    sox2           1                  23M                    1914.0                    10999.0                    1.9843                           0.0174
23      N08  chipnexus    sox2           2                  43M                    3003.0                    10999.0                    1.9843                           0.0174
24      N09  chipnexus    sox2           3                  48M                    2150.0                    10999.0                    1.9843                           0.0174
25      N10  chipnexus    sox2           4                  26M                   11317.0                    10999.0                    1.9843                           0.0174
26      N11  chipnexus   nanog           1                  84M                   34226.0                    56459.0                    1.3328                           0.1425
27      N12  chipnexus   nanog           2                  62M                   18905.0                    56459.0                    1.3328                           0.1425
28      N13  chipnexus   nanog           3                  21M                   16845.0                    56459.0                    1.3328                           0.1425
29      N14  chipnexus   nanog           4                  30M                   42672.0                    56459.0                    1.3328                           0.1425
30      N15  chipnexus   nanog           5                  17M                   22156.0                    56459.0                    1.3328                           0.1425
31      N16  chipnexus    klf4           1                  98M                   51653.0                    57601.0                    1.4316                           0.1351
32      N17  chipnexus    klf4           2                  16M                    2524.0                    57601.0                    1.4316                           0.1351
33      N18  chipnexus    klf4           3                  21M                   18662.0                    57601.0                    1.4316                           0.1351
34      N19  chipnexus    klf4           4                  17M                    3183.0                    57601.0                    1.4316                           0.1351
35      N20  chipnexus    klf4           5                  27M                    5332.0                    57601.0                    1.4316                           0.1351
In [138]:
# Base data directory; the Excel and zip output paths in the following cells
# are built relative to it.
# NOTE(review): get_data_dir is not defined in this notebook - presumably it
# comes from the star import of basepair.imports; confirm.
ddir = get_data_dir()
In [99]:
# Write the QC table, with the replicate id as the first column, to an Excel
# sheet under the data directory.
qc_export_columns = ['id', *qc_columns]
qc_xlsx_path = f'{ddir}/gdata/data-sheet.qc.xlsx'
reps[qc_export_columns].to_excel(qc_xlsx_path)

Zip all the QC reports

In [139]:
# List the replicate ids, one per line, in table order.
for rep_id in reps['id']:
    print(rep_id)
chipseq-oct4-1
chipseq-oct4-2
chipseq-sox2-1
chipseq-sox2-2
chipseq-sox2-3
chipseq-nanog-1
chipseq-nanog-2
chipnexus-oct4-1
chipnexus-oct4-2
chipnexus-oct4-3
chipnexus-oct4-4
chipnexus-oct4-5
chipnexus-oct4-6
chipnexus-sox2-1
chipnexus-sox2-2
chipnexus-sox2-3
chipnexus-sox2-4
chipnexus-nanog-1
chipnexus-nanog-2
chipnexus-nanog-3
chipnexus-nanog-4
chipnexus-nanog-5
chipnexus-klf4-1
chipnexus-klf4-2
chipnexus-klf4-3
chipnexus-klf4-4
chipnexus-klf4-5
In [140]:
from zipfile import ZipFile

# Bundle every replicate's qc.html into one archive. Inside the archive each
# report is renamed to "qc.<replicate id>.html" so the files stay
# distinguishable.
qc_zip_path = f'{ddir}/gdata/qc-htmls.zip'
with ZipFile(qc_zip_path, 'w') as archive:
    for qc_dir, rep_id in zip(reps['QC_dir'], reps['id']):
        html_path = f"{qc_dir}/qc.html"
        name_in_archive = "qc." + rep_id + ".html"
        archive.write(html_path, arcname=name_in_archive)