In [1]:
%matplotlib inline
import glob
import os
import mini_ataqc
In [2]:
QCPLOTS_DIR = 'qc_plots/'

DATA_DIR = '/srv/scratch/training_camp/tc2016/user23/data/'#'/srv/scratch/shared/nandi/projects/training-camp-2016/run/'
ANALYSIS_DIR = '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/'
WEBSITE_DIR = '/srv/www/kundaje/training_camp_2016/'
WEBSITE_QCPLOTS_DIR = os.path.join(WEBSITE_DIR, QCPLOTS_DIR)

SAMPLE_DIRS = glob.glob(os.path.join(DATA_DIR, '*'))
SAMPLE_NAMES = list(map(os.path.basename, SAMPLE_DIRS))

TSS_FILE = 'sacCer3_Ensemble_TSS.bed'
CHR_SIZES = '/srv/scratch/training_camp/saccer3/sacCer3.chrom.sizes'

READ_LEN = 77
In [3]:
final_bams = glob.glob(os.path.join(ANALYSIS_DIR, '*nodup.bam'))
print(final_bams)
['/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Kz_800_S10_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Mz_3_S21_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/U_2_S29_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Ct_3_S24_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/U_1_S28_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Ct_300_S3_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/DMSO_2_S32_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Cz_3_S18_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Ct_800_S9_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Mz_300_S2_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Kt_3_S15_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Mz_800_S8_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/DMSO_1_S31_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/U_3_S30_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Kz_300_S4_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Mz_2_S20_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/It_300_S5_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Mz_1_S19_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/DMSO_2_S12_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Cz_1_S16_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/It_3_S27_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Kt_2_S14_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Cz_2_S17_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Ct_2_S23_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Cz_800_S7_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Kt_1_S13_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/It_800_S11_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/It_1_S25_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/It_2_S26_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Cz_300_S1_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/DMSO_1_S6_R1.trimmed.nodup.bam', '/srv/scratch/training_camp/tc2016/user23/analysis/aligned/Ct_1_S22_R1.trimmed.nodup.bam']
In [5]:
%matplotlib inline
for final_bam in final_bams:
    
    fname_prefix = os.path.basename(final_bam).split('.')[0]
    output_prefix = os.path.join(QCPLOTS_DIR, fname_prefix)
    
    print('Running vplot for sample {}'.format(fname_prefix))
    
    mini_ataqc.make_vplot(final_bam, TSS_FILE, output_prefix, CHR_SIZES, READ_LEN, bins=400, bp_edge=2000,
               processes=8, greenleaf_norm=True)
    
    print('Running insert metrics for sample {}'.format(fname_prefix))
    
    mini_ataqc.get_insert_distribution(final_bam, output_prefix)
Running vplot for sample Kz_800_S10_R1
Running insert metrics for sample Kz_800_S10_R1
Running vplot for sample Mz_3_S21_R1
Running insert metrics for sample Mz_3_S21_R1
Running vplot for sample U_2_S29_R1
Running insert metrics for sample U_2_S29_R1
Running vplot for sample Ct_3_S24_R1
Running insert metrics for sample Ct_3_S24_R1
Running vplot for sample U_1_S28_R1
Running insert metrics for sample U_1_S28_R1
Running vplot for sample Ct_300_S3_R1
Running insert metrics for sample Ct_300_S3_R1
Running vplot for sample DMSO_2_S32_R1
Running insert metrics for sample DMSO_2_S32_R1
Running vplot for sample Cz_3_S18_R1
Running insert metrics for sample Cz_3_S18_R1
Running vplot for sample Ct_800_S9_R1
Running insert metrics for sample Ct_800_S9_R1
Running vplot for sample Mz_300_S2_R1
Running insert metrics for sample Mz_300_S2_R1
Running vplot for sample Kt_3_S15_R1
/opt/conda/envs/py2/lib/python2.7/site-packages/matplotlib/pyplot.py:516: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
Running insert metrics for sample Kt_3_S15_R1
Running vplot for sample Mz_800_S8_R1
Running insert metrics for sample Mz_800_S8_R1
Running vplot for sample DMSO_1_S31_R1
Running insert metrics for sample DMSO_1_S31_R1
Running vplot for sample U_3_S30_R1
Running insert metrics for sample U_3_S30_R1
Running vplot for sample Kz_300_S4_R1
Running insert metrics for sample Kz_300_S4_R1
Running vplot for sample Mz_2_S20_R1
Running insert metrics for sample Mz_2_S20_R1
Running vplot for sample It_300_S5_R1
Running insert metrics for sample It_300_S5_R1
Running vplot for sample Mz_1_S19_R1
Running insert metrics for sample Mz_1_S19_R1
Running vplot for sample DMSO_2_S12_R1
Running insert metrics for sample DMSO_2_S12_R1
Running vplot for sample Cz_1_S16_R1
Running insert metrics for sample Cz_1_S16_R1
Running vplot for sample It_3_S27_R1
Running insert metrics for sample It_3_S27_R1
Running vplot for sample Kt_2_S14_R1
Running insert metrics for sample Kt_2_S14_R1
Running vplot for sample Cz_2_S17_R1
Running insert metrics for sample Cz_2_S17_R1
Running vplot for sample Ct_2_S23_R1
Running insert metrics for sample Ct_2_S23_R1
Running vplot for sample Cz_800_S7_R1
Running insert metrics for sample Cz_800_S7_R1
Running vplot for sample Kt_1_S13_R1
Running insert metrics for sample Kt_1_S13_R1
Running vplot for sample It_800_S11_R1
Running insert metrics for sample It_800_S11_R1
Running vplot for sample It_1_S25_R1
Running insert metrics for sample It_1_S25_R1
Running vplot for sample It_2_S26_R1
Running insert metrics for sample It_2_S26_R1
Running vplot for sample Cz_300_S1_R1
Running insert metrics for sample Cz_300_S1_R1
Running vplot for sample DMSO_1_S6_R1
Running insert metrics for sample DMSO_1_S6_R1
Running vplot for sample Ct_1_S22_R1
Running insert metrics for sample Ct_1_S22_R1
In [ ]:
cp -r {QCPLOTS_DIR} {WEBSITE_QCPLOTS_DIR}
In [ ]: