##################################
#                                #
# Last modified 2024/12/27       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os

def run():

    if len(sys.argv) < 1:
        print 'usage: python %s config' % sys.argv[0]
        print '\tconfig format:'
        print '\tlabel\tfastq1\tfastq2\tbowtie_index_prefix\tchrom.sizes'
        print '\t\tNote: list second fastq as nan if only processing as SE'
        sys.exit(1)

    config = sys.argv[1]

    DataDict = {}

    linelist = open(config)
    for line in linelist:
        if line.startswith('#'):
            continue
        fields = line.strip().split('\t')
        label = fields[0]
        end1 = fields[1]
        end2 = fields[2]
        BTindex = fields[3]
        CSfile = fields[4]
        DataDict[label] = {}
        DataDict[label]['input'] = (end1,end2,BTindex,CSfile)
    
    labels = DataDict.keys()
    labels.sort()

    print 'generating countlines.sh'

    outfile = open('countlines.sh','w')
    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = 'zcat ' + end1.replace(',',' ') + ' | wc -l > ' + label + '.fastq.lines'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating map-1x36mers.sh'
    outfile = open('map-1x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/trimfastq.py ' + end1 
        outline = outline + ' 36 -stdout | /oak/stanford/groups/akundaje/marinovg/programs/bowtie-1.0.1+hamrhein_nh_patch/bowtie '
        outline = outline + BTindex
        outline = outline + ' -p 20 -v 2 -k 2 -m 1 -t --best --strata -q --sam-nh --sam - | /oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools view -F4 -bT '
        outline = outline + BTindex + '.fa'
        outline = outline + ' - | /oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools sort - ' + label + '.1x36mers.unique'
        outfile.write(outline + '\n')
        if end2 == 'nan':
            pass
        else:
            continue
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/trimfastq.py ' + end1 
        outline = outline + ' 36 -stdout | /oak/stanford/groups/akundaje/marinovg/programs/bowtie-1.0.1+hamrhein_nh_patch/bowtie '
        outline = outline + BTindex
        outline = outline + ' -p 20 -v 1 -a -t --best --strata -q --sam-nh --sam - | /oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools view -F4 -bT '
        outline = outline + BTindex + '.fa'
        outline = outline + ' - | /oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools sort - ' + label + '.SE.a'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating map-2x36mers.sh'
    outfile = open('map-2x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/PEFastqToTabDelimited.py ' + end1 + ' ' + end2
        outline = outline + ' -trim 36 36 | /oak/stanford/groups/akundaje/marinovg/programs/bowtie-1.0.1+hamrhein_nh_patch/bowtie '
        outline = outline + BTindex
        outline = outline + ' -p 20 -v 1 -a -t --best --strata -q --sam-nh -X 1000 --sam --12 - | /oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools view -F4 -bT '
        outline = outline + BTindex + '.fa'
        outline = outline + ' - | /oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools sort - ' + label + '.PE.a'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating run_spp.sh'
    outfile = open('run_spp.sh','w')

    for label in labels:
        outline = 'Rscript ~/code/spp/spp_package/run_spp.R -c=' + label + '.1x36mers.unique.bam'
        outline = outline + ' -p=20 -savp -rf -s=-0:2:400 -out=' + label + '.1x36mers.QC' 
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating samtools-index-1x36mers.sh'
    outfile = open('samtools-index-1x36mers.sh','w')

    for label in labels:
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools index ' + label + '.1x36mers.unique.bam'
        outfile.write(outline + '\n')
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools index ' + label + '.SE.a.bam'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating samtools-index-2x36mers.sh'
    outfile = open('samtools-index-2x36mers.sh','w')

    for label in labels:
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools index ' + label + '.PE.a.bam'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating PEInsertDistFromBAM-2x36mers.sh'
    outfile = open('PEInsertDistFromBAM-2x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/PEInsertDistFromBAM.py ' + label + '.PE.a.bam' + ' ' + CSfile + ' ' + label + '.2x36mers.InsertLength'
        outfile.write(outline + ' -uniqueBAM -normalize\n')

    outfile.close()

    print 'generating SAMstats-2x36mers.sh'
    outfile = open('SAMstats-2x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/SAMstats.py ' + label + '.PE.a.bam' + ' SAMstats-' + label + '.PE.a'
        outline = outline + ' -bam ' + CSfile + ' /oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools -paired'
        outfile.write(outline + '\n')
    outfile.close()

    print 'generating SAMstats-1x36mers.sh'
    outfile = open('SAMstats-1x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        if end2 == 'nan':
            outline = 'python /oak/stanford/groups/akundaje/marinovg/code/SAMstats.py ' + label + '.SE.a.bam' + ' SAMstats-' + label + '.SE.a'
            outline = outline + ' -bam ' + CSfile + ' /oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools -paired'
            outfile.write(outline + '\n')
        else:
            outline = 'python /oak/stanford/groups/akundaje/marinovg/code/SAMstats.py ' + label + '.1x36mers.unique.bam' + ' SAMstats-' + label + '.1x36mers.unique'
            outline = outline + ' -bam ' + CSfile + ' /oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools -paired'
            outfile.write(outline + '\n')
        
    outfile.close()

    print 'generating makewiggle-2x36mers.sh'
    outfile = open('makewiggle-2x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/makewigglefromBAM-NH.py --- ' + label + '.PE.a.bam' + ' ' + CSfile
        outline = outline + ' ' + label + '.PE.a.wig' + ' -notitle -RPM'
        outfile.write(outline + '\n')

    print 'generating makewiggle-stranded-2x36mers.sh'
    outfile = open('makewiggle-stranded-2x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/makewigglefromBAM-NH.py --- ' + label + '.PE.a.bam' + ' ' + CSfile
        outline = outline + ' ' + label + '.PE.a.plus.wig' + ' -notitle -RPM -stranded +'
        outfile.write(outline + '\n')
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/makewigglefromBAM-NH.py --- ' + label + '.PE.a.bam' + ' ' + CSfile
        outline = outline + ' ' + label + '.PE.a.minus.wig' + ' -notitle -RPM -stranded -'
        outfile.write(outline + '\n')

    print 'generating make5pwiggle-2x36mers.sh'
    outfile = open('make5pwiggle-2x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/make5primeWigglefromBAM-NH.py --- ' + label + '.PE.a.bam' + ' ' + CSfile
        outline = outline + ' ' + label + '.PE.a.5p.counts.plus.wig' + ' -notitle -uniqueBAM -stranded +'
        outfile.write(outline + '\n')
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/make5primeWigglefromBAM-NH.py --- ' + label + '.PE.a.bam' + ' ' + CSfile
        outline = outline + ' ' + label + '.PE.a.5p.counts.minus.wig' + ' -notitle -uniqueBAM -stranded - -absValue'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating wigtobigwig-wig-2x36mers.sh'
    outfile = open('wigtobigwig-wig-2x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/UCSC-utils-2017-07-13/wigToBigWig ' + label + '.PE.a.wig' 
        outline = outline + ' ' + CSfile + ' ' + label + '.PE.a.bigWig'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating wigtobigwig-wigplusminus-2x36mers.sh'
    outfile = open('wigtobigwig-wigplusminus-2x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/UCSC-utils-2017-07-13/wigToBigWig ' + label + '.PE.a.plus.wig' 
        outline = outline + ' ' + CSfile + ' ' + label + '.PE.a.plus.bigWig'
        outfile.write(outline + '\n')
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/UCSC-utils-2017-07-13/wigToBigWig ' + label + '.PE.a.minus.wig' 
        outline = outline + ' ' + CSfile + ' ' + label + '.PE.a.minus.bigWig'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating wigtobigwig-wigplusminus5p-2x36mers.sh'
    outfile = open('wigtobigwig-wigplusminus5p-2x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/UCSC-utils-2017-07-13/wigToBigWig ' + label + '.PE.a.5p.counts.plus.wig' 
        outline = outline + ' ' + CSfile + ' ' + label + '.PE.a.5p.counts.plus.bigWig'
        outfile.write(outline + '\n')
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/UCSC-utils-2017-07-13/wigToBigWig ' + label + '.PE.a.5p.counts.minus.wig' 
        outline = outline + ' ' + CSfile + ' ' + label + '.PE.a.5p.counts.minus.bigWig'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating makewiggle-1x36mers.sh'
    outfile = open('makewiggle-1x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/makewigglefromBAM-NH.py --- ' + label + '.SE.a.bam' + ' ' + CSfile
        outline = outline + ' ' + label + '.SE.a.wig' + ' -notitle -RPM -uniqueBAM'
        outfile.write(outline + '\n')

    print 'generating makewiggle-stranded-1x36mers.sh'
    outfile = open('makewiggle-stranded-1x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/makewigglefromBAM-NH.py --- ' + label + '.SE.a.bam' + ' ' + CSfile
        outline = outline + ' ' + label + '.SE.a.plus.wig' + ' -notitle -RPM -uniqueBAM -stranded +'
        outfile.write(outline + '\n')
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/makewigglefromBAM-NH.py --- ' + label + '.SE.a.bam' + ' ' + CSfile
        outline = outline + ' ' + label + '.SE.a.minus.wig' + ' -notitle -RPM -uniqueBAM -stranded -'
        outfile.write(outline + '\n')

    print 'generating make5pwiggle-1x36mers.sh'
    outfile = open('make5pwiggle-1x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/make5primeWigglefromBAM-NH.py --- ' + label + '.SE.a.bam' + ' ' + CSfile
        outline = outline + ' ' + label + '.SE.a.5p.counts.plus.wig' + ' -notitle -uniqueBAM -stranded +'
        outfile.write(outline + '\n')
        outline = 'python /oak/stanford/groups/akundaje/marinovg/code/make5primeWigglefromBAM-NH.py --- ' + label + '.SE.a.bam' + ' ' + CSfile
        outline = outline + ' ' + label + '.SE.a.5p.counts.minus.wig' + ' -notitle -uniqueBAM -stranded -'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating wigtobigwig-wig-1x36mers.sh'
    outfile = open('wigtobigwig-wig-1x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/UCSC-utils-2017-07-13/wigToBigWig ' + label + '.SE.a.wig' 
        outline = outline + ' ' + CSfile + ' ' + label + '.SE.a.bigWig'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating wigtobigwig-wigplusminus-1x36mers.sh'
    outfile = open('wigtobigwig-wigplusminus-1x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/UCSC-utils-2017-07-13/wigToBigWig ' + label + '.SE.a.plus.wig' 
        outline = outline + ' ' + CSfile + ' ' + label + '.SE.a.plus.bigWig'
        outfile.write(outline + '\n')
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/UCSC-utils-2017-07-13/wigToBigWig ' + label + '.SE.a.minus.wig' 
        outline = outline + ' ' + CSfile + ' ' + label + '.SE.a.minus.bigWig'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating wigtobigwig-wigplusminus5p-1x36mers.sh'
    outfile = open('wigtobigwig-wigplusminus5p-1x36mers.sh','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/UCSC-utils-2017-07-13/wigToBigWig ' + label + '.SE.a.5p.counts.plus.wig' 
        outline = outline + ' ' + CSfile + ' ' + label + '.SE.a.5p.counts.plus.bigWig'
        outfile.write(outline + '\n')
        outline = '/oak/stanford/groups/akundaje/marinovg/programs/UCSC-utils-2017-07-13/wigToBigWig ' + label + '.SE.a.5p.counts.minus.wig' 
        outline = outline + ' ' + CSfile + ' ' + label + '.SE.a.5p.counts.minus.bigWig'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating SAMstats-2x36mers.files'
    outfile = open('SAMstats-2x36mers.files','w')

    for label in labels:
        outline = label + '.PE.a' + '\t' + 'SAMstats-' + label + '.PE.a'
        outfile.write(outline + '\n')

    outfile.close()

    print 'generating SAMstats-1x36mers.files'
    outfile = open('SAMstats-1x36mers.files','w')

    for label in labels:
        (end1,end2,BTindex,CSfile) = DataDict[label]['input']
        if end2 == 'nan':
            outline = label + '.SE.a' + '\t' + 'SAMstats-' + label + '.SE.a'
            outfile.write(outline + '\n')
        else:
            outline = label + '.1x36mers.unique' + '\t' + 'SAMstats-' + label + '.1x36mers.unique'
            outfile.write(outline + '\n')

    outfile.close()

run()
