##################################
#                                #
# Last modified 2023/06/18       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os

# Installation-specific resource locations read by run().  Judging by how
# run() selects them, the prefix letter encodes the config "species" value:
#   C* -> 'Caulobacter', P* -> 'S.pombe', S* -> 'NA'.
_COMBINED_DIR = '/oak/stanford/groups/akundaje/marinovg/Stanford_bootcamp/combined_indexes/'
_ECOLI_GENOME_DIR = '/oak/stanford/groups/akundaje/marinovg/genomes/Bacteria/Escherichia_coli_str._K-12_substr._MG1655-ASM584v2/'

# Bowtie index prefixes; each matching FASTA is the same path plus '.fa'.
Cbowtie = _COMBINED_DIR + 'Escherichia_coli_str._K-12_substr._MG1655+Caulobacter_crescentus_NA1000'
Cgenome = Cbowtie + '.fa'
Sbowtie = _ECOLI_GENOME_DIR + 'bowtie-indexes/GCA_000005845.2_ASM584v2_genomic'
Sgenome = Sbowtie + '.fa'

# chrom.sizes files.  The *noM names currently point at the same files as
# their plain counterparts.
SC3CS = _ECOLI_GENOME_DIR + 'GCA_000005845.2_ASM584v2_genomic.chrom.sizes'
SCS = _COMBINED_DIR + 'Escherichia_coli_str._K-12_substr._MG1655.chrom.sizes'
SCSnoM = SCS
CCS = _COMBINED_DIR + 'Caulobacter_crescentus_NA1000.chrom.sizes'
CCSnoM = CCS

# S. pombe resources are disabled; run() refers to these four names whenever
# a sample's species is 'S.pombe', so they must be restored before use.
# Pbowtie = _COMBINED_DIR + 'Escherichia_coli_str._K-12_substr._MG1655+Schizosaccharomyces_pombe.ASM294v2.20'
# Pgenome = Pbowtie + '.fa'
# PCS = _COMBINED_DIR + 'Schizosaccharomyces_pombe.ASM294v2.20.chrom.sizes'
# PCSnoM = PCS

# Installation-specific locations of the helper scripts and external programs
# named in the generated command lines.
_CODE_DIR = '/oak/stanford/groups/akundaje/marinovg/code/'
_BOWTIE = '/oak/stanford/groups/akundaje/marinovg/programs/bowtie-1.0.1+hamrhein_nh_patch/bowtie'
_SAMTOOLS = '/oak/stanford/groups/akundaje/marinovg/programs/samtools-0.1.18/samtools'
_WIG_TO_BIGWIG = '/oak/stanford/groups/akundaje/marinovg/programs/UCSC-utils-2017-07-13/wigToBigWig'
_OLD_TO_NEW = '/oak/stanford/groups/akundaje/marinovg/Stanford_bootcamp/combined_indexes/Escherichia_coli_str._K-12_substr._MG1655.old_to_new'

# Genome name embedded in the generated file names.
_ECOLI = 'Escherichia_coli_str._K-12_substr._MG1655'

# Track stems (dictionary key and file name without label and extension) of
# the E. coli tracks registered for samples that carry a spike-in.
_ECOLI_UNIQUE = '1x36mers.' + _ECOLI + '.unique.nochrM'
_ECOLI_MULTI = '2x36mers.' + _ECOLI + '.a.nochrM.ERMTOC'
# Converted to bigWig with SCS ...
_ECOLI_NATIVE_STEMS = (
    _ECOLI_UNIQUE,
    _ECOLI_MULTI,
    _ECOLI_MULTI + '.fragment',
    _ECOLI_MULTI + '.fragmentMidPoint_50bp',
)
# ... and, after columnRename.py, with SC3CS.
_ECOLI_RENAMED_STEMS = (
    _ECOLI_MULTI + '.fragmentMidPoint_50bp_Ecoli',
    _ECOLI_MULTI + '.fragment_Ecoli',
)

# Spike-in track stems; '%s' is 'spike' in the dictionary key and the genome
# name in the file name.
_SPIKE_TEMPLATES = (
    '1x36mers.%s.unique.nochrM',
    '2x36mers.%s.a.nochrM.ERMTOC',
    '2x36mers.%s.a.nochrM.ERMTOC.fragment',
    '2x36mers.%s.a.nochrM.ERMTOC.fragmentMidPoint_50bp',
)

# (key stem, file stem) pairs of the tracks registered for species 'NA'.
_PLAIN_TRACKS = (
    ('1x36mers.unique.nochrM', '1x36mers.' + _ECOLI + '.unique.nochrM'),
    ('2x36mers.unique', '2x36mers.' + _ECOLI + '.unique'),
    ('2x36mers.unique.fragment', '2x36mers.' + _ECOLI + '.unique.fragment'),
    ('2x36mers.unique.fragmentMidPoint_50bp',
     '2x36mers.' + _ECOLI + '.unique.fragmentMidPoint_50bp'),
)

# (stem suffix, extra makewigglefromBAM-NH.py flag) for the three wiggle
# variants produced from every paired-end BAM.
_WIGGLE_MODES = (
    ('', ''),
    ('.fragment', ' -fullFragment'),
    ('.fragmentMidPoint_50bp', ' -fullFragmentMidPoint 25'),
)

_SPIKED = ('Caulobacter', 'S.pombe')

# (output file, species included, key of the wig file listed) for the
# cumulativeGeneProfiles input lists, in the order they are written.
_PROFILE_LISTS = (
    ('cumulativeGeneProfiles.' + _ECOLI + '.files',
     _SPIKED, _ECOLI_UNIQUE + '.wig'),
    ('cumulativeGeneProfiles.Caulobacter.files',
     ('Caulobacter',), '1x36mers.spike.unique.nochrM.wig'),
    ('cumulativeGeneProfiles.S_pombe.files',
     ('S.pombe',), '1x36mers.spike.unique.nochrM.wig'),
    ('cumulativeGeneProfiles.with_multimappers.nochrM.ERMTOC.' + _ECOLI + '.files',
     _SPIKED, _ECOLI_MULTI + '.wig'),
    ('cumulativeGeneProfiles.with_multimappers.nochrM.ERMTOC.Caulobacter.files',
     ('Caulobacter',), '2x36mers.spike.a.nochrM.ERMTOC.wig'),
    ('cumulativeGeneProfiles.with_multimappers.nochrM.ERMTOC.S_pombe.files',
     ('S.pombe',), '2x36mers.spike.a.nochrM.ERMTOC.wig'),
    ('cumulativeGeneProfiles.with_multimappers.nochrM.ERMTOC.' + _ECOLI
     + '-fullFragmentMidPoint.files',
     _SPIKED, _ECOLI_MULTI + '.fragmentMidPoint_50bp.wig'),
    ('cumulativeGeneProfiles.' + _ECOLI + '-fullFragmentMidPoint.files',
     ('NA',), '2x36mers.unique.fragmentMidPoint_50bp.wig'),
)


def _species_settings(species):
    """Return the genome resources for a config species value.

    The result is a dict with keys 'bowtie' (index prefix), 'genome' (FASTA),
    'spike' (spike-in genome name, None for 'NA'), 'sizes' and 'sizes_noM'
    (spike-in chrom.sizes files, None for 'NA').

    Exits with an error message when the species is unknown, or when it is
    'S.pombe' and the P* constants are not defined in this file.
    """
    if species == 'NA':
        return {'bowtie': Sbowtie, 'genome': Sgenome,
                'spike': None, 'sizes': None, 'sizes_noM': None}
    if species == 'Caulobacter':
        return {'bowtie': Cbowtie, 'genome': Cgenome,
                'spike': 'Caulobacter_crescentus_NA1000',
                'sizes': CCS, 'sizes_noM': CCSnoM}
    if species == 'S.pombe':
        # The P* constants may be commented out at the top of the file;
        # report that clearly instead of dying with a NameError mid-run.
        try:
            return {'bowtie': Pbowtie, 'genome': Pgenome,
                    'spike': 'Schizosaccharomyces_pombe.ASM294v2.20',
                    'sizes': PCS, 'sizes_noM': PCSnoM}
        except NameError:
            sys.exit("species 'S.pombe' needs Pbowtie, Pgenome, PCS and "
                     "PCSnoM to be defined at the top of this script")
    sys.exit('unknown species %r (expected Caulobacter, S.pombe or NA)' % species)


def _read_config(config):
    """Parse the tab-delimited config into {label: {'input': (end1, end2, species)}}.

    Lines starting with '#' and blank lines are skipped.  Every sample is
    validated here so that a bad config aborts before any file is written.
    """
    samples = {}
    with open(config) as handle:
        for number, line in enumerate(handle, 1):
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            if len(fields) < 4:
                sys.exit('%s, line %d: expected label, fastq1, fastq2 and '
                         'species separated by tabs' % (config, number))
            label, end1, end2, species = fields[:4]
            _species_settings(species)
            samples[label] = {'input': (end1, end2, species)}
    return samples


def _write_lines(filename, lines):
    """Write each item of lines to filename, one per line."""
    with open(filename, 'w') as outfile:
        for line in lines:
            outfile.write(line + '\n')


def _register_tracks(entry, label, tracks):
    """Store the wig, then the bigWig, file name of each (key stem, file stem) pair."""
    for extension in ('.wig', '.bigWig'):
        for key_stem, file_stem in tracks:
            entry[key_stem + extension] = label + '.' + file_stem + extension


def _countlines_commands(labels, samples):
    """Return the countlines.sh commands (line count of the first-end fastq files)."""
    commands = []
    for label in labels:
        end1 = samples[label]['input'][0]
        commands.append('zcat ' + end1.replace(',', ' ') + ' | wc -l > '
                        + label + '.fastq.lines')
    return commands


def _map_commands(labels, samples):
    """Return the map.sh commands and record each sample's 'SEbam' and 'PEbam'."""
    commands = []
    for label in labels:
        end1, end2, species = samples[label]['input']
        settings = _species_settings(species)
        spiked = species != 'NA'
        tag = _ECOLI + '+' + settings['spike'] if spiked else _ECOLI
        # Shared tail: drop unaligned reads, convert to BAM and sort.
        to_bam = (' - | ' + _SAMTOOLS + ' view -F4 -bT ' + settings['genome']
                  + ' - | ' + _SAMTOOLS + ' sort - ' + label)

        se_suffix = '.1x36mers.' + tag + '.unique'
        commands.append(
            'python ' + _CODE_DIR + 'trimfastq.py ' + end1 + ' 36 -stdout | '
            + _BOWTIE + ' ' + settings['bowtie']
            + ' -p 20 -v 2 -k 2 -m 1 -t --best --strata -q --sam-nh --sam'
            + to_bam + se_suffix)
        samples[label]['SEbam'] = label + se_suffix + '.bam'

        # Spike-in samples keep all alignments (-a); plain E. coli samples
        # keep uniquely mapping pairs only.
        if spiked:
            multiplicity, pe_kind = '-a', '.a'
        else:
            multiplicity, pe_kind = '-k 2 -m 1', '.unique'
        pe_suffix = '.2x36mers.' + tag + pe_kind
        commands.append(
            'python ' + _CODE_DIR + 'PEFastqToTabDelimited.py ' + end1 + ' ' + end2
            + ' -trim 36 36 | ' + _BOWTIE + ' ' + settings['bowtie']
            + ' -p 20 -v 2 ' + multiplicity
            + ' -t --best --strata -q --sam-nh -X 1000 --sam --12'
            + to_bam + pe_suffix)
        samples[label]['PEbam'] = label + pe_suffix + '.bam'
    return commands


def _index_commands(labels, samples):
    """Return the samtools-index.sh commands for both BAM files of every sample."""
    commands = []
    for label in labels:
        for key in ('SEbam', 'PEbam'):
            commands.append(_SAMTOOLS + ' index ' + samples[label][key])
    return commands


def _insert_dist_commands(labels, samples):
    """Return the PEInsertDistFromBAM.sh commands, one per genome of each sample."""
    commands = []
    for label in labels:
        species = samples[label]['input'][2]
        settings = _species_settings(species)
        if species != 'NA':
            genomes = ((SCS, _ECOLI), (settings['sizes'], settings['spike']))
        else:
            genomes = ((SC3CS, _ECOLI),)
        for sizes, genome in genomes:
            commands.append(
                'python ' + _CODE_DIR + 'PEInsertDistFromBAM.py '
                + samples[label]['PEbam'] + ' ' + sizes + ' ' + label
                + '.2x36mers.' + genome + '.InsertLength')
    return commands


def _samstats_command(bam, name, sizes, extra=''):
    """Return one SAMstats.py command writing to 'SAMstats-' + name."""
    return ('python ' + _CODE_DIR + 'SAMstats.py ' + bam + ' SAMstats-' + name
            + ' -bam ' + sizes + ' ' + _SAMTOOLS + ' -paired' + extra)


def _samstats_commands(labels, samples):
    """Return the SAMstats.sh commands."""
    commands = []
    for label in labels:
        species = samples[label]['input'][2]
        settings = _species_settings(species)
        entry = samples[label]
        if species != 'NA':
            genomes = ((_ECOLI, SCS, SCSnoM),
                       (settings['spike'], settings['sizes'], settings['sizes_noM']))
            for genome, sizes, sizes_nom in genomes:
                name = label + '.2x36mers.' + genome
                commands.append(_samstats_command(entry['PEbam'], name, sizes))
                commands.append(_samstats_command(
                    entry['PEbam'], name + '.nochrM.ERMTOC', sizes_nom,
                    ' -excludeReadsMappingToOtherChromosomes'))
        else:
            # NOTE(review): -paired is also passed for the single-end BAM, as
            # in the original script -- confirm this is intended.
            commands.append(_samstats_command(
                entry['SEbam'], label + '.1x36mers.' + _ECOLI + '.unique', SC3CS))
            commands.append(_samstats_command(
                entry['PEbam'], label + '.2x36mers.' + _ECOLI, SC3CS))
    return commands


def _wiggle_commands(entry, sizes, key_stem, flags):
    """Return the three makewigglefromBAM-NH.py commands for one paired-end track family."""
    prefix = ('python ' + _CODE_DIR + 'makewigglefromBAM-NH.py --- '
              + entry['PEbam'] + ' ' + sizes + ' ')
    return [prefix + entry[key_stem + suffix + '.wig'] + flags + extra
            for suffix, extra in _WIGGLE_MODES]


def _makewiggle_commands(labels, samples):
    """Return the makewiggle.sh commands and register every wig/bigWig file name.

    NOTE(review): the 1x36mers wig names are registered here and used by
    later steps, but none of the generated scripts creates those files --
    presumably they are produced elsewhere; confirm.
    """
    commands = []
    erm_flags = ' -notitle -RPM -excludeReadsMappingToOtherChromosomes'
    for label in labels:
        species = samples[label]['input'][2]
        settings = _species_settings(species)
        entry = samples[label]
        if species != 'NA':
            ecoli_stems = _ECOLI_NATIVE_STEMS + _ECOLI_RENAMED_STEMS
            _register_tracks(entry, label, [(stem, stem) for stem in ecoli_stems])
            commands.extend(_wiggle_commands(entry, SCSnoM, _ECOLI_MULTI, erm_flags))
            _register_tracks(entry, label,
                             [(template % 'spike', template % settings['spike'])
                              for template in _SPIKE_TEMPLATES])
            commands.extend(_wiggle_commands(
                entry, settings['sizes_noM'], '2x36mers.spike.a.nochrM.ERMTOC', erm_flags))
        else:
            _register_tracks(entry, label, _PLAIN_TRACKS)
            commands.extend(_wiggle_commands(
                entry, SC3CS, '2x36mers.unique', ' -notitle -RPM'))
    return commands


def _columnrename_commands(labels, samples):
    """Return the columnrename.sh commands (spike-in samples only).

    Also prints every label and its full record, as the original script did.
    """
    commands = []
    for label in labels:
        entry = samples[label]
        print(label)
        print(entry)
        if entry['input'][2] == 'NA':
            continue
        for source, target in (('.fragmentMidPoint_50bp', '.fragmentMidPoint_50bp_Ecoli'),
                               ('.fragment', '.fragment_Ecoli')):
            commands.append(
                'python ' + _CODE_DIR + 'columnRename.py '
                + entry[_ECOLI_MULTI + source + '.wig']
                + ' 0 ' + _OLD_TO_NEW + ' 1 0 > '
                + entry[_ECOLI_MULTI + target + '.wig'])
    return commands


def _wigtobigwig_commands(labels, samples):
    """Return the wigtobigwig.sh commands for every registered track."""
    commands = []
    for label in labels:
        species = samples[label]['input'][2]
        settings = _species_settings(species)
        if species != 'NA':
            jobs = [(stem, SCS) for stem in _ECOLI_NATIVE_STEMS]
            jobs += [(stem, SC3CS) for stem in _ECOLI_RENAMED_STEMS]
            jobs += [(template % 'spike', settings['sizes'])
                     for template in _SPIKE_TEMPLATES]
        else:
            jobs = [(key_stem, SC3CS) for key_stem, _ in _PLAIN_TRACKS]
        entry = samples[label]
        for key_stem, sizes in jobs:
            commands.append(_WIG_TO_BIGWIG + ' ' + entry[key_stem + '.wig'] + ' '
                            + sizes + ' ' + entry[key_stem + '.bigWig'])
    return commands


def run():
    """Generate the pipeline shell scripts and file lists for a sample config.

    Reads the config file named by sys.argv[1] (tab-delimited: label, fastq1,
    fastq2, species) and writes, into the current directory, countlines.sh,
    map.sh, samtools-index.sh, PEInsertDistFromBAM.sh, SAMstats.sh,
    makewiggle.sh, columnrename.sh, wigtobigwig.sh and eight
    cumulativeGeneProfiles*.files lists.  Samples are processed in sorted
    label order.

    Exits with status 1 after printing the usage text when no config is
    given, and with an error message when the config is malformed or names
    an unsupported species.
    """
    if len(sys.argv) < 2:
        print('usage: python %s config' % sys.argv[0])
        print('\tconfig format:')
        print('\tlabel\tfastq1\tfastq2\tspecies')
        print('\tspecies key: Caulobacter|S.pombe|NA')
        sys.exit(1)

    DataDict = _read_config(sys.argv[1])
    labels = sorted(DataDict)

    # Order matters: map.sh records the BAM names and makewiggle.sh the wig
    # names that the later builders look up.
    scripts = (
        ('countlines.sh', _countlines_commands),
        ('map.sh', _map_commands),
        ('samtools-index.sh', _index_commands),
        ('PEInsertDistFromBAM.sh', _insert_dist_commands),
        ('SAMstats.sh', _samstats_commands),
        ('makewiggle.sh', _makewiggle_commands),
        ('columnrename.sh', _columnrename_commands),
        ('wigtobigwig.sh', _wigtobigwig_commands),
    )
    for filename, build in scripts:
        print('generating ' + filename)
        _write_lines(filename, build(labels, DataDict))

    for filename, wanted, key in _PROFILE_LISTS:
        print('generating ' + filename)
        _write_lines(filename,
                     [label + '\t' + DataDict[label][key]
                      for label in labels
                      if DataDict[label]['input'][2] in wanted])

# Script entry point.  The call is unconditional, so it also executes when
# this file is imported as a module.
run()
