##################################
#                                #
# Last modified 09/07/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

def WriteContig(contig,outfile):

    outline = contig[0] + '\t' + str(contig[1]) + '\t' + str(contig[2]) + '\t' + contig[3]  + '\t' + str(contig[2]-contig[1]) + '\t' + str(contig[4]) + '\t' + str(contig[5])
    outfile.write(outline+'\n')

import sys
import pysam
from sets import Set

def run():

    if len(sys.argv) < 2:
        print 'usage: python %s bowtie outputfilename [-withmulti] [-stranded]' % sys.argv[0]
        sys.exit(1)

    bowtie = sys.argv[1]
    outputfilename = sys.argv[2]

    noMulti=True
    doStranded=False

    if '-stranded' in sys.argv:
        doStranded=True

    if '-withmulti' in sys.argv:
        noMulti=False

    outfile=open(outputfilename, 'w')

    outline='#chr\tstart\tend\tstrand\tlength\tunique\tmulti'
    outfile.write(outline+'\n')

    i=0
    linelist = open(bowtie)
    contig=['chr',0,0,'.',0,0]
    if doStranded:
        pluscontig=['chr',0,0,'+',0,0]
        minuscontig=['chr',0,0,'-',0,0]
    ReadList=[]
    for line in linelist:
        i+=1
        if i % 5000000 == 0:
            print str(i/1000000) + 'M alignments processed'
        fields=line.strip().split('\t')
        chr=fields[2]
        start=int(fields[3])
        end=start + len(fields[4])
        strand=fields[1]
        multi=int(fields[6])
        if noMulti and multi != 0:
            continue
        ReadList.append((chr,start,end,strand,multi))

    ReadList.sort()
 
    print 'finished parsing alignments'

    for (chr,start,end,strand,multi) in ReadList:
        i+=1
        if i % 5000000 == 0:
            print str(i/1000000) + 'M alignments processed'
        if noMulti:
            if multi > 0:
                continue
        if doStranded:
            if strand == '+':
                if pluscontig == ['chr',0,0,'+',0,0]:
                    if multi > 0:
                        pluscontig = [chr,start,end,strand,0,1]
                    else:
                        pluscontig = [chr,start,end,strand,1,0]
                    continue
                if start >= pluscontig[1] and start <= pluscontig[2]:
                    pluscontig[2] = end
                    if multi > 0:
                        pluscontig[5] = pluscontig[5] + 1
                    else:
                        pluscontig[4] = pluscontig[4] + 1
                else:
                    WriteContig(pluscontig,outfile)
                    if multi > 0:
                        pluscontig=[chr,start,end,strand,0,1]
                    else:
                        pluscontig=[chr,start,end,strand,1,0]
            if strand == '-':
                if minuscontig == ['chr',0,0,'-',0,0]:
                    if multi > 0:
                        minuscontig = [chr,start,end,strand,0,1]
                    else:
                        minuscontig = [chr,start,end,strand,1,0]
                    continue
                if start >= minuscontig[1] and start <= minuscontig[2]:
                    minuscontig[2] = end
                    if multi > 0:
                        minuscontig[5] = minuscontig[5] + 1
                    else:
                        minuscontig[4] = minuscontig[4] + 1
                else:
                    WriteContig(minuscontig,outfile)
                    if multi > 0:
                        minuscontig=[chr,start,end,strand,0,1]
                    else:
                        minuscontig=[chr,start,end,strand,1,0]
        else:
            newstrand = '.'
            if contig == ['chr',0,0,'.',0,0]:
                if multi > 0:
                    contig = [chr,start,end,'.',0,1]
                else:
                    contig = [chr,start,end,'.',1,0]
                continue
            if start >= contig[1] and start <= contig[2]:
                contig[2] = end
                if multi > 0:
                    contig[5] = contig[5] + 1
                else:
                    contig[4] = contig[4] + 1
            else:
                WriteContig(contig,outfile)
                if multi > 0:
                    contig=[chr,start,end,'.',0,1]
                else:
                    contig=[chr,start,end,'.',1,0]
    if doStranded:
        WriteContig(pluscontig,outfile)
        WriteContig(minuscontig,outfile)
    else:
        WriteContig(contig,outfile)

    outfile.close()

run()
