##################################
#                                #
# Last modified 03/25/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
from sets import Set

def run():

    if len(sys.argv) < 3:
        print 'usage: python %s label inputfilename outfilename [-chr chr1,[chrX,...,chrY]] [-nomulti] [-normalize] [-notitle] [-stranded plus/minus]' % sys.argv[0]
        print 'Note1: this script does not work for spliced reads, use it only for single-end non-gapped read alignments' 
        print 'Note2: this script does not normalize for sequencing depth unless the normalization option is used' 
        sys.exit(1)

    label = sys.argv[1]
    inputfilename = sys.argv[2]
    outputfilename = sys.argv[3]
    outfile = open(outputfilename, 'w')

    NoMulti=False
    if '-nomulti' in sys.argv:
        NoMulti=True
        print 'will consider only unique alignments'

    WithTitle=True
    if '-notitle' in sys.argv:
        WithTitle=False

    if WithTitle:
        outline = 'track type=wiggle_0 name="' + label + '" priority=0.010 visibility=full'
        outfile.write(outline + '\n')

    doStranded=False
    if '-stranded' in sys.argv:
        doStranded=True
        strand=sys.argv[sys.argv.index('-stranded')+1]
        print 'will only consider reads in the', doStranded, 'orientation'
        if strand == 'plus':
            strand='+'
        elif strand == 'minus':
            strand='-'
        else:
            print 'incorrect strand option, exiting', sys.exit(1)


    doChr=False
    if '-chr' in sys.argv:
        doChr=True
        chrList=sys.argv[sys.argv.index('-chr')+1].split(',')
        chrDict={}
        for chr in chrList:
            chrDict[chr]=''
        print 'will consider only the following chromosomes', chrDict.keys()
    doNormalize=False
    if '-normalize' in sys.argv:
        doNormalize=True
        print 'will normalize for sequencing depth'

    CoverageDict={}

    if doNormalize:
        readNum=0.0

    lineslist = open(inputfilename)
    i=0
    for line in lineslist:
        i+=1
        if i % 1000000 == 0:
            out = str(i/1000000) + 'M lines processed'
            print out
        fields = line.strip().split('\t')
        chr=fields[0]
        if doChr and chrDict.has_key(chr):
            pass
        elif doChr:
            continue
        else:
            pass
        if CoverageDict.has_key(chr):
            pass
        else:
            CoverageDict[chr]={}
        start=int(fields[1])
        stop=int(fields[2])
        multi=int(fields[4])
        orientation=fields[5]
        if NoMulti and multi!=1:
            continue
        if doNormalize:
            readNum=readNum + 1.0/multi
        if doStranded and orientation != strand:
            continue
        scaleby=1.0
        if multi!=0:
            scaleby=1.0/(multi)
        if strand == '-':
            scaleby = -scaleby
        for j in range(start,stop):
            if CoverageDict[chr].has_key(j):
                CoverageDict[chr][j]+=scaleby
            else:
                CoverageDict[chr][j]=scaleby

    if doNormalize:
        normFactor=readNum/1000000.

    print readNum, normFactor

    chrlist=CoverageDict.keys()
    chrlist.sort()
    for chr in chrlist:
        posList=CoverageDict[chr].keys()
        posList.sort()
        if len(posList) == 0:
            continue
        sameScorePos=posList[0]
        lastPos=posList[0]
        for pos in posList[1:len(posList)-1]:
            if CoverageDict[chr][pos]==CoverageDict[chr][sameScorePos] and pos==lastPos+1:
                pass
            else:
                if doNormalize:
                    outline=chr+'\t'+str(sameScorePos)+'\t'+str(lastPos+1)+'\t'+str(CoverageDict[chr][sameScorePos]/normFactor)
                else:
                    outline=chr+'\t'+str(sameScorePos)+'\t'+str(lastPos+1)+'\t'+str(CoverageDict[chr][sameScorePos])
                outfile.write(outline+'\n')
                sameScorePos=pos
            lastPos=pos
        if doNormalize:
            outline=chr+'\t'+str(sameScorePos)+'\t'+str(lastPos+1)+'\t'+str(CoverageDict[chr][sameScorePos]/normFactor)
        else:
            outline=chr+'\t'+str(sameScorePos)+'\t'+str(lastPos+1)+'\t'+str(CoverageDict[chr][sameScorePos])
            outfile.write(outline+'\n')

    outfile.close()

run()

