##################################
#                                #
# Last modified 2017/08/25       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
from sets import Set

def run():

    if len(sys.argv) < 3:
        print 'usage: python %s label inputfilename outfilename [-chr chr1,[chrX,...,chrY]] [-chrField ID] [-nomulti] [-normalize] [-notitle] [-binary]' % sys.argv[0]
        print 'Note1: this script does not work for spliced reads, use it only for single-end non-gapped read alignments' 
        print 'Note2: this script does not normalize for sequencing depth unless the normalization option is used and it does not account for the presence of multireads' 
        print 'Note3: bed format assumed: chr <tab> start <tab> stop' 
        sys.exit(1)

    label = sys.argv[1]
    inputfilename = sys.argv[2]
    outputfilename = sys.argv[3]
    outfile = open(outputfilename, 'w')

    NoMulti=False
    if '-nomulti' in sys.argv:
        NoMulti=True
        print 'will consider only unique alignments'

    WithTitle=True
    if '-notitle' in sys.argv:
        WithTitle=False

    if WithTitle:
        outline = 'track type=wiggle_0 name="' + label + '" priority=0.010 visibility=full'
        outfile.write(outline + '\n')

    chrField=0
    if '-chrField' in sys.argv:
        chrField=int(sys.argv[sys.argv.index('-chrField')+1])

    doBinary = False
    if '-binary' in sys.argv:
        doBinary=True
        print 'will output binary scores'

    doChr=False
    if '-chr' in sys.argv:
        doChr=True
        chrList=sys.argv[sys.argv.index('-chr')+1].split(',')
        chrDict={}
        for chr in chrList:
            chrDict[chr]=''
        print 'will consider only the following chromosomes', chrDict.keys()
    doNormalize=False
    if '-normalize' in sys.argv:
        doNormalize=True
        print 'will normalize for sequencing depth'

    CoverageDict={}

    if doNormalize:
        readIDList=[]

    lineslist = open(inputfilename)
    i=0
    for line in lineslist:
        if line.startswith('#'):
           continue
        i+=1
        if i % 1000000 == 0:
            out = str(i/1000000) + 'M lines processed'
            print out
        fields = line.strip().split('\t')
        chr=fields[chrField]
        if doNormalize:
            readNumber+=1
        if doChr and chrDict.has_key(chr):
            pass
        elif doChr:
            continue
        else:
            pass
        if CoverageDict.has_key(chr):
            pass
        else:
            CoverageDict[chr]={}
        start=int(fields[chrField+1])
        stop=int(fields[chrField+2])
        for j in range(start,stop):
            if CoverageDict[chr].has_key(j):
                if doBinary:
                    pass
                else:
                    CoverageDict[chr][j]+=1
            else:
                CoverageDict[chr][j] = 1

    if doNormalize:
        normFactor=readNumber/1000000.

    chrlist=CoverageDict.keys()
    chrlist.sort()
    for chr in chrlist:
        posList=CoverageDict[chr].keys()
        posList.sort()
        if len(posList) == 0:
            continue
        sameScorePos=posList[0]
        lastPos=posList[0]
        for pos in posList[1:len(posList)-1]:
            if CoverageDict[chr][pos]==CoverageDict[chr][sameScorePos] and pos==lastPos+1:
                pass
            else:
                if doNormalize:
                    outline=chr+'\t'+str(sameScorePos)+'\t'+str(lastPos+1)+'\t'+str(CoverageDict[chr][sameScorePos]/normFactor)
                else:
                    outline=chr+'\t'+str(sameScorePos)+'\t'+str(lastPos+1)+'\t'+str(CoverageDict[chr][sameScorePos])
                outfile.write(outline+'\n')
                sameScorePos=pos
            lastPos=pos
        if doNormalize:
            outline=chr+'\t'+str(sameScorePos)+'\t'+str(lastPos+1)+'\t'+str(CoverageDict[chr][sameScorePos]/normFactor)
        else:
            outline=chr+'\t'+str(sameScorePos)+'\t'+str(lastPos+1)+'\t'+str(CoverageDict[chr][sameScorePos])
            outfile.write(outline+'\n')

    outfile.close()

run()

