##################################
#                                #
# Last modified 7/9/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys

# psyco is an optional JIT accelerator: use it when it is installed and carry
# on without it otherwise.  Only ImportError is treated as "not available" so
# that KeyboardInterrupt/SystemExit and unrelated errors are not swallowed.
try:
    import psyco
except ImportError:
    pass
else:
    psyco.full()

def _format_read(fields, newreadlength, shiftlength):
    """Return the output line for one read, or None for an unknown strand.

    fields is the tab-split input line: fields[1] is the start used for '+'
    reads, fields[2] the end used for '-' reads and fields[5] the strand.
    The read is resized to newreadlength (anchored at its start for '+',
    at its end for '-') and then trimmed by shiftlength on both sides.
    """
    strand = fields[5]
    if strand == '+':
        anchor = int(fields[1])
        newstart = anchor + shiftlength
        newend = anchor + newreadlength - shiftlength
        item_rgb = '0,0,255'
    elif strand == '-':
        anchor = int(fields[2])
        newstart = anchor - newreadlength + shiftlength
        newend = anchor - shiftlength
        item_rgb = '255,0,0'
    else:
        return None
    return '%s\t%d\t%d\t%s\t%s\t%s\t0\t0\t%s\t1\t%d\t0\n' % (
        fields[0], newstart, newend, fields[3], fields[4], strand,
        item_rgb, newreadlength - 1)


def run():
    """Resize and shift the reads of a tab-separated file into a track file.

    Command line (read from sys.argv):
        label inputfilename outfilename newreadlength shift [-color r,g,b]

    The first line of the input is skipped.  Every following line is split
    on tabs; column 1 is used as the chromosome key, column 2 as the integer
    start, column 3 as the end (for '-' reads) and column 6 as the strand.
    Output starts with a 'track' header line, followed by one line per kept
    read, ordered by chromosome name and then by start position.  Reads whose
    strand is neither '+' nor '-' are skipped.

    Exits with status 1 (after printing a message) when required arguments
    are missing or '-color' is given without a value.
    """
    # Five positional arguments are required, so argv must hold at least six
    # entries (script name included).
    if len(sys.argv) < 6:
        print('usage: python %s label inputfilename outfilename newreadlength shift [-color r,g,b]' % sys.argv[0])
        sys.exit(1)

    label = sys.argv[1]
    inputfilename = sys.argv[2]
    outputfilename = sys.argv[3]
    newreadlength = int(sys.argv[4])
    shiftlength = int(sys.argv[5])

    color = '0,0,0'
    if '-color' in sys.argv:
        color_index = sys.argv.index('-color') + 1
        if color_index >= len(sys.argv):
            print('error: -color requires an r,g,b value')
            sys.exit(1)
        color = sys.argv[color_index]

    # chromosome -> {start position -> raw input line}
    # NOTE(review): reads sharing a chromosome and start position overwrite
    # each other (last one wins), exactly as before -- confirm whether this
    # de-duplication is intended.
    lineDict = {}
    with open(inputfilename) as infile:
        # Skip the first line; the default keeps an empty file from raising.
        next(infile, None)
        for i, line in enumerate(infile):
            if i % 1000000 == 0:
                print('%d reads processed' % i)
            fields = line.strip().split('\t')
            if fields == ['']:
                # Blank line (e.g. at the end of the file): nothing to record.
                continue
            lineDict.setdefault(fields[0], {})[int(fields[1])] = line
    print('finished parsing reads')

    with open(outputfilename, 'w') as outfile:
        outfile.write('track name=%s visibility=4 color=%s\n' % (label, color))
        for chromosome in sorted(lineDict):
            print(chromosome)
            reads = lineDict[chromosome]
            for startPos in sorted(reads):
                fields = reads[startPos].strip().split('\t')
                outline = _format_read(fields, newreadlength, shiftlength)
                # Unknown strand: skip instead of re-writing a stale line.
                if outline is not None:
                    outfile.write(outline)

# Run the conversion as soon as this file is loaded.  There is no __main__
# guard, so importing the module executes it as well.
run()

