##################################
#                                #
# Last modified 2019/08/27       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
from sets import Set

# Optional speed-up: enable the psyco accelerator when it is installed.
# The script behaves the same without it, so any failure here is ignored.
try:
    import psyco
    psyco.full()
except Exception:
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt and SystemExit are not silently swallowed.
    pass

def run():
    """Write the 'gene' records of a GFF file as a tab-delimited table.

    Command line (read from sys.argv):
        inputfilename outfilename [-split binsize distanceToTSS distanceToTTS]

    The output starts with the header line '#chr left right strand ID name'
    (tab-separated) followed by one line per gene.  The gene ID is taken from
    the text following 'ID=' in column 9 and the name from the text following
    'Name=' (falling back to the ID when no 'Name=' is present).

    With -split, each gene is first trimmed: on the '+' strand distanceToTSS
    is added to the left coordinate and distanceToTTS subtracted from the
    right one; on the '-' strand the two distances are swapped.  Genes whose
    trimmed length is negative or shorter than binsize are dropped.  The rest
    are written as consecutive bins of width binsize, named '<name>_<index>'
    with a zero-based index.  The last bin of a gene may extend past the
    trimmed right coordinate when the length is not a multiple of binsize.

    Comment lines (starting with '#') and lines with fewer than nine
    tab-separated columns (e.g. blank lines) are skipped.

    Exits with status 1 after printing a message when the arguments are
    incomplete or binsize is not positive.  Raises ValueError for non-integer
    numeric arguments/coordinates and IndexError for a gene record without
    an 'ID=' attribute.
    """

    usage = 'usage: python %s inputfilename outfilename [-split binsize distanceToTSS distanceToTTS]' % sys.argv[0]

    # Both the input and the output file name are required.
    if len(sys.argv) < 3:
        print(usage)
        sys.exit(1)

    inputfilename = sys.argv[1]
    outputfilename = sys.argv[2]

    doSplit = False
    binSize = TSSdist = TTSdist = 0
    if '-split' in sys.argv:
        doSplit = True
        splitIndex = sys.argv.index('-split')
        # -split must be followed by its three numeric operands.
        if len(sys.argv) < splitIndex + 4:
            print(usage)
            sys.exit(1)
        binSize = int(sys.argv[splitIndex + 1])
        TSSdist = int(sys.argv[splitIndex + 2])
        TTSdist = int(sys.argv[splitIndex + 3])
        # A zero or negative bin size would make the binning loop below
        # run forever, so reject it before touching any file.
        if binSize <= 0:
            print('-split binsize must be a positive integer')
            sys.exit(1)

    # Context managers guarantee both files are closed, even on error.
    with open(inputfilename) as infile, open(outputfilename, 'w') as outfile:
        outfile.write('#chr\tleft\tright\tstrand\tID\tname\n')

        for line in infile:
            if line.startswith('#'):
                continue
            fields = line.strip().split('\t')
            # Skip blank/short lines and every feature that is not a gene.
            if len(fields) < 9 or fields[2] != 'gene':
                continue

            attributes = fields[8]
            geneID = attributes.split('ID=')[1].split(';')[0]
            if 'Name=' in attributes:
                geneName = attributes.split('Name=')[1].split(';')[0]
            else:
                geneName = geneID

            chrom = fields[0]
            left = int(fields[3])
            right = int(fields[4])
            strand = fields[6]

            if not doSplit:
                outfile.write('\t'.join([chrom, str(left), str(right), strand, geneID, geneName]) + '\n')
                continue

            # Trim the gene ends; which distance applies to which end
            # depends on the strand.
            if strand == '+':
                left = left + TSSdist
                right = right - TTSdist
            if strand == '-':
                left = left + TTSdist
                right = right - TSSdist
            if right < left or right - left < binSize:
                continue

            binStart = left
            binIndex = 0
            while binStart < right:
                outfile.write('\t'.join([chrom, str(binStart), str(binStart + binSize), strand,
                                         geneID, geneName + '_' + str(binIndex)]) + '\n')
                binStart += binSize
                # Advance the counter so each bin of a gene gets a unique suffix.
                binIndex += 1

# Script entry point: run() is invoked unconditionally when this file is
# executed or imported (there is no __main__ guard).
run()

