##################################
#                                #
# Last modified 09/13/2015       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set
import math

def _write_strand_segments(outfile, strand, features, total_size):
    """Write alternating gap / feature rows for one strand.

    Each row is ``strand<TAB>size<TAB>label`` followed by a newline. Gap
    rows carry an empty label; feature rows carry the feature's label.

    outfile    -- writable file object
    strand     -- string written in the first column of every row
    features   -- iterable of (left, right, label) tuples, already sorted
    total_size -- integer used to size the final gap after the last feature
    """
    current_pos = 0
    for (left, right, label) in features:
        # Gap between the end of the previous feature and this one's start.
        # NOTE: this is negative when features overlap; it is written as is.
        outfile.write('\t'.join([strand, str(left - current_pos), '']) + '\n')
        outfile.write('\t'.join([strand, str(right - left), label]) + '\n')
        current_pos = right
    # Trailing gap from the last feature's end up to total_size.
    outfile.write('\t'.join([strand, str(total_size - current_pos), '']) + '\n')


def run():
    """Convert a GFF file into per-strand gap / feature segment rows.

    Command line: ``gff total_size outfile``

    For every non-comment, non-blank line of the tab-separated input, the
    third column is used as the segment label, the fourth and fifth columns
    as integer left/right coordinates, and the seventh column as the strand.
    Features are sorted per strand, then all '+' rows are written, followed
    by all '-' rows.

    Exits with status 1 after printing a usage message when fewer than
    three arguments are supplied. Raises ValueError if total_size or a
    coordinate is not an integer, IndexError if a data line has fewer than
    seven columns, and KeyError if the strand is neither '+' nor '-'.
    """
    # Three positional arguments are required (sys.argv[1] .. sys.argv[3]).
    if len(sys.argv) < 4:
        print('usage: python %s gff total_size outfile ' % sys.argv[0])
        sys.exit(1)

    gff_path = sys.argv[1]
    total_size = int(sys.argv[2])
    out_path = sys.argv[3]

    features_by_strand = {'+': [], '-': []}
    with open(gff_path) as gff:
        for line in gff:
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no feature; skip them instead of crashing.
                continue
            fields = stripped.split('\t')
            label = fields[2]
            left = int(fields[3])
            right = int(fields[4])
            strand = fields[6]
            features_by_strand[strand].append((left, right, label))

    # The output file is opened only after the input parsed successfully,
    # so a malformed input does not leave a truncated output file behind.
    with open(out_path, 'w') as outfile:
        for strand in ('+', '-'):
            _write_strand_segments(outfile, strand,
                                   sorted(features_by_strand[strand]),
                                   total_size)
   
# Script entry point: there is no __main__ guard, so run() executes both when
# this file is run directly and when it is imported.
run()
