##################################
#                                #
# Last modified 05/16/2015       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set
import os
import subprocess

def run():
    """Convert a tab-separated annotation table into GTF-style records.

    Command line: ``python <script> rnammer output`` where the first
    argument is the input path and the second is the output path.
    (The usage text suggests the input is RNAmmer GFF output -- TODO confirm.)

    For every data line two records are written to the output file: one
    whose feature column is the literal ``exon`` and one that reuses the
    feature column of the input line (column 3).  Both carry identical
    coordinates (ordered so the smaller value comes first), a fixed score
    of ``1000``, the input strand, and a ``gene_id`` / ``gene_name`` /
    ``transcript_id`` / ``transcript_name`` attribute string derived from
    column 9.  When the same column-9 value is seen more than once, the
    second and later occurrences get ``-<n>`` appended to their ids so
    that ids stay unique; the names are left unsuffixed.

    Lines starting with ``#``, blank lines, and separator lines made up
    solely of spaces and dashes are skipped.

    Raises:
        SystemExit: with status 1 when fewer than two arguments are given.
        IndexError: when a data line has fewer than nine tab-separated
            columns.
        ValueError: when column 4 or 5 is not an integer.
    """
    # Both paths are mandatory, so argv needs the script name plus two
    # arguments before sys.argv[2] can be read safely.
    if len(sys.argv) < 3:
        print('usage: python %s rnammer output' % sys.argv[0])
        sys.exit(1)

    infilename = sys.argv[1]
    outfilename = sys.argv[2]

    # Number of times each column-9 label has been encountered so far.
    seen = {}
    separator_chars = set(' -')

    # Open the input first so that a missing input file does not leave a
    # freshly truncated output file behind.
    with open(infilename) as infile:
        with open(outfilename, 'w') as outfile:
            for line in infile:
                if line.startswith('#'):
                    continue
                stripped = line.strip()
                if not stripped:
                    # Blank lines carry no record.
                    continue
                if set(stripped) == separator_chars:
                    # Separator line such as "- - - -".
                    continue

                fields = stripped.split('\t')
                chrom = fields[0]
                # Only the first space-delimited token of the source
                # column is kept.
                source = fields[1].split(' ')[0]
                feature = fields[2]
                start = int(fields[3])
                end = int(fields[4])
                strand = fields[6]
                label = fields[8]

                seen[label] = seen.get(label, 0) + 1
                gene_name = label
                gene_id = label
                if seen[label] > 1:
                    # Disambiguate repeated labels: label-2, label-3, ...
                    gene_id = label + '-' + str(seen[label])

                attributes = (
                    'gene_id "%s"; gene_name "%s"; '
                    'transcript_id "%s"; transcript_name "%s";'
                    % (gene_id, gene_name, gene_id, gene_name)
                )
                left = str(min(start, end))
                right = str(max(start, end))

                # Emit the synthetic 'exon' record first, then the record
                # carrying the original feature type.
                for record_feature in ('exon', feature):
                    columns = [chrom, source, record_feature, left, right,
                               '1000', strand, '.', attributes]
                    outfile.write('\t'.join(columns) + '\n')

# Only perform the conversion when executed as a script, so that importing
# this module does not read sys.argv or exit the interpreter.
if __name__ == '__main__':
    run()