##################################
#                                #
# Last modified 05/16/2015       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set
import os
import subprocess

def _exit_with_usage():
    """Print the command-line usage and terminate with exit status 1."""
    print('usage: python %s RAST.gtf output [-skip skipped-list]' % sys.argv[0])
    print('\tskipped-list format: one feature name per line')
    sys.exit(1)


def _load_skip_list(path):
    """Return the set of feature names listed one per line in *path*."""
    with open(path) as handle:
        return set(line.strip() for line in handle)


def _parse_attributes(column):
    """Split a ``key=value;key=value`` attribute column into a dict.

    Keys are matched exactly (after stripping surrounding whitespace), so
    a key such as ``Alt_ID`` is never mistaken for ``ID``. The first
    occurrence of a key wins; entries without ``=`` are ignored.
    """
    attributes = {}
    for entry in column.split(';'):
        key, separator, value = entry.partition('=')
        if separator:
            attributes.setdefault(key.strip(), value)
    return attributes


def _gtf_line(fields, feature, gene_id, gene_name):
    """Build one tab-separated output line for *feature*.

    Columns 1-2 and 4-8 are copied from *fields*; column 3 is *feature*;
    column 9 carries the gene/transcript id and name attributes.
    """
    attributes = ('gene_id "%s"; gene_name "%s"; '
                  'transcript_id "%s"; transcript_name "%s";'
                  % (gene_id, gene_name, gene_id, gene_name))
    return '\t'.join(fields[:2] + [feature] + fields[3:8] + [attributes])


def run():
    """Convert a RAST annotation file into GTF-style records.

    Command line (read from ``sys.argv``)::

        python <script> RAST.gtf output [-skip skipped-list]

    Blank lines and lines starting with ``#`` are ignored. Every other
    line must have at least nine tab-separated columns, the ninth holding
    ``key=value`` pairs separated by ``;`` and including an ``ID`` key.
    ``Name`` is optional and falls back to the ``ID`` value.

    For each accepted record two lines are written to the output file:
    one with the feature column replaced by ``exon`` and one with the
    original feature. A record whose ``exon`` line has already been
    written is dropped entirely. Records whose feature column appears in
    the optional skip list (one feature name per line) are ignored.

    Raises:
        SystemExit: with status 1 when the required arguments are
            missing or ``-skip`` is given without a file name.
        ValueError: when a record has fewer than nine columns or no
            ``ID`` attribute.
    """
    args = sys.argv
    # Both the input and the output path are required.
    if len(args) < 3:
        _exit_with_usage()

    input_path = args[1]
    output_path = args[2]

    skipped_features = set()
    if '-skip' in args:
        skip_position = args.index('-skip') + 1
        if skip_position >= len(args):
            _exit_with_usage()
        skipped_features = _load_skip_list(args[skip_position])

    written_exon_lines = set()

    # Open the input first so a missing input does not truncate the output.
    with open(input_path) as source:
        with open(output_path, 'w') as sink:
            for line_number, line in enumerate(source, 1):
                record = line.strip()
                if record == '' or line.startswith('#'):
                    continue
                fields = record.split('\t')
                # Skipped features are dropped before any validation.
                if len(fields) > 2 and fields[2] in skipped_features:
                    continue
                if len(fields) < 9:
                    raise ValueError(
                        '%s line %d: expected at least 9 tab-separated '
                        'columns, found %d'
                        % (input_path, line_number, len(fields)))
                feature = fields[2]
                attributes = _parse_attributes(fields[8])
                if 'ID' not in attributes:
                    raise ValueError(
                        '%s line %d: no ID attribute in column 9'
                        % (input_path, line_number))
                gene_id = attributes['ID']
                gene_name = attributes.get('Name', gene_id)

                exon_line = _gtf_line(fields, 'exon', gene_id, gene_name)
                # Duplicates are detected on the exon form, which ignores
                # the original feature type.
                if exon_line in written_exon_lines:
                    continue
                written_exon_lines.add(exon_line)
                sink.write(exon_line + '\n')
                sink.write(
                    _gtf_line(fields, feature, gene_id, gene_name) + '\n')

# Run the conversion only when this file is executed as a script, so that
# importing the module does not parse sys.argv or touch any files.
if __name__ == '__main__':
    run()