##################################
#                                #
# Last modified 2018/02/03       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set

def _gtf_attribute(attributes, key):
    """Return the quoted value stored under *key* in a GTF attribute string.

    The value is the text between ``key "`` and the next ``";``.

    Raises:
        IndexError: if ``key "`` does not occur in *attributes*.
    """
    return attributes.split(key + ' "')[1].split('";')[0]


def run():
    """Write one tab-separated row per exon record of a GTF file.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input GTF file
        2. path of the output file (overwritten)

    Each output row holds chromosome, start, stop, strand, gene ID, gene
    name, transcript ID, transcript name and the raw attribute column.
    When ``gene_name`` / ``transcript_name`` is absent from the attribute
    column, the corresponding ID is used in its place.

    Exits with status 1 after printing a usage message if fewer than two
    arguments are supplied.

    Raises:
        IndexError: if an exon record lacks ``gene_id`` or ``transcript_id``.
    """
    # Both the input and the output path are required, i.e. argv needs at
    # least three entries including the program name.
    if len(sys.argv) < 3:
        print('usage: python %s gtf outputfilename ' % sys.argv[0])
        sys.exit(1)

    inputfilename = sys.argv[1]
    outfilename = sys.argv[2]

    # NOTE: the header text (including its spelling) is kept unchanged so
    # that existing consumers of the output see the same first line.
    header = '#chr\tleft\tright\tstrand\tgeneID\tgeneName\ttranscriptID\ttranscriptName\tattributess'

    # Open the input first so that an unreadable input does not truncate an
    # existing output file; ``with`` closes both handles even on error.
    with open(inputfilename) as gtf, open(outfilename, 'w') as outfile:
        outfile.write(header + '\n')
        for line in gtf:
            if line.startswith('#'):
                continue
            fields = line.strip().split('\t')
            # Skip blank or truncated lines (fewer than the 9 columns that
            # are indexed below) as well as every non-exon feature.
            if len(fields) < 9 or fields[2] != 'exon':
                continue

            chrom = fields[0]
            start = fields[3]
            stop = fields[4]
            strand = fields[6]
            attributes = fields[8]

            transcript_id = _gtf_attribute(attributes, 'transcript_id')
            if 'transcript_name "' in attributes:
                transcript_name = _gtf_attribute(attributes, 'transcript_name')
            else:
                transcript_name = transcript_id

            gene_id = _gtf_attribute(attributes, 'gene_id')
            if 'gene_name "' in attributes:
                gene_name = _gtf_attribute(attributes, 'gene_name')
            else:
                gene_name = gene_id

            row = [chrom, start, stop, strand, gene_id, gene_name,
                   transcript_id, transcript_name, attributes]
            outfile.write('\t'.join(row) + '\n')
   
# Script entry point: run() is called unconditionally whenever this file is
# executed (there is no __main__ guard, so importing the module runs it too).
run()
