##################################
#                                #
# Last modified 02/16/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def _gtf_attribute(attributes, key):
    """Return the quoted value stored under `key` in a GTF attribute string.

    Looks for the text `key "` and returns everything up to the next `";`.
    Raises IndexError when the key is not present.
    """
    return attributes.split(key + ' "')[1].split('";')[0]


def _is_record(line):
    """Return True for lines that carry data (not blank, not a '#' comment)."""
    return bool(line.strip()) and not line.startswith('#')


def _format_score(score):
    """Render `score` as text limited to seven characters.

    Plain decimal renderings are cut to their first seven characters.
    Very small scores are rendered by str() in exponent notation
    (e.g. '1.44e-05'); cutting that would drop the exponent and yield a
    value that is orders of magnitude too large, so those are converted
    to fixed-point notation before being cut.
    """
    text = str(score)
    if 'e' in text:
        text = '%.5f' % score
    return text[0:7]


def run():
    """Rewrite a transcripts GTF with a type column and an FPKM-based score.

    Command-line arguments (read from sys.argv):
        1. transcripts GTF whose attribute column carries `transcript_id`
           and `FPKM` entries;
        2. GTF whose second column holds the value to assign to each
           transcript ID;
        3. path of the output file to write.

    For each transcript record, the second column is replaced by the value
    recorded for its transcript ID in the second file, the sixth column is
    replaced by 100 * log2(FPKM + 1), and ` npIDR ".";` is appended to the
    attribute column.  Blank lines and lines starting with '#' are skipped
    in both inputs.

    Exits with status 1 after printing a usage message when fewer than
    three arguments are supplied.

    Raises:
        KeyError: a transcript ID is absent from the second file.
        IndexError: a record lacks the expected columns or attributes.
    """
    # sys.argv[3] is read below, so three positional arguments are required.
    if len(sys.argv) < 4:
        print('usage: python %s transcripts.gtf gtf outfilename' % sys.argv[0])
        sys.exit(1)

    transcripts = sys.argv[1]
    gtf = sys.argv[2]
    idr = '.'

    with open(sys.argv[3], 'w') as outfile:
        # Map transcript ID -> second column of the annotation file; when an
        # ID occurs on several lines the last occurrence wins.
        type_by_transcript = {}
        with open(gtf) as annotation:
            for line in annotation:
                if not _is_record(line):
                    continue
                fields = line.strip().split('\t')
                transcript_id = _gtf_attribute(fields[8], 'transcript_id')
                type_by_transcript[transcript_id] = fields[1]

        with open(transcripts) as records:
            for line in records:
                if not _is_record(line):
                    continue
                fields = line.strip().split('\t')
                transcript_id = _gtf_attribute(fields[8], 'transcript_id')
                fpkm = float(_gtf_attribute(fields[8], 'FPKM'))
                score = 100 * (math.log((fpkm + 1), 2))
                transcript_type = type_by_transcript[transcript_id]
                columns = [
                    fields[0],
                    transcript_type,
                    fields[2],
                    fields[3],
                    fields[4],
                    _format_score(score),
                    fields[6],
                    fields[7],
                    fields[8] + ' npIDR "' + str(idr) + '";',
                ]
                outfile.write('\t'.join(columns) + '\n')


# Script entry point: this call executes whenever the module is loaded
# (there is no __main__ guard), so importing the file also runs it.
run()