##################################
#                                #
# Last modified 02/16/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def _read_gene_records(gtf_path):
    """Return a dict mapping gene_id to the nine columns of each GTF 'gene' line.

    Lines starting with '#' and blank lines are skipped, as is every record
    whose third column is not 'gene'.  The gene_id is taken from the text
    between 'gene_id "' and the following '";' in the ninth column.
    """
    genes = {}
    with open(gtf_path) as handle:
        for line in handle:
            if line.startswith('#'):
                continue
            fields = line.strip().split('\t')
            # A blank line splits into a single empty field; nothing to parse
            if fields == ['']:
                continue
            if fields[2] != 'gene':
                continue
            gene_id = fields[8].split('gene_id "')[1].split('";')[0]
            genes[gene_id] = tuple(fields[:9])
    return genes


def _format_score(fpkm):
    """Return 100*log2(FPKM+1) rendered as text, cut to at most 7 characters."""
    return str(100 * math.log(float(fpkm) + 1, 2))[0:7]


def run():
    """Write GTF gene records annotated with FPKM values from an expression table.

    Command-line arguments (read from sys.argv):
        1. genes.expr  -- tab-separated expression table; lines starting with
                          'gene_id' are treated as the header and skipped.
                          Column 1 is the gene id, columns 6-8 are FPKM and
                          its lower/upper bounds.
        2. gtf         -- annotation file; only 'gene' records are used.
        3. outfilename -- destination file.

    For every expression row the matching GTF gene record is emitted with its
    score column replaced by 100*log2(FPKM+1) and with FPKM, FPKM_lo, FPKM_hi
    and a placeholder npIDR "." appended to the attributes.  Output lines are
    sorted by chromosome name, then numeric start, then numeric end.

    Exits with status 1 after printing a usage message when fewer than three
    arguments are supplied.  Raises KeyError when an expression row refers to
    a gene_id that has no 'gene' record in the GTF.
    """
    # The script name plus three arguments means argv must hold four entries
    if len(sys.argv) < 4:
        print('usage: python %s genes.expr gtf outfilename' % sys.argv[0])
        sys.exit(1)

    expr_path = sys.argv[1]
    gtf_path = sys.argv[2]
    out_path = sys.argv[3]

    genes = _read_gene_records(gtf_path)

    # Placeholder written into the npIDR attribute of every record
    idr = '.'

    records = []
    with open(expr_path) as handle:
        for line in handle:
            if line.startswith('gene_id'):
                continue
            fields = line.strip().split('\t')
            if fields == ['']:
                continue
            gene_id = fields[0]
            fpkm = fields[5]
            fpkm_lo = fields[6]
            fpkm_hi = fields[7]
            if gene_id not in genes:
                raise KeyError('gene_id %s from %s has no gene record in %s'
                               % (gene_id, expr_path, gtf_path))
            # The GTF's own score column is discarded in favour of the FPKM score
            (chrom, source, feature, left, right,
             _, strand, frame, attributes) = genes[gene_id]
            attributes += ' FPKM "%s"; FPKM_lo "%s"; FPKM_hi "%s"; npIDR "%s";' % (
                fpkm, fpkm_lo, fpkm_hi, idr)
            # Coordinates are stored as ints and placed right after the
            # chromosome so that sorting orders records numerically by position
            records.append((chrom, int(left), int(right), source, feature,
                            _format_score(fpkm), strand, frame, attributes))

    records.sort()

    # The output is opened only after both inputs parsed successfully
    with open(out_path, 'w') as outfile:
        for (chrom, left, right, source, feature,
             score, strand, frame, attributes) in records:
            columns = [chrom, source, feature, str(left), str(right),
                       score, strand, frame, attributes]
            outfile.write('\t'.join(columns) + '\n')


# Run the conversion only when this file is executed as a script, so that
# importing it does not read sys.argv or exit the interpreter.
if __name__ == '__main__':
    run()