##################################
#                                #
# Last modified 02/15/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def _read_idr_values(path):
    """Return a dict mapping 0-based line number to the first tab-separated
    column of that line in the IDR file at *path*."""
    idr_by_index = {}
    with open(path) as handle:
        for index, line in enumerate(handle):
            idr_by_index[index] = line.strip().split('\t')[0]
    return idr_by_index


def _map_transcripts_to_idr(path, idr_by_index):
    """Return two dicts (ID1 -> IDR, ID2 -> IDR) built from the combined table.

    Lines starting with 'ID1' are treated as headers and skipped.  The n-th
    remaining line is paired with entry n of *idr_by_index*; column 0 holds
    ID1 and column 4 holds ID2.  An ID equal to '-1' is not recorded.

    Raises KeyError when the table has more data lines than *idr_by_index*
    has entries, and IndexError when a data line has fewer than five columns.
    """
    idr_by_id1 = {}
    idr_by_id2 = {}
    row = 0
    with open(path) as handle:
        for line in handle:
            if line.startswith('ID1'):
                # Header lines do not consume an IDR entry.
                continue
            fields = line.strip().split('\t')
            id1 = fields[0]
            id2 = fields[4]
            idr = idr_by_index[row]
            if id1 != '-1':
                idr_by_id1[id1] = idr
            if id2 != '-1':
                idr_by_id2[id2] = idr
            row += 1
    return idr_by_id1, idr_by_id2


def _format_score(score):
    """Return *score* as text truncated to at most seven characters.

    str() switches to scientific notation for very small or very large
    floats; slicing that text would drop the exponent and silently change
    the magnitude (1.44e-05 would become '1.44269'), so such values are
    rendered in fixed-point notation before truncating.
    """
    text = str(score)
    if 'e' in text:
        text = '%.10f' % score
    return text[0:7]


def _annotate_gtf(gtf_path, idr_by_transcript, outfile):
    """Copy the GTF at *gtf_path* to *outfile* with a new score and npIDR.

    For every record the score column (column 6) is replaced by
    100 * log2(FPKM + 1) and ' npIDR "<value>";' is appended to the attribute
    column.  Lines starting with '#' and blank lines are not copied.

    Raises KeyError when a transcript_id has no entry in *idr_by_transcript*.
    """
    with open(gtf_path) as handle:
        for line in handle:
            if line.startswith('#'):
                continue
            if not line.strip():
                # A blank (e.g. trailing) line carries no record.
                continue
            fields = line.strip().split('\t')
            attributes = fields[8]
            transcript_id = attributes.split('transcript_id "')[1].split('";')[0]
            fpkm = float(attributes.split('FPKM "')[1].split('";')[0])
            score = 100 * (math.log((fpkm + 1), 2))
            idr = idr_by_transcript[transcript_id]
            columns = fields[0:5]
            columns.append(_format_score(score))
            columns.extend(fields[6:8])
            columns.append(attributes + ' npIDR "' + str(idr) + '";')
            outfile.write('\t'.join(columns) + '\n')


def run():
    """Annotate two GTF files with npIDR values, driven by sys.argv.

    Expected arguments, in order: the combined ID/score table, the IDR file
    (one value per line, aligned with the data lines of the combined table),
    the first GTF, the second GTF, and the two output file names.

    Prints a usage message and exits with status 1 when fewer than six
    arguments are supplied.
    """
    # Six positional arguments plus the script name are required.
    if len(sys.argv) < 7:
        print('usage: python %s GTF-combined-IDs+scores iDR GTF1 GTF2 outfilename1 outfilename2' % sys.argv[0])
        sys.exit(1)

    combined_path = sys.argv[1]
    idr_path = sys.argv[2]
    gtf1_path = sys.argv[3]
    gtf2_path = sys.argv[4]
    out1_path = sys.argv[5]
    out2_path = sys.argv[6]

    # Read the lookup tables first so that a bad input does not leave
    # freshly truncated output files behind.
    idr_by_index = _read_idr_values(idr_path)
    idr_by_id1, idr_by_id2 = _map_transcripts_to_idr(combined_path, idr_by_index)

    with open(out1_path, 'w') as outfile1:
        _annotate_gtf(gtf1_path, idr_by_id1, outfile1)

    with open(out2_path, 'w') as outfile2:
        _annotate_gtf(gtf2_path, idr_by_id2, outfile2)

# Script entry point: this call executes whenever the module is loaded (there
# is no __main__ guard), so importing this file also runs the whole pipeline.
run()