##################################
#                                #
# Last modified 02/18/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def _load_gene_records(gtf_path):
    """Return a dict mapping gene_id to the nine GTF columns of its 'gene' line.

    Lines starting with '#' and lines whose third (feature) column is not
    'gene' are ignored.  If a gene_id appears on several 'gene' lines, the
    last one read wins.
    """
    genes = {}
    with open(gtf_path) as handle:
        for line in handle:
            if line.startswith('#'):
                continue
            fields = line.strip().split('\t')
            if fields[2] != 'gene':
                continue
            # The ID is whatever sits between 'gene_id "' and the next '";'.
            gene_id = fields[8].split('gene_id "')[1].split('";')[0]
            genes[gene_id] = tuple(fields[0:9])
    return genes


def _load_idr_values(idr_path):
    """Return the first column of each non-header line of the IDR file, in file order.

    Lines starting with 'gene_id' are treated as headers and skipped.
    """
    values = []
    with open(idr_path) as handle:
        for line in handle:
            if line.startswith('gene_id'):
                continue
            values.append(line.strip().split('\t')[0])
    return values


def _write_records(out_path, records):
    """Write each record to out_path as one tab-separated line."""
    with open(out_path, 'w') as handle:
        for record in records:
            handle.write('\t'.join(str(column) for column in record) + '\n')


def run():
    """Build two per-replicate GTF files from FPKM data, IDR values and a GTF.

    Command line (read from sys.argv):
        rep1-vs-rep2-data IDR gtf outfilename1 outfilename2

    For every non-header row of the data file, the matching 'gene' line of
    the GTF is looked up by gene ID (column 1 of the data file) and written
    once per replicate with:
      * the score column replaced by 100 * log2(FPKM + 1), cut to at most
        seven characters;
      * FPKM, FPKM_lo, FPKM_hi and npIDR attributes appended.
    Replicate 1 values come from data columns 5-7, replicate 2 values from
    columns 9-11.  IDR values are paired with data rows purely by position
    (n-th non-header IDR line with n-th non-header data row).

    Output lines are sorted by their column tuple, i.e. first by sequence
    name, then source, then numeric start and end.

    Prints a usage message and exits with status 1 if fewer than five
    arguments are supplied.  Raises KeyError for a gene ID missing from the
    GTF and IndexError if the IDR file has fewer rows than the data file.
    """
    # Five positional arguments are required, so argv needs six entries.
    if len(sys.argv) < 6:
        print('usage: python %s rep1-vs-rep2-data IDR gtf outfilename1 outfilename2' % sys.argv[0])
        sys.exit(1)

    data_path, idr_path, gtf_path, out_path1, out_path2 = sys.argv[1:6]

    genes = _load_gene_records(gtf_path)
    idr_values = _load_idr_values(idr_path)

    extra_attributes = ' FPKM "%s"; FPKM_lo "%s"; FPKM_hi "%s"; npIDR "%s";'
    records1 = []
    records2 = []
    row = 0
    with open(data_path) as handle:
        for line in handle:
            if line.startswith('gene_id'):
                continue
            fields = line.strip().split('\t')
            gene_id = fields[0]
            fpkm1, fpkm1_lo, fpkm1_hi = fields[4], fields[5], fields[6]
            fpkm2, fpkm2_lo, fpkm2_hi = fields[8], fields[9], fields[10]

            (chrom, source, _feature, left, right,
             _score, strand, frame, attributes) = genes[gene_id]
            idr = idr_values[row]
            row += 1

            score1 = 100 * math.log(float(fpkm1) + 1, 2)
            score2 = 100 * math.log(float(fpkm2) + 1, 2)
            attributes1 = attributes + extra_attributes % (fpkm1, fpkm1_lo, fpkm1_hi, idr)
            attributes2 = attributes + extra_attributes % (fpkm2, fpkm2_lo, fpkm2_hi, idr)

            # Coordinates are stored as ints so that sorting is numeric.
            records1.append((chrom, source, 'gene', int(left), int(right),
                             str(score1)[0:7], strand, frame, attributes1))
            records2.append((chrom, source, 'gene', int(left), int(right),
                             str(score2)[0:7], strand, frame, attributes2))

    records1.sort()
    records2.sort()

    # Outputs are opened only after all inputs parsed successfully, so a bad
    # input no longer truncates existing output files.
    _write_records(out_path1, records1)
    _write_records(out_path2, records2)

# Script entry point: run the conversion immediately.  There is no
# __main__ guard, so importing this file also executes run().
run()