##################################
#                                #
# Last modified 02/15/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def _load_idr_values(idr_path):
    """Return the first tab-separated column of every line in idr_path.

    Values are returned as strings, in file order, one per line.
    """
    with open(idr_path) as handle:
        return [line.strip().split('\t')[0] for line in handle]


def _write_split_tables(table_path, idr_values, out_path1, out_path2):
    """Split the combined two-sample table into one table per sample.

    Lines of table_path starting with 'ID1' are skipped. Every other line
    must have at least 22 tab-separated fields: ID, chr, left, right, then
    nine columns for sample 1 (fields 4-12) and nine for sample 2
    (fields 13-21). The n-th non-skipped line is paired with idr_values[n],
    which is appended as the last column of both output rows.

    Returns a dict mapping each ID (field 0) to its IDR value.
    Raises ValueError if a row is too short or idr_values runs out.
    """
    # The 'lenght' spelling is kept verbatim so the emitted header stays
    # byte-compatible with files produced by earlier runs of this script.
    header = ('#ID\tchr\tleft\tright\tFPKM\tFMI\tfrac\tFPKM_lo\tFPKM_hi'
              '\tcoverage\tlenght\teffective_length\tstatus\tnpIDR')
    id_to_idr = {}
    row_index = 0
    with open(table_path) as table:
        with open(out_path1, 'w') as out1:
            with open(out_path2, 'w') as out2:
                out1.write(header + '\n')
                out2.write(header + '\n')
                for line_number, line in enumerate(table, 1):
                    if line.startswith('ID1'):
                        continue
                    fields = line.strip().split('\t')
                    if len(fields) < 22:
                        raise ValueError(
                            'line %d of %s has %d tab-separated fields, '
                            'expected at least 22'
                            % (line_number, table_path, len(fields)))
                    if row_index >= len(idr_values):
                        raise ValueError(
                            'IDR file has only %d values but %s has more '
                            'data rows' % (len(idr_values), table_path))
                    idr = idr_values[row_index]
                    row_index += 1
                    id_to_idr[fields[0]] = idr
                    # fields[:13] = shared columns + sample 1 columns.
                    out1.write('\t'.join(fields[:13] + [idr]) + '\n')
                    # shared columns + sample 2 columns (fields 13-21).
                    out2.write(
                        '\t'.join(fields[:4] + fields[13:22] + [idr]) + '\n')
    return id_to_idr


def _annotate_gtf(gtf_path, out_path, id_to_idr):
    """Copy gtf_path to out_path, appending ' npIDR "<value>";' to each line.

    Lines starting with '#' are dropped. The transcript ID is taken from the
    text between 'transcript_id "' and '";' in the ninth tab-separated field.

    Raises IndexError if a line has no ninth field or no transcript_id, and
    KeyError if the transcript ID is not a key of id_to_idr.
    """
    with open(gtf_path) as gtf:
        with open(out_path, 'w') as out:
            for line in gtf:
                if line.startswith('#'):
                    continue
                record = line.strip()
                attributes = record.split('\t')[8]
                transcript_id = (
                    attributes.split('transcript_id "')[1].split('";')[0])
                out.write(record + ' npIDR "'
                          + str(id_to_idr[transcript_id]) + '";\n')


def run():
    """Attach IDR values to two expression tables and two GTF files.

    Command-line arguments (positions as read from sys.argv):
        1  combined two-sample table (tab-separated)
        2  IDR file, one value per data row of the table
        3  first input GTF
        4  second input GTF
        5  output path for the annotated first GTF
        6  output path for the annotated second GTF
        7  output path for the sample 1 table
        8  output path for the sample 2 table

    Prints a usage message and exits with status 1 if fewer than eight
    arguments are given.
    """
    # Eight arguments are read (sys.argv[1] .. sys.argv[8]), so argv must
    # hold at least nine entries including the program name.
    if len(sys.argv) < 9:
        sys.stdout.write(
            'usage: python %s GENCODE-data iIDR gtf1 gtf2 gtf-outfilename1 '
            'gtf-outfilename2 outfilename1 outfilename2\n' % sys.argv[0])
        sys.exit(1)

    table_path = sys.argv[1]
    idr_path = sys.argv[2]
    gtf_path1 = sys.argv[3]
    gtf_path2 = sys.argv[4]
    gtf_out_path1 = sys.argv[5]
    gtf_out_path2 = sys.argv[6]
    table_out_path1 = sys.argv[7]
    table_out_path2 = sys.argv[8]

    idr_values = _load_idr_values(idr_path)
    id_to_idr = _write_split_tables(
        table_path, idr_values, table_out_path1, table_out_path2)

    _annotate_gtf(gtf_path1, gtf_out_path1, id_to_idr)
    _annotate_gtf(gtf_path2, gtf_out_path2, id_to_idr)

# Script entry point: run() is invoked unconditionally at module level, so it
# executes both when this file is run as a script and when it is imported.
run()