##################################
#                                #
# Last modified 08/24/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def run():
    """Merge exon conservation scores with exon classification data.

    Command line arguments (read from sys.argv):
        1. conservation   - tab-separated file; columns are chr, left, right,
                            followed by the values stored as AverageCons and
                            FractionCons.
        2. classification - tab-separated file; columns are chr, left, right,
                            strand, type, match, FPKM, FPKM_lo, FPKM_hi,
                            transcripts.
        3. outputfilename - tab-separated file to write.

    Lines starting with '#' and blank lines are skipped in both inputs.
    Exons are keyed by (chr, left, right).  Only exons present in both
    inputs are written, sorted by key.  The 'Not found' count printed at the
    end is the number of classification lines with no conservation entry
    plus the number of conservation exons with no classification entry.

    Exits with status 1 after printing a usage message when fewer than three
    arguments are supplied.
    """

    # Three positional arguments are required, so argv needs four entries.
    if len(sys.argv) < 4:
        print('usage: python %s conservation classification outputfilename' % sys.argv[0])
        sys.exit(1)

    conservation = sys.argv[1]
    classification = sys.argv[2]
    outfilename = sys.argv[3]

    # Classification columns, in file order, starting at column index 3.
    classification_fields = ('strand', 'type', 'match', 'FPKM',
                             'FPKM_lo', 'FPKM_hi', 'transcripts')
    # Order of the value columns in the output, after chr/left/right.
    output_fields = classification_fields + ('AverageCons', 'FractionCons')

    ExonDict = {}
    print(conservation)
    with open(conservation) as lineslist:
        for line in lineslist:
            if line.startswith('#') or not line.strip():
                continue
            fields = line.strip().split('\t')
            key = (fields[0], int(fields[1]), int(fields[2]))
            record = ExonDict.setdefault(key, {})
            record['AverageCons'] = fields[3]
            record['FractionCons'] = fields[4]

    notFound = 0

    print(classification)
    with open(classification) as lineslist:
        for line in lineslist:
            if line.startswith('#') or not line.strip():
                continue
            fields = line.strip().split('\t')
            key = (fields[0], int(fields[1]), int(fields[2]))
            record = ExonDict.get(key)
            if record is None:
                # Classified exon without any conservation data.
                notFound += 1
                continue
            for offset, name in enumerate(classification_fields):
                record[name] = fields[3 + offset]

    # Open the output only after both inputs parsed successfully, so a bad
    # input does not leave a truncated output file behind.
    with open(outfilename, 'w') as outfile:
        outline = '#chr\tleft\tright\tstrand\tType\tMatch\tFPKM\tFPKM_conf_lo\tFPKM_conf_hi\tTranscripts\tAverageConservation\tFractionConserved'
        outfile.write(outline + '\n')
        for key in sorted(ExonDict):
            record = ExonDict[key]
            try:
                values = [record[name] for name in output_fields]
            except KeyError:
                # Conservation exon that never appeared in the classification.
                notFound += 1
                continue
            chrom, left, right = key
            outfile.write('\t'.join([chrom, str(left), str(right)] + values) + '\n')

    print('Not found %d' % notFound)
   
# Run the merge only when this file is executed as a script, so that
# importing the module does not parse sys.argv or touch any files.
if __name__ == '__main__':
    run()
