##################################
#                                #
# Last modified 10/07/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
from sets import Set

# Columns that are looked up by name in the header line of each input table.
_REQUIRED_COLUMNS = ('tracking_id', 'gene_id', 'nearest_ref_id',
                     'gene_short_name', 'status', 'locus', 'FPKM')


def _read_isoform_tracking(path, columns=None):
    """Parse one tab-delimited isoform tracking table.

    A line starting with 'tracking_id' is treated as the header and fixes the
    position of every required column for the lines that follow it.

    path    -- file to read
    columns -- optional {column name: index} layout used for data lines that
               appear before any header line (the script has always reused
               the first file's layout for a header-less second file)

    Returns (records, columns): records maps
    (tracking_id, nearest_ref_id, gene_id, gene_short_name, locus) to
    (FPKM, status), both kept as strings; columns is the layout in effect at
    the end of the file.

    Raises ValueError if a data line is met while no layout is known, or if
    the header lacks a required column; IndexError if a data line is too
    short for the layout.
    """
    records = {}
    with open(path) as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            fields = stripped.split('\t')
            if line.startswith('tracking_id'):
                columns = dict((name, fields.index(name))
                               for name in _REQUIRED_COLUMNS)
                continue
            if columns is None:
                raise ValueError(
                    "%s: data line found before the 'tracking_id' header"
                    % path)
            key = (fields[columns['tracking_id']],
                   fields[columns['nearest_ref_id']],
                   fields[columns['gene_id']],
                   fields[columns['gene_short_name']],
                   fields[columns['locus']])
            records[key] = (fields[columns['FPKM']],
                            fields[columns['status']])
    return records, columns


def run():
    """Merge the FPKM columns of two isoform tracking tables.

    Command line (read from sys.argv):
        label1 isoforms_tracking1 label2 isoforms_tracking2 outfilename

    Writes a tab-delimited file with a '#'-prefixed header and one row per
    isoform that is present in both inputs with status 'OK' in both; each row
    holds the identifying columns followed by the FPKM from the first and the
    second input. Each label is printed before its file is read.

    Exits with status 1 after printing a usage message when fewer than five
    arguments are supplied.
    """
    # Five arguments plus the program name: sys.argv[5] is read below.
    if len(sys.argv) < 6:
        print('usage: python %s label1 isoforms_tracking1 label2 isoforms_tracking2 outfilename' % sys.argv[0])
        sys.exit(1)

    label1 = sys.argv[1]
    isoforms1 = sys.argv[2]
    label2 = sys.argv[3]
    isoforms2 = sys.argv[4]
    outfilename = sys.argv[5]

    print(label1)
    DataDict1, columns = _read_isoform_tracking(isoforms1)

    print(label2)
    DataDict2, columns = _read_isoform_tracking(isoforms2, columns)

    header = ['#tracking_id', 'nearest_ref_id', 'gene_id', 'gene_short_name',
              'locus', label1, label2]

    with open(outfilename, 'w') as outfile:
        outfile.write('\t'.join(header) + '\n')
        for key in DataDict1:
            if key not in DataDict2:
                continue
            (FPKM1, status1) = DataDict1[key]
            (FPKM2, status2) = DataDict2[key]
            # Only report isoforms quantified successfully in both samples.
            if status1 == 'OK' and status2 == 'OK':
                outfile.write('\t'.join(key + (FPKM1, FPKM2)) + '\n')

# Script entry point: called unconditionally at module level, so importing
# this file also executes run() against the current sys.argv.
run()
