##################################
#                                #
# Last modified 02/11/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set


def _parse_field_spec(spec):
    """Turn a field specification string into a list of column indices.

    The specification is a comma-separated list of tokens.  A token is
    either a single integer ('3') or a range 'A-B', which expands to every
    index from A to B inclusive (descending if B < A).  Indices are used
    as-is to index the tab-split fields of a line, i.e. they are 0-based.

    Raises ValueError if a token is not an integer or a range of integers.
    """
    indices = []
    for token in spec.split(','):
        if '-' in token:
            bounds = token.split('-')
            start, end = int(bounds[0]), int(bounds[-1])
            # Walk towards `end` in whichever direction it lies so that both
            # endpoints are always included.
            step = 1 if end >= start else -1
            indices.extend(range(start, end + step, step))
        else:
            indices.append(int(token))
    return indices


def _read_table(path, label_fields, value_fields):
    """Load a tab-separated file into a {label tuple: list of values} dict.

    Lines starting with '#' are skipped.  Each remaining line is stripped
    and split on tabs; the columns listed in label_fields form the key and
    the columns listed in value_fields form the value (all kept as
    strings).  If a label occurs more than once, the last line wins.

    Raises IndexError if a line has fewer columns than a requested index.
    """
    table = {}
    with open(path) as handle:
        for line in handle:
            if line.startswith('#'):
                continue
            fields = line.strip().split('\t')
            label = tuple([fields[i] for i in label_fields])
            table[label] = [fields[i] for i in value_fields]
    return table


def run():
    """Join two tab-separated tables on their label columns.

    Command-line arguments (read from sys.argv):
        file1 labelfields1 valuefields1 file2 labelfields2 valuefields2
        outfile [-union]

    Without -union only labels present in both files are written.  With
    -union every label from either file is written and the value columns
    of the file that lacks the label are filled with '0'.

    Each output line holds the label columns, the values from file1 and
    the values from file2, every cell followed by a tab, then a newline.
    Prints the usage text and exits with status 1 when too few arguments
    are given.
    """
    # Program name plus seven positional arguments are required.
    if len(sys.argv) < 8:
        print('usage: python %s file1 labelfields1 valuefields1 file2 labelfields2 valuefields2 outfile [-union]' % sys.argv[0])
        print('       fields should be split by a comma or by - if a range is desired')
        print('       if the -union option is used, zeros will be placed where values are missing')
        sys.exit(1)

    doUnion = '-union' in sys.argv

    file1 = sys.argv[1]
    labelFields1 = _parse_field_spec(sys.argv[2])
    valueFields1 = _parse_field_spec(sys.argv[3])
    file2 = sys.argv[4]
    labelFields2 = _parse_field_spec(sys.argv[5])
    valueFields2 = _parse_field_spec(sys.argv[6])
    outfilename = sys.argv[7]

    # Echo the parsed column selections so the user can verify them.
    print('%s %s %s %s' % (labelFields1, valueFields1, labelFields2, valueFields2))

    DataDict1 = _read_table(file1, labelFields1, valueFields1)
    DataDict2 = _read_table(file2, labelFields2, valueFields2)

    # Iterate over snapshots (list(...)) because the dicts are modified
    # while their labels are being walked.
    if doUnion:
        for label in list(DataDict1):
            if label not in DataDict2:
                DataDict2[label] = ['0'] * len(valueFields2)
        for label in list(DataDict2):
            if label not in DataDict1:
                DataDict1[label] = ['0'] * len(valueFields1)
    else:
        for label in list(DataDict1):
            if label not in DataDict2:
                del DataDict1[label]
        for label in list(DataDict2):
            if label not in DataDict1:
                del DataDict2[label]

    # After the step above both dicts hold exactly the same set of labels.
    with open(outfilename, 'w') as outfile:
        for label in DataDict1:
            cells = list(label) + DataDict1[label] + DataDict2[label]
            # Every cell, including the last one, is followed by a tab.
            outfile.write(''.join([cell + '\t' for cell in cells]) + '\n')
            
# Script entry point: run() is called unconditionally at module level, so the
# merge starts as soon as this file is executed (or imported).
run()
