##################################
#                                #
# Last modified 01/21/2016       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set


def _parse_field_indices(spec):
    """Return the comma-separated column numbers in *spec* as a sorted list of ints."""
    # NOTE(review): the indices are sorted independently for each file, so
    # key columns are paired by ascending position, not by the order given
    # on the command line (e.g. "3,1" vs "0,2" pairs 1<->0 and 3<->2).
    # Kept as in the original behaviour -- confirm this is intended.
    return sorted(int(token) for token in spec.split(','))


def _row_key(row, indices):
    """Build the tuple join key from the tab-separated *row* (already stripped)."""
    columns = row.split('\t')
    return tuple(columns[i] for i in indices)


def run():
    """Join two tab-separated files on the given key columns.

    Command line: ``file1 fields1 file2 fields2 outputfilename`` where
    ``fields1``/``fields2`` are comma-separated, 0-based column indices
    (they are used directly to index the split line).

    file2 is loaded into a dictionary keyed by its key columns; when several
    file2 rows share a key, the last one wins.  Each file1 row whose key is
    present in that dictionary is written to the output as
    ``<file1 row>\\t<file2 row>``.  Lines starting with ``#`` are skipped in
    file2 and copied verbatim from file1 to the output.  file1 rows without
    a match are dropped and their count is printed at the end.

    Exits with status 1 after printing usage when too few arguments are given.
    Raises ValueError for a non-integer field and IndexError when a row has
    fewer columns than a requested index.
    """
    # Five positional arguments are required, i.e. len(sys.argv) must be at
    # least 6.  The previous "< 5" check let a call without the output
    # filename through and then failed with IndexError on sys.argv[5].
    if len(sys.argv) < 6:
        print('usage: python %s file1 fields1 file2 fields2 outputfilename' % sys.argv[0])
        print('\tThe first file should be the one with multiple entries per unique ID; the second file will be combined with it')
        print('\tseparate fields with a coma: 1,2,4...')
        sys.exit(1)

    file1 = sys.argv[1]
    fields1 = _parse_field_indices(sys.argv[2])
    file2 = sys.argv[3]
    fields2 = _parse_field_indices(sys.argv[4])
    outfilename = sys.argv[5]

    # Index file2: key tuple -> whole stripped line.
    DataDict2 = {}
    with open(file2) as linelist:
        for line in linelist:
            if line.startswith('#'):
                continue
            row = line.strip()
            DataDict2[_row_key(row, fields2)] = row

    NF = 0

    # The output is opened before file1, as in the original, so the order in
    # which files are touched (and created) does not change.
    with open(outfilename, 'w') as outfile:
        with open(file1) as linelist:
            for line in linelist:
                if line.startswith('#'):
                    # Comment/header lines of file1 are passed through as-is.
                    outfile.write(line)
                    continue
                row = line.strip()
                ID = _row_key(row, fields1)
                if ID in DataDict2:
                    outfile.write(row + '\t' + DataDict2[ID] + '\n')
                else:
                    NF += 1

    # Single-argument print form gives the same text under Python 2 and 3
    # ("print a, b" inserted one separating space between the two items).
    print('%s %s' % ('not found in file2: ', NF))
            
# Execute the join only when this file is run as a script, so importing the
# module (e.g. to reuse or test run()) does not read sys.argv or exit.
if __name__ == '__main__':
    run()
