##################################
#                                #
# Last modified 2019/12/06       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set

def _option_value(flag):
    """Return the command-line token following *flag*; exit if it is missing."""
    position = sys.argv.index(flag) + 1
    if position >= len(sys.argv):
        print('error: %s requires a value' % flag)
        sys.exit(1)
    return sys.argv[position]


def _read_keys(filename, fieldID, splitString, doLine, doIgnoreCap, IDfields):
    """Return one comparison key per line of *filename*.

    Mode precedence matches the command-line handling in run():
    whole line (doLine), then lower-cased column (doIgnoreCap), then the
    tab-joined IDfields columns (when IDfields is not None), and finally
    the plain column fieldID.
    """
    keys = []
    # 'with' guarantees the handle is closed even if a line is malformed.
    with open(filename) as lineslist:
        for line in lineslist:
            stripped = line.strip()
            if doLine:
                keys.append(stripped)
                continue
            fields = stripped.split(splitString)
            if doIgnoreCap:
                # Lower-case the requested column rather than always column 0,
                # so -ignoreCap honours fieldID1/fieldID2.
                keys.append(fields[fieldID].lower())
            elif IDfields is not None:
                # Joined with tabs regardless of splitString; the strip drops
                # surrounding whitespace left by empty edge columns.
                keys.append('\t'.join([fields[ID] for ID in IDfields]).strip())
            else:
                keys.append(fields[fieldID])
    return keys


def run():
    """Compare the keys of two text files and write the resulting set.

    Reads its arguments from sys.argv:
        list1filename fieldID1 list2filename fieldID2 outfilename
        [-union] [-outersect] [-fields ID1,...,IDN] [-doLine]
        [-split string] [-ignoreCap]

    By default the intersection of the two key sets is written, one key per
    line in sorted order. -union writes the union; -outersect writes the keys
    found in list1 but not in list2 and takes precedence over -union.
    The sizes of both key sets and of the result are printed to stdout.

    Exits with status 1 when fewer than five positional arguments are given
    or when -fields / -split is not followed by a value.
    """
    # Five positional arguments plus the program name are required, because
    # sys.argv[5] (the output file name) is read below.
    if len(sys.argv) < 6:
        print('usage: python %s  list1filename fieldID1 list2filename fieldID2 outfilename [-union] [-outersect] [-fields ID1,ID2,...IDN] [-doLine] [-split string] [-ignoreCap]' % sys.argv[0])
        sys.exit(1)

    list1filename = sys.argv[1]
    fieldID1 = int(sys.argv[2])
    list2filename = sys.argv[3]
    fieldID2 = int(sys.argv[4])
    outfilename = sys.argv[5]

    IDfields = None
    if '-fields' in sys.argv:
        IDfields = sorted(int(ID) for ID in _option_value('-fields').split(','))
        print('will consider fields %s together' % (IDfields,))

    doUnion = '-union' in sys.argv
    if doUnion:
        print('doing Union')

    doIgnoreCap = '-ignoreCap' in sys.argv
    if doIgnoreCap:
        print('ignoring capital letter')

    doOutersect = '-outersect' in sys.argv
    doLine = '-doLine' in sys.argv

    splitString = '\t'
    if '-split' in sys.argv:
        splitString = _option_value('-split')
        print(splitString)

    list1 = set(_read_keys(list1filename, fieldID1, splitString,
                           doLine, doIgnoreCap, IDfields))
    list2 = set(_read_keys(list2filename, fieldID2, splitString,
                           doLine, doIgnoreCap, IDfields))

    # -outersect wins when both -union and -outersect are supplied.
    if doOutersect:
        commonlist = list1 - list2
    elif doUnion:
        commonlist = list1 | list2
    else:
        commonlist = list1 & list2
    commonlist = sorted(commonlist)

    print('%d %d %d' % (len(list1), len(list2), len(commonlist)))

    with open(outfilename, 'w') as outfile:
        for f in commonlist:
            outfile.write(f + '\n')

# Script entry point: run() is invoked unconditionally at module level, so it
# executes whenever this file is run or imported (there is no __main__ guard).
run()
