##################################
#                                #
# Last modified 2021/02/19       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():
    """Copy the input file to the output file, skipping repeated records.

    Command line: ``python <script> merged_sort.txt outfile``

    ``sys.argv[1]`` is the path of the file to read and ``sys.argv[2]`` the
    path of the file to write.  Every line is split on single spaces and its
    first eight fields form the record key.  The first line seen for each key
    is written to the output unchanged; later lines with the same key are
    dropped and counted.  A progress message is printed every 1,000,000
    input lines.

    All distinct keys are kept in memory for the duration of the run.

    Raises:
        SystemExit: with status 1 (after printing a usage message) when
            fewer than two command-line arguments are supplied.
        IndexError: when an input line has fewer than eight fields.
    """
    # Both the input path (argv[1]) and the output path (argv[2]) are
    # required, so argv must hold at least three entries.
    if len(sys.argv) < 3:
        print('usage: python %s merged_sort.txt outfile' % sys.argv[0])
        sys.exit(1)

    in_path = sys.argv[1]
    out_path = sys.argv[2]

    seen = set()
    dups = 0

    # Open the input first so that a bad input path does not truncate an
    # existing output file; the with-blocks close both files even on error.
    with open(in_path) as infile:
        with open(out_path, 'w') as outfile:
            for lineno, line in enumerate(infile, 1):
                if lineno % 1000000 == 0:
                    print('%dM lines processed %d dups found'
                          % (lineno // 1000000, dups))

                # Split on a single space (not arbitrary whitespace) so that
                # empty fields stay significant in the key.
                key = tuple(line.strip().split(' ')[:8])
                if len(key) < 8:
                    raise IndexError(
                        'line %d has fewer than 8 space-separated fields'
                        % lineno)

                if key in seen:
                    dups += 1
                    continue

                seen.add(key)
                outfile.write(line)

# Run the de-duplication only when this file is executed as a script, so that
# importing the module does not parse the importer's command line.
if __name__ == '__main__':
    run()
