##################################
#                                #
# Last modified 2019/08/26       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():
    """Sum selected columns of a tab-separated file and write the totals.

    Reads three command-line arguments from ``sys.argv``:

    1. input file path
    2. value fields: either a comma-separated list of 0-based column
       indices, or ``start:end`` (both ends included)
    3. output file path

    A line starting with ``#`` is treated as a header: it supplies the label
    of each selected column and (re)sets that column's running total to 0.
    Header lines that are too short to contain every selected column are
    skipped.  Every other line contributes ``float(field)`` to the total of
    each selected column.  NUL characters are removed from each line before
    it is split.

    The output has one line per selected column, in ascending column order:
    ``label<TAB>total``.

    Exits with status 1 after printing a usage message when fewer than
    three arguments are given.  Raises ``KeyError`` if a data line (or the
    end of the file) is reached before any usable header line was seen.
    """
    # Three positional arguments are required (sys.argv[3] is read below),
    # so the script name plus arguments must total at least four entries.
    if len(sys.argv) < 4:
        print('usage: python %s input valuefields outputfilename' % sys.argv[0])
        print('       valuefields format: either comma separated, or start:end (including start and end, 0-based)')
        sys.exit(1)

    in_path = sys.argv[1]
    field_spec = sys.argv[2]
    out_path = sys.argv[3]

    if ':' in field_spec:
        bounds = field_spec.split(':')
        # The range is inclusive on both ends, hence the +1.
        valueFields = list(range(int(bounds[0]), int(bounds[1]) + 1))
    else:
        valueFields = [int(f) for f in field_spec.split(',')]
    valueFields.sort()

    print(valueFields)

    DataDict = {}

    # Context managers guarantee both files are closed even if parsing fails.
    with open(out_path, 'w') as outfile:
        with open(in_path) as linelist:
            LC = 0
            for line in linelist:
                LC += 1
                if LC % 1000000 == 0:
                    print('%d M lines processed' % (LC // 1000000))
                fields = line.replace('\x00', '').strip().split('\t')
                if line.startswith('#'):
                    # Index max(valueFields) needs at least max+1 fields,
                    # so a header with exactly max fields is still too short.
                    if len(fields) <= max(valueFields):
                        continue
                    for ID in valueFields:
                        DataDict[ID] = {'label': fields[ID], 'score': 0}
                    continue
                for ID in valueFields:
                    DataDict[ID]['score'] += float(fields[ID])

        for ID in valueFields:
            outline = DataDict[ID]['label'] + '\t' + str(DataDict[ID]['score'])
            outfile.write(outline.strip() + '\n')
   
# Script entry point. There is no __main__ guard, so this call executes
# whenever the module is loaded, including on import.
run()
