##################################
#                                #
# Last modified 2023/02/02       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import os
from sets import Set
from operator import add

def _parse_field_ids(spec):
    """Convert a column specification into a list of 0-based field indices.

    spec is either an inclusive range written as 'start-end' or a
    comma-separated list of integers such as '0,3,4'.
    """
    if '-' in spec:
        bounds = spec.split('-')
        return list(range(int(bounds[0]), int(bounds[1]) + 1))
    return [int(ID) for ID in spec.split(',')]


def _open_data_stream(datafilename):
    """Open datafilename for line-by-line text reading.

    Returns a (stream, process) pair.  Files ending in .bz2, .gz or .zip are
    decompressed through bzip2, gunzip or unzip respectively; process is the
    child to wait on afterwards.  Any other file is opened directly and
    process is None.
    """
    import subprocess

    if datafilename.endswith('.bz2'):
        command = ['bzip2', '-cd', datafilename]
    elif datafilename.endswith('.gz'):
        command = ['gunzip', '-c', datafilename]
    elif datafilename.endswith('.zip'):
        command = ['unzip', '-p', datafilename]
    else:
        return open(datafilename, 'r'), None
    # An argument list (no shell) keeps file names containing spaces or
    # shell metacharacters intact.
    process = subprocess.Popen(command, stdout=subprocess.PIPE,
                               universal_newlines=True)
    return process.stdout, process


def run():
    """Collapse every N consecutive data rows of a tab-separated file.

    Command line (read from sys.argv):
        datafilename labelIDs valueIDs N outfilename [-average]

    For each complete group of N data rows, one output row is written that
    contains the label fields of the LAST row of the group followed by the
    per-column sums of the value fields (or the sums divided by N when
    -average is given).  Lines starting with '#' are copied to the output
    unchanged and are not counted.  Blank lines are skipped.  A trailing
    group with fewer than N rows produces no output.

    Exits with status 1 after printing a message when too few arguments are
    given or N is smaller than 1.
    """
    # Five positional arguments are required; sys.argv[5] is read below.
    # print(...) with a single argument behaves the same as the Python 2
    # print statement.
    if len(sys.argv) < 6:
        print('usage: python %s datafilename labelIDs valueIDs N outfilename [-average]' % sys.argv[0])
        print('format of IDs: either comma separated or start-end (including end)')
        sys.exit(1)

    doAverage = False
    if '-average' in sys.argv:
        doAverage = True
        print('will output the average')

    datafilename = sys.argv[1]
    LabelIDfields = _parse_field_ids(sys.argv[2])
    ValueIDfields = _parse_field_ids(sys.argv[3])
    N = int(sys.argv[4])
    outfilename = sys.argv[5]

    if N < 1:
        # RL % N below would raise ZeroDivisionError for N == 0.
        print('N must be a positive integer')
        sys.exit(1)

    stream, process = _open_data_stream(datafilename)
    try:
        with open(outfilename, 'w') as outfile:
            RL = 0
            scores = [0] * len(ValueIDfields)
            for rawline in stream:
                # NOTE: strip() also removes leading/trailing tabs, so an
                # empty first column shifts the field indices; this matches
                # the historical behaviour of the script.
                line = rawline.strip()
                if line == '':
                    # A blank line is not end-of-file; keep reading.
                    continue
                if line[0] == '#':
                    outfile.write(line + '\n')
                    continue
                RL += 1
                fields = line.split('\t')
                linescore = [float(fields[ID]) for ID in ValueIDfields]
                scores = [total + value
                          for total, value in zip(scores, linescore)]
                if RL % N != 0:
                    continue
                # Progress report: number of data rows consumed so far.
                print(RL)
                columns = [fields[ID] for ID in LabelIDfields]
                if doAverage:
                    columns.extend(str(total / N) for total in scores)
                else:
                    columns.extend(str(total) for total in scores)
                outfile.write('\t'.join(columns).strip() + '\n')
                scores = [0] * len(ValueIDfields)
    finally:
        stream.close()
        if process is not None:
            process.wait()
        
# Run the command-line tool only when this file is executed as a script,
# so that importing the module does not parse sys.argv or exit.
if __name__ == '__main__':
    run()

