##################################
#                                #
# Last modified 2024/08/28       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def run():
    """Sum activity * contact per gene and write one total per gene.

    Command-line arguments (read from sys.argv):
        1. input file: tab-delimited text; lines starting with '#' and
           blank lines are skipped
        2. 0-based column index of the gene identifier
        3. 0-based column index of the activity value (parsed as float)
        4. 0-based column index of the contact value (parsed as float)
        5. output file: one 'geneID <tab> total' line per gene, where total
           is the sum of activity * contact over all of that gene's lines

    Prints a usage message and exits with status 1 when fewer than five
    arguments are supplied.  A non-numeric activity/contact field raises
    ValueError and a missing column raises IndexError.
    """
    # Five positional arguments are read below (sys.argv[1]..sys.argv[5]),
    # so argv needs at least six entries counting the script name itself.
    if len(sys.argv) < 6:
        print('usage: python %s input gene_chr_field_ID ATAC/DNase/ChIP_fieldID Hi-C_fieldID outfile' % sys.argv[0])
        print('\tfield IDs are 0-based column indices into the tab-delimited input file')
        sys.exit(1)

    input_path = sys.argv[1]
    geneFieldID = int(sys.argv[2])
    ActFieldID = int(sys.argv[3])
    ContactFieldID = int(sys.argv[4])
    outfilename = sys.argv[5]

    # geneID -> running sum of activity * contact.  Keeping only the running
    # total (instead of every pair) adds the products in the same order while
    # avoiding holding the whole input in memory.
    gene_totals = {}
    line_count = 0
    with open(input_path) as linelist:
        for line in linelist:
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if not stripped:
                # A blank line carries no fields to index into; skip it.
                continue
            line_count += 1
            if line_count % 100000 == 0:
                print('%d lines processed' % line_count)
            fields = stripped.split('\t')
            geneID = fields[geneFieldID]
            activity = float(fields[ActFieldID])
            contact = float(fields[ContactFieldID])
            gene_totals[geneID] = gene_totals.get(geneID, 0) + activity * contact

    with open(outfilename, 'w') as outfile:
        for geneID in gene_totals:
            outline = geneID + '\t' + str(gene_totals[geneID])
            outfile.write(outline.strip() + '\n')
        
# Script entry point: the call is unguarded, so it runs whenever this file is
# executed or imported.
run()