##################################
#                                #
# Last modified 2019/05/10       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import os
import gzip
import pyBigWig
from sets import Set

def run():
    """Append the mean bigWig signal over each region to a tab-delimited file.

    Command-line arguments (read from ``sys.argv``):
        1. inputfilename  -- tab-delimited region file; opened with gzip if
                             the name ends in '.gz'
        2. chrFieldID     -- 0-based column index of the chromosome name
        3. leftFieldID    -- 0-based column index of the left coordinate
        4. rightFieldID   -- 0-based column index of the right coordinate
        5. bigWig         -- path of the bigWig file to read scores from
        6. outputfilename -- path of the output file

    Lines starting with '#' are copied to the output unchanged.  Every other
    line is written back with one extra tab-separated column holding
    sum(scores) / (right - left), where NaN scores count as 0.  Regions with
    left == right are dropped, and regions for which the bigWig lookup fails
    are reported on stdout and dropped.

    Exits with status 1 after printing a usage message when fewer than six
    arguments are supplied.
    """
    # Six positional arguments are required, so argv (which also holds the
    # script name) must contain at least 7 entries before argv[6] is read.
    if len(sys.argv) < 7:
        print('usage: python %s inputfilename chrFieldID leftFieldID rightFieldID bigWig outputfilename' % sys.argv[0])
        sys.exit(1)

    regionfilename = sys.argv[1]
    chrFieldID = int(sys.argv[2])
    leftFieldID = int(sys.argv[3])
    rightFieldID = int(sys.argv[4])
    bigWig = sys.argv[5]
    outfilename = sys.argv[6]

    bw = pyBigWig.open(bigWig)
    try:
        opener = gzip.open if regionfilename.endswith('.gz') else open
        with opener(regionfilename) as listoflines, open(outfilename, 'w') as outfile:
            for line_count, line in enumerate(listoflines, 1):
                if line_count % 1000000 == 0:
                    print(str(line_count // 1000000) + 'M lines processed')
                if line.startswith('#'):
                    # Header/comment lines pass through untouched.
                    outfile.write(line)
                    continue
                fields = line.replace('\x00', '').strip().split('\t')
                chrom = fields[chrFieldID]
                left = int(fields[leftFieldID])
                right = int(fields[rightFieldID])
                if left == right:
                    # Zero-length region: the mean is undefined, so drop it.
                    continue
                try:
                    scores = bw.values(chrom, left, right)
                except Exception:
                    # Best-effort: a region the bigWig cannot serve is
                    # reported and skipped.  Without the `continue`, the
                    # scores of the previous region would be reused here.
                    print('skipping %s %s %s' % (chrom, left, right))
                    continue
                total = sum(0 if math.isnan(x) else x for x in scores)
                outline = line.strip() + '\t' + str(total / float(right - left))
                outfile.write(outline + '\n')
    finally:
        bw.close()
   
# Script entry point: run() is invoked unconditionally at module level,
# so it executes whenever this file is run (or imported).
run()
