##################################
#                                #
# Last modified 2018/12/03       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import pyBigWig
import gzip
import os
from sets import Set

def run():

    if len(sys.argv) < 5:
        print 'usage: python %s bedfilename chrField readlength bigWig outputfilename' % sys.argv[0]
        print 'Note: the script assumes RPM-normalized input' 
        sys.exit(1)
    
    bed = sys.argv[1]
    fieldID = int(sys.argv[2])
    RL = int(sys.argv[3])
    bigWig = sys.argv[4]
    outfilename = sys.argv[5]

    outfile = open(outfilename, 'w')

    bw = pyBigWig.open(bigWig)

    lineslist = open(bed)
    l=0
    for line in lineslist:
        l+=1
        if l % 50000 == 0:
            print l, 'lines processed'
        if line[0]=='#':
            outline = line.strip() + '\tRPM'
            outfile.write(outline + '\n')
            continue
        fields = line.strip().split('\t')
        chr = fields[fieldID]
        left = int(fields[fieldID+1])
        right = int(fields[fieldID+2])
        score = 0.0
#        print chr,left,right
#        print bw.values(chr,left,right)
        for s in bw.values(chr,left,right):
            if math.isnan(s):
                pass
            else:
                score += float(s)
        score = score/RL
        outline = line.strip() + '\t' + str(score)
        outfile.write(outline + '\n')

    outfile.close()
   
run()
