##################################
#                                #
# Last modified 03/09/2012       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import random
from sets import Set

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s inputfilename chrFieldID wigFile outfilename [-singlecoordinate radius] [-maximumValue]' % sys.argv[0]
        sys.exit(1)

    input = sys.argv[1]
    wiggle = sys.argv[3]
    chfFieldID = int(sys.argv[2])
    outfilename = sys.argv[4]

    doSC=False
    if '-singlecoordinate' in sys.argv:
        doSC=True
        radius=int(sys.argv[sys.argv.index('-singlecoordinate')+1])

    doMax=False
    if '-maximumValue' in sys.argv:
        doMax=True
        print 'Will output the maximum value instead'

    WantedDict={}

    linelist = open(input)
    i=0
    for line in linelist:
        if line.startswith('#'):
            continue
        i+=1
        if i % 1000000 == 0:
            print i, 'lines processed'
        fields = line.strip().split('\t')
        chr=fields[chfFieldID]
        if doSC:
            left = int(fields[chfFieldID+1])-radius
            right = int(fields[chfFieldID+1])+radius
        else:
            left = int(fields[chfFieldID+1])
            right = int(fields[chfFieldID+2])
        if WantedDict.has_key(chr):
            pass
        else:
            WantedDict[chr]={}
        for j in range(left,right):
            WantedDict[chr][j]=0

    linelist = open(wiggle)
    i=0
    for line in linelist:
        if line.startswith('#'):
            continue
        i+=1
        if i % 5000000 == 0:
            print str(i/1000000) + 'M lines processed'
        fields = line.strip().split('\t')
        if len(fields) == 1:
            fields = line.strip().split(' ')
        chr = fields[0]
        left = int(fields[1])
        right = int(fields[2])
        score = float(fields[3])
        if WantedDict.has_key(chr):
            pass
        else:
            continue
        for j in range(left,right):
            if WantedDict[chr].has_key(j):
                WantedDict[chr][j] = score

    outfile = open(outfilename, 'w')

    linelist = open(input)
    for line in linelist:
        if line.startswith('#'):
            outline = line.strip() + '\t' + 'Score' + '\n'
            outfile.write(outline)
            continue
        fields = line.strip().split('\t')
        chr = fields[chfFieldID]
        if doSC:
            left = int(fields[chfFieldID+1])-radius
            right = int(fields[chfFieldID+1])+radius
        else:
            left = int(fields[chfFieldID+1])
            right = int(fields[chfFieldID+2])
        if doMax:
            Score = 0.0
            for j in range(left,right):
                if WantedDict[chr][j] > Score:
                    Score = WantedDict[chr][j]
            if right == left:
                print 'region of size 0 encountered, skipping', chr, right, left
                continue
        else:
            TotalScore = 0.0
            for j in range(left,right):
                TotalScore += WantedDict[chr][j]
            if right == left:
                print 'region of size 0 encountered, skipping', chr, right, left
                continue
            Score = TotalScore / (right-left)
        outline = line.strip() + '\t' + str(Score) + '\n'
        outfile.write(outline)

    outfile.close()

run()
