##################################
#                                #
# Last modified 03/04/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math

def _iter_genes(path, chr_field, min_length):
    """Yield (chrom, left, right, strand) for each usable line of the genes file.

    Fields are tab-separated; the chromosome is read from column *chr_field*
    and start, end and strand from the three columns that follow it.  Blank
    lines are skipped, as are genes whose length (right - left) is not
    strictly greater than *min_length*.
    """
    with open(path) as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            left = int(fields[chr_field + 1])
            right = int(fields[chr_field + 2])
            if right - left <= min_length:
                continue
            yield fields[chr_field], left, right, fields[chr_field + 3]


def _collect_window_positions(genes, chr_field, window, min_length):
    """Return ({chrom: {position: 0}}, gene_count) for both gene-edge windows.

    Only positions inside the first or last *window* bases of a kept gene are
    registered, so the signal file can later be filtered against them.
    The count is a float so that the final averaging is a true division.
    """
    scores = {}
    gene_count = 0.0
    for chrom, left, right, _strand in _iter_genes(genes, chr_field, min_length):
        positions = scores.setdefault(chrom, {})
        for i in range(left, left + window):
            positions[i] = 0
        for i in range(right - window, right):
            positions[i] = 0
        gene_count += 1
    return scores, gene_count


def _load_scores(wiggle, scores):
    """Fill *scores* in place from the tab-separated signal file.

    Each data line is read as chrom, start, end, score; every registered
    position in [start, end) receives that score.  Lines starting with '#',
    lines on 'chrM', and lines on chromosomes without registered positions
    are ignored.
    """
    with open(wiggle) as handle:
        for line_number, line in enumerate(handle, 1):
            if line_number % 1000000 == 0:
                print('%d lines processed' % line_number)
            if line.startswith('#'):
                continue
            fields = line.strip().split('\t')
            chrom = fields[0]
            if chrom == 'chrM' or chrom not in scores:
                continue
            positions = scores[chrom]
            left = int(fields[1])
            right = int(fields[2])
            score = float(fields[3])
            for i in range(left, right):
                if i in positions:
                    positions[i] = score


def _sum_profile(genes, chr_field, window, min_length, scores):
    """Return {offset: summed score} for offsets in [-window, window).

    For '+' genes the left-edge window fills the negative offsets and the
    right-edge window fills the non-negative ones.  For '-' genes the
    left-edge window is mirrored into the non-negative offsets.  Genes with
    any other strand value contribute nothing.
    """
    profile = dict.fromkeys(range(-window, window), 0)
    for chrom, left, right, strand in _iter_genes(genes, chr_field, min_length):
        positions = scores[chrom]
        if strand == '+':
            for i in range(left, left + window):
                profile[i - left - window] += positions[i]
            for i in range(right - window, right):
                profile[i - (right - window)] += positions[i]
        elif strand == '-':
            for i in range(left, left + window):
                # Mirror around the window size (this used to be a
                # hard-coded 100, which broke any other window size).
                profile[window - 1 - (i - left)] += positions[i]
            for i in range(right - window, right):
                # NOTE(review): unlike the left-edge window above, this one
                # is not mirrored for '-' genes; kept as in the original --
                # confirm whether that is intended.
                profile[i - (right - window) - window] += positions[i]
    return profile


def run():
    """Write the average signal profile around gene edges to a file.

    Command-line arguments (sys.argv[1:7]):
        genes.bed       tab-separated gene table
        chrfield        0-based column index of the chromosome field; start,
                        end and strand are read from the next three columns
        wiggle          tab-separated signal file: chrom, start, end, score
        window          number of bases taken at each edge of every gene
        minGeneLength   genes must be strictly longer than this to be used
        outputfilename  destination; one "offset<TAB>mean score" line per
                        offset in [-window, window), in ascending order

    The mean divides each summed score by the number of genes that passed
    the length filter.  Prints the usage line and exits with status 1 when
    fewer than six arguments are supplied.
    """
    # Six arguments are required, i.e. argv must hold seven entries.
    if len(sys.argv) < 7:
        print('usage: python %s genes.bed chrfield wiggle window minGeneLength outputfilename' % sys.argv[0])
        sys.exit(1)

    genes = sys.argv[1]
    chrField = int(sys.argv[2])
    wiggle = sys.argv[3]
    window = int(sys.argv[4])
    minLength = int(sys.argv[5])
    outfilename = sys.argv[6]

    # Open the output first so an unwritable path fails before the long
    # processing starts; the with-block guarantees it is closed on error.
    with open(outfilename, 'w') as outfile:
        scores, gene_count = _collect_window_positions(
            genes, chrField, window, minLength)
        _load_scores(wiggle, scores)
        profile = _sum_profile(genes, chrField, window, minLength, scores)

        if not gene_count:
            # Avoid a ZeroDivisionError; every bin is reported as 0.0.
            sys.stderr.write('warning: no genes passed the length filter\n')

        for offset in sorted(profile):
            mean = profile[offset] / gene_count if gene_count else 0.0
            outfile.write('%s\t%s\n' % (offset, mean))
   
# Run the pipeline only when this file is executed as a script, so that
# importing it does not parse sys.argv or exit the importing process.
if __name__ == '__main__':
    run()
