##################################
#                                #
# Last modified 03/25/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s < region (exon coordinates) filename> chrFieldID wigfilename outputfilename ' % sys.argv[0]
        print 'The output of this script is the maximum difference between per bp coverage over individual exons' 

        sys.exit(1)
    
    regionfilename = sys.argv[1]
    fieldID = int(sys.argv[2])
    wigfilename = sys.argv[3]
    outfilename = sys.argv[4]

    outfile = open(outfilename, 'w')

    listoflines = open(regionfilename)
    RegionList=[]
    CoverageDict={}
    p=0
    for line in listoflines:
        p+=1
        if p % 10000 == 0:
            print p, 'lines processed'
        if line[0]=='#':
            continue
        fields=line.strip().split('\t')
        chr=fields[fieldID]
        start=int(fields[fieldID+1])
        stop=int(fields[fieldID+2])
        RegionList.append((chr,start,stop))
        if CoverageDict.has_key(chr):
            for i in range(start,stop):
                CoverageDict[chr][i]=0
        else:
            CoverageDict[chr]={}
            for i in range(start,stop):
                CoverageDict[chr][i]=0

    print 'finished parsing regions'

    RegionList=list(Set(RegionList))
    RegionList.sort()
  
    print CoverageDict.keys()

    listoflines = open(wigfilename)
    i=0
    for line in listoflines:
        if line[0]=='#' or line[0]=='t':
            continue
        if i % 10000 == 0:
            print i, 'lines processed'
        i+=1
        fields=line.strip().split('\t')
        if len(fields)==1:
            fields=line.strip().split(' ')
        chr=fields[0]
        start=int(fields[1])
        stop=int(fields[2])
        score=float(fields[3])
        for j in range(start,stop):
            if CoverageDict[chr].has_key(j):
                CoverageDict[chr][j]+=score

    print 'finished parsing bedgraph'

    for (chr,start,stop) in RegionList:
        if start == stop:
            continue
        scores=[]
        for i in range(start,stop):
            scores.append(CoverageDict[chr][i])
        average=sum(scores)/float(len(scores))
        if average == 0:
            continue
        outline = '%s\t%s\t%s\t%s\t' % (chr, start, stop, max(scores)-min(scores))
        outfile.write(outline + '\n')

    outfile.close()
   
run()
