##################################
#                                #
# Last modified 11/06/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import math
import string
from sets import Set


def _load_regions(path):
    """Parse the tab-separated region-calls file at *path*.

    Lines starting with '#' and blank lines are skipped.  For every other
    line, column 2 is the chromosome, columns 3 and 4 are start and stop,
    column 5 is the RPM value and column 10 is the peak position.

    Returns a pair ``(distances_by_region, region_by_id)``:
      * distances_by_region maps the tuple (RPM, chrom, start, stop, peak)
        to an (initially empty) list of motif distances;
      * region_by_id maps the string 'chrom:start-stop' (built from the raw
        column text) to that same tuple.
    """
    distances_by_region = {}
    region_by_id = {}
    with open(path) as handle:
        for line in handle:
            if line.startswith('#') or not line.strip():
                continue
            fields = line.strip().split('\t')
            chrom = fields[1]
            region = (float(fields[4]), chrom,
                      int(fields[2]), int(fields[3]), int(fields[9]))
            distances_by_region[region] = []
            # The ID uses the unparsed column text so it matches the ID
            # string stored in the motif-sites file.
            region_by_id[chrom + ':' + fields[2] + '-' + fields[3]] = region
    return distances_by_region, region_by_id


def _add_motif_distances(path, region_by_id, distances_by_region):
    """Append, for every motif site in *path*, its distance to the region peak.

    Column 1 of each line is the motif location as 'chrom:left-right';
    column 5 is the ID ('chrom:start-stop') of the region it belongs to.
    The distance is (motif midpoint - region peak) and is appended to the
    region's list in *distances_by_region*.  Blank lines are skipped.

    Raises KeyError if a site refers to a region ID that is not present in
    *region_by_id*.
    """
    with open(path) as handle:
        for line in handle:
            if not line.strip():
                continue
            fields = line.strip().split('\t')
            span = fields[0].split(':')[1].split('-')
            left = int(span[0])
            right = int(span[1])
            # Midpoint of the motif, truncated to an integer.
            midpoint = int((left + right) / 2.)
            region = region_by_id[fields[4]]
            distances_by_region[region].append(midpoint - region[4])


def run():
    """Command-line entry point.

    Expects ``sys.argv`` to hold four arguments after the program name:
    getallsites (motif-sites file), radius (integer), regioncalls
    (region-calls file) and outputfilename.

    Regions are ordered by descending (RPM, chrom, start, stop, peak) tuple.
    Regions that have at least one motif site are numbered 1, 2, ... in that
    order; for each motif whose distance to the peak is within *radius*
    (inclusive, absolute value) a line 'rank<TAB>distance' is written.  The
    output ends with a '#No motif found in:' line followed by
    'regions_without_a_motif_in_radius/total_regions'.

    Exits with status 1 after printing a usage message when too few
    arguments are supplied.
    """
    # Four arguments are read below (argv[1]..argv[4]), so at least five
    # entries are required; the previous "< 4" check let a three-argument
    # call through and crashed with IndexError.
    if len(sys.argv) < 5:
        # sys.stdout.write emits the same text as the print statement did
        # while remaining valid syntax on both Python 2 and Python 3.
        sys.stdout.write(
            'usage: python %s getallsites radius regioncalls outputfilename\n'
            % sys.argv[0])
        sys.exit(1)

    getallsites = sys.argv[1]
    radius = int(sys.argv[2])
    regioncalls = sys.argv[3]
    outfilename = sys.argv[4]

    distances_by_region, region_by_id = _load_regions(regioncalls)
    _add_motif_distances(getallsites, region_by_id, distances_by_region)

    # Dict keys are unique, so a reverse sort is equivalent to sort + reverse.
    ranked = sorted(distances_by_region, reverse=True)

    rank = 1
    present = 0
    with open(outfilename, 'w') as outfile:
        for region in ranked:
            distances = distances_by_region[region]
            if not distances:
                # Regions without any motif site do not consume a rank.
                continue
            within = [d for d in distances if abs(d) <= radius]
            for distance in within:
                outfile.write(str(rank) + '\t' + str(distance) + '\n')
            if within:
                present += 1
            rank += 1

        no_motif = len(ranked) - present
        outfile.write('#No motif found in:\n')
        outfile.write(str(no_motif) + '/' + str(len(ranked)) + '\n')
            
# Script entry point: run() is called unconditionally when this module is
# loaded (there is no __name__ guard), so importing the file also executes it.
run()
