##################################
#                                #
# Last modified 11/06/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import math
import string
from sets import Set


def run():
    """Report motif-to-peak distances for ranked regions.

    Positional command-line arguments, read from ``sys.argv``:

    1. getallsites    -- tab-separated motif hits.  Column 1 holds the motif
                         location as ``chr:left-right``; column 5 holds the
                         ID (``chr:start-stop``) of the region the hit
                         belongs to.
    2. radius         -- integer; a motif is reported only when its midpoint
                         is at most this far from the region peak.
    3. regioncalls    -- tab-separated region calls.  Columns 1-3 are chr,
                         start and stop, column 5 is the peak offset and
                         column 7 is the score stored as ``PValue``.  Lines
                         starting with ``#`` and a header row whose first
                         field is ``chr`` are ignored.
    4. outputfilename -- path of the file to write.

    Output: one ``rank<TAB>distance`` line per motif within ``radius`` of
    its region's peak, followed by the line ``#No motif found in:`` and a
    ``missing/total`` count of regions.  Regions are visited in descending
    order of their ``(PValue, chr, start, stop, peak)`` tuple; ``rank`` is
    advanced only for regions that have at least one motif hit (at any
    distance).

    Exits with status 1 after printing a usage message when fewer than four
    arguments are supplied.

    Raises:
        KeyError: a motif hit refers to a region ID that is absent from
            ``regioncalls``.
    """
    # All four positional arguments are read below, so argv needs five
    # entries including the script name.
    if len(sys.argv) < 5:
        print('usage: python %s getallsites radius regioncalls outputfilename' % sys.argv[0])
        sys.exit(1)

    getallsites = sys.argv[1]
    radius = int(sys.argv[2])
    regioncalls = sys.argv[3]
    outfilename = sys.argv[4]

    # regionListSorted: region tuple -> motif distances from the peak.
    # regionDict: region ID string -> region tuple.
    regionListSorted = {}
    regionDict = {}
    with open(regioncalls) as linelist:
        for line in linelist:
            # Skip comments and blank lines (e.g. a trailing newline).
            if line.startswith('#') or not line.strip():
                continue
            fields = line.strip().split('\t')
            if fields[0] == 'chr':
                continue
            chrom = fields[0]
            # The start is shifted down by one before it is used for the ID
            # and the peak (presumably 1-based -> 0-based; confirm upstream).
            start = int(fields[1]) - 1
            stop = int(fields[2])
            PValue = float(fields[6])
            peak = start + int(fields[4])
            region = (PValue, chrom, start, stop, peak)
            regionListSorted[region] = []
            regionDict[chrom + ':' + str(start) + '-' + str(stop)] = region

    with open(getallsites) as linelist:
        for line in linelist:
            if not line.strip():
                continue
            fields = line.strip().split('\t')
            region = regionDict[fields[4]]
            span = fields[0].split(':')[1].split('-')
            motifPos = int((int(span[1]) + int(span[0])) / 2.)
            # Signed distance from the region peak to the motif midpoint.
            regionListSorted[region].append(motifPos - region[4])

    keys = sorted(regionListSorted.keys(), reverse=True)

    with open(outfilename, 'w') as outfile:
        rank = 1
        present = 0
        for region in keys:
            distances = regionListSorted[region]
            if not distances:
                # Regions without any motif hit do not consume a rank.
                continue
            nearby = [d for d in distances if abs(d) <= radius]
            for distance in nearby:
                outfile.write(str(rank) + '\t' + str(distance) + '\n')
            if nearby:
                present += 1
            rank += 1

        noMotif = len(keys) - present
        outfile.write('#No motif found in:\n')
        outfile.write(str(noMotif) + '/' + str(len(keys)) + '\n')
            
# Run only when executed as a script, so importing this module does not
# parse sys.argv or exit the importing process.
if __name__ == '__main__':
    run()
