##################################
#                                #
# Last modified 9/13/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math

def run():

    if len(sys.argv) < 3:
        print 'usage: python %s getallsitesoutput regionfilename outputfilename [-cisGenome]' % sys.argv[0]
        sys.exit(1)
    
    cachePages = 2000000

    getallsites = sys.argv[1]
    regionfilename = sys.argv[2]
    outfilename = sys.argv[3]

    doCisGenome=False
    if '-cisGenome' in sys.argv:
        doCisGenome=True
        print 'cisGenome input'
    outfile = open(outfilename, 'w')

    inputdatafile = open(getallsites)
    linelist = inputdatafile.readlines()
    regionDict={}
    for line in linelist:
        if line[0]=='#':
            continue
        fields=line.strip().split('\t')
        if regionDict.has_key(fields[4]):
            regionDict[fields[4]]['motifs'].append(fields[0])
        else:
            regionDict[fields[4]]={}
            regionDict[fields[4]]['motifs']=[]
            regionDict[fields[4]]['motifs'].append(fields[0])

    inputdatafile = open(regionfilename)
    linelist = inputdatafile.readlines()
    for line in linelist:
        if line[0]=='#':
            continue
        fields=line.strip().split('\t')
        regionDictID=fields[1].strip()+':'+fields[2].strip()+'-'+fields[3].strip()
        if doCisGenome:
            peak=int(fields[7].strip())
        else:
            peak=int(fields[9])
        distance=10000000
        currentClosest=distance
        try: 
            for motif in regionDict[regionDictID]['motifs']:
                motifCooridnates=motif.split(':')[1].split('-')
                motifCenter=int((int(motifCooridnates[1])+int(motifCooridnates[0]))/2.0)
                if math.fabs(peak-motifCenter)<=distance:
                    distance=math.fabs(peak-motifCenter)
                    currentClosest=motifCenter-peak
        except:
            print 'region', regionDictID, 'without motif, skipping'
            continue
        outline=regionDictID+'\t'+str(currentClosest)+'\n'
        outfile.write(outline)

    outfile.close()
run()

