##################################
#                                #
# Last modified 2018/05/12       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import math
import string
import os
from sets import Set

def run():

    if len(sys.argv) < 8:
        print 'usage: python %s motifs.bed motifChrFieldID radius regioncalls peakChrFieldID peakPosFieldID scoreFieldID outputfilename [-narrowPeak] [-matchesOnly]' % sys.argv[0]
        sys.exit(1)
    
    motifs = sys.argv[1]
    motifChrFieldID = int(sys.argv[2])
    radius = int(sys.argv[3])
    regioncalls = sys.argv[4]
    peakChrFieldID = int(sys.argv[5])
    peakFieldID = int(sys.argv[6])
    scoreFieldID = int(sys.argv[7])
    outfilename = sys.argv[8]

    doNP = False
    if '-narrowPeak' in sys.argv:
        doNP = True

    doMO = False
    if '-matchesOnly' in sys.argv:
        doMO = True

    regionList = []
    if regioncalls.endswith('.bz2'):
        cmd = 'bzip2 -cd ' + regioncalls
    elif regioncalls.endswith('.gz'):
        cmd = 'gunzip -c ' + regioncalls
    elif regioncalls.endswith('.zip'):
        cmd = 'unzip -p ' + regioncalls
    else:
        cmd = 'cat ' + regioncalls
    p = os.popen(cmd, "r")
    line = 'line'
    while line != '':
        line = p.readline().strip()
        if line == '':
            break
        if line.startswith('#'):
            continue
        fields = line.strip().split('\t')
        if doNP:
            chr = fields[0]
            left = int(fields[1])
            right = int(fields[2])
            peak = int(fields[1]) + int(fields[9])
        else:
            chr = fields[peakChrFieldID]
            left = int(fields[peakChrFieldID + 1])
            right = int(fields[peakChrFieldID + 2])
            peak = int(fields[peakFieldID])
        score = float(fields[scoreFieldID])
        regionList.append((score,chr,left,right,peak))
    
    MotifPosDict = {}
    if motifs.endswith('.bz2'):
        cmd = 'bzip2 -cd ' + motifs
    elif motifs.endswith('.gz'):
        cmd = 'gunzip -c ' + motifs
    elif motifs.endswith('.zip'):
        cmd = 'unzip -p ' + motifs
    else:
        cmd = 'cat ' + motifs
    p = os.popen(cmd, "r")
    line = 'line'
    while line != '':
        line = p.readline().strip()
        if line == '':
            break
        if line.startswith('#'):
            continue
        fields = line.strip().split('\t')
        chr = fields[motifChrFieldID]
        left = int(fields[motifChrFieldID + 1])
        right = int(fields[motifChrFieldID + 2])
        strand = fields[motifChrFieldID + 3]
        middle = int((right + left)/2.0)
        if MotifPosDict.has_key(chr):
            pass
        else:
            MotifPosDict[chr] = {}
        if MotifPosDict[chr].has_key(middle):
            if MotifPosDict[chr][middle] != strand:
                MotifPosDict[chr][middle] = '+/-'
        else:
            MotifPosDict[chr][middle] = strand

    regionList.sort()
    regionList.reverse()

    outfile = open(outfilename, 'w')
    outline = '#chr\tleft\tright\tpeak\tscore\trank\tmotifPos\tmotifStrand'
    outfile.write(outline + '\n')

    rank = 0
    for (RPM,chr,left,right,peak) in regionList:
        rank += 1
        outline = chr + '\t' + str(left) + '\t' + str(right) + '\t' + str(peak) + '\t' + str(RPM)
        nearest = (radius,'')
        if MotifPosDict.has_key(chr):
            for i in range(peak - radius,peak + radius):
                if MotifPosDict[chr].has_key(i):
                    if math.fabs(nearest[0]) > math.fabs(i-peak):
                        nearest = (i-peak,MotifPosDict[chr][i])
        else:
            pass
        if nearest != (radius,''):
            outline = outline + '\t' + str(rank) + '\t' + str(nearest[0]) + '\t' + nearest[1]
            outfile.write(outline + '\n')
        else:
            if doMO:
                pass
            else:
                outline = outline + '\t' + str(rank) + '\t-\t-'
                outfile.write(outline + '\n')

    outfile.close()
            
run()
