##################################
#                                #
# Last modified 7/29/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from cistematic.core import Genome
from cistematic.core.geneinfo import geneinfoDB
from commoncode import *

def run():

    if len(sys.argv) < 5:
        print 'usage: python %s NPSoutputfilename regionfilename regionStartField regionPeakID outputfilename' % sys.argv[0]
        sys.exit(1)
    
    NPSfile = sys.argv[1]
    regionfile = sys.argv[2]
    fieldID=int(sys.argv[3])
    peakfieldID=int(sys.argv[4])
    outfilename = sys.argv[5]
   
    outfile=open(outfilename,'w')

    NPSDict={}
    regionDict={}
    NPS=open(NPSfile) 
    linelist=NPS.readlines()
    linelist.remove(linelist[0])
    for line in linelist:
        if line[0]=='#':
            continue
        fields=line.strip().split('\t') 
        if NPSDict.has_key(fields[0]):
            NPSDict[fields[0]].append((float(fields[1])+float(fields[2]))/2)
        else:
            NPSDict[fields[0]]=[]
            regionDict[fields[0]]=[]
            NPSDict[fields[0]].append((float(fields[1])+float(fields[2]))/2)
    regions=open(regionfile) 
    linelist=regions.readlines()
    for line in linelist:
        if line[0]=='#':
            continue
        fields=line.strip().split('\t')
        if regionDict.has_key(fields[fieldID]):
            tup=(int(fields[fieldID+1]),int(fields[fieldID+2]),int(fields[peakfieldID]))
            regionDict[fields[fieldID]].append(tup)
    
    outfile.write('#chr\tStart\tStop\tDistanceToClosestPosNucleososme\n')
    for chr in regionDict:
        NPSDict[chr].sort()
        print chr
        for (start,stop,peak) in regionDict[chr]:
            distance=peak-NPSDict[chr][0]
            for nucleosome in NPSDict[chr]:
                if math.fabs(peak-nucleosome)<math.fabs(distance):
                    distance=peak-nucleosome
            outline= '%s\t%s\t%s\t%s\t' % (chr,start,stop,distance)
            outfile.write(outline+'\n')

    outfile.close()
   
run()