##################################
#                                #
# Last modified 02/03/2014       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys

# psyco is an optional speed-up module; nothing else in this script depends on
# it, so any failure to load or enable it is ignored.
try:
    import psyco
    psyco.full()
except Exception:
    # Best-effort only. Catch Exception rather than using a bare ``except`` so
    # that KeyboardInterrupt and SystemExit are not silently swallowed.
    pass

def _read_regions(inputfilename, chrFieldID, leftFieldID, rightFieldID):
    """Parse (chromosome, left, right) tuples from a tab-delimited file.

    Lines starting with '#' and blank lines are skipped. The field IDs are
    0-based column indices into each tab-split line.
    """
    regions = []
    with open(inputfilename) as infile:
        for line in infile:
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if not stripped:
                # A blank line carries no coordinates; skip it instead of
                # failing on int('').
                continue
            fields = stripped.split('\t')
            regions.append((fields[chrFieldID],
                            int(fields[leftFieldID]),
                            int(fields[rightFieldID])))
    return regions


def _merge_regions(regions, maxDist):
    """Return the sorted regions with close neighbours merged.

    Two regions on the same chromosome are merged when the next region's left
    coordinate is at most maxDist past the right end of the current merged
    region.
    """
    merged = []
    for chrom, left, right in sorted(regions):
        if merged and merged[-1][0] == chrom and merged[-1][2] + maxDist >= left:
            previous = merged[-1]
            # Keep the furthest right end seen so far: a region nested inside
            # the current one must not shrink the merged interval.
            merged[-1] = (chrom, previous[1], max(previous[2], right))
        else:
            merged.append((chrom, left, right))
    return merged


def run():
    """Merge neighbouring genomic regions listed in a tab-delimited file.

    Command-line arguments (read from sys.argv):
        bed           path of the input file
        chrFieldID    0-based column index of the chromosome name
        leftFieldID   0-based column index of the left coordinate
        rightFieldID  0-based column index of the right coordinate
        maxDistance   largest gap between two regions that still merges them
        outfilename   path of the output file

    The output has one line per merged region with three tab-separated
    columns (chromosome, left, right), sorted by chromosome name and then by
    coordinate; any other input columns are not carried over. An input with
    no data lines produces an empty output file.

    Prints a usage message and exits with status 1 when fewer than six
    arguments are given.
    """
    # Six positional arguments are required, so argv needs seven entries
    # (script name included) before sys.argv[6] can be read.
    if len(sys.argv) < 7:
        print('usage: python %s bed chrFieldID leftFieldID rightFieldID maxDistance outfilename' % sys.argv[0])
        print('\tthis script will take any file with genomic coordinates and merge neighbouring regions if they are less than the distance specified by the maxDistance parameter')
        sys.exit(1)

    inputfilename = sys.argv[1]
    chrFieldID = int(sys.argv[2])
    leftFieldID = int(sys.argv[3])
    rightFieldID = int(sys.argv[4])
    maxDist = int(sys.argv[5])
    outputfilename = sys.argv[6]

    regions = _read_regions(inputfilename, chrFieldID, leftFieldID, rightFieldID)
    merged = _merge_regions(regions, maxDist)

    with open(outputfilename, 'w') as outfile:
        for chrom, left, right in merged:
            outfile.write('\t'.join((chrom, str(left), str(right))) + '\n')

# Only run the command-line entry point when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    run()