##################################
#                                #
# Last modified 03/26/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math

def run():

    if len(sys.argv) < 5:
        print 'usage: python %s bed gtf chrField radius outputfilename  ' % sys.argv[0]
        sys.exit(1)
    
    bed = sys.argv[1]
    gtf = sys.argv[2]
    chrFieldID = int(sys.argv[3])
    radius = int(sys.argv[4])
    outfilename = sys.argv[5]
    outfile = open(outfilename, 'w')

    GeneDict={}
    linelist=open(gtf)
    for line in linelist:
        if line.startswith('#'):
            continue
        fields=line.split('\t')
        chr=fields[0]
        left=int(fields[3])
        right=int(fields[4])
        gene=fields[8].split('gene_id "')[1].split('";')[0]
        if GeneDict.has_key(chr):
            pass
        else:
            GeneDict[chr]={}
        if GeneDict[chr].has_key(gene):
            pass
        else:
            GeneDict[chr][gene]={}
            GeneDict[chr][gene]['coordinates']=[]
        GeneDict[chr][gene]['coordinates'].append(left)
        GeneDict[chr][gene]['coordinates'].append(right)

    for chr in GeneDict.keys():
        for gene in GeneDict[chr].keys():
            left=min(GeneDict[chr][gene]['coordinates'])
            right=max(GeneDict[chr][gene]['coordinates'])
            GeneDict[chr][gene]['coordinates']=(left,right)

    linelist=open(bed) 
    for line in linelist:
        if line.startswith('#'):
            continue
        fields=line.strip().split('\t')
        chr = fields[chrFieldID]
        left = int(fields[chrFieldID+1])
        right = int(fields[chrFieldID+2])
        if GeneDict.has_key(chr):
            pass
        else:
            continue
        Cleared=True
        for gene in GeneDict[chr].keys():
            (left2,right2) = GeneDict[chr][gene]['coordinates']
            if math.fabs(left-right2) < radius or math.fabs(right-left2) < radius or (left > left2 and left < right2) or (right > left2 and right < right2):
                Cleared=False
                break
        if Cleared:
            outfile.write(line)

    outfile.close()
   
run()
