##################################
#                                #
# Last modified 03/07/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math

def run():

    if len(sys.argv) < 3:
        print 'usage: python %s refFlat radius outputfilename  ' % sys.argv[0]
        sys.exit(1)
    
    refFlat = sys.argv[1]
    radius = int(sys.argv[2])
    outfilename = sys.argv[3]
    outfile = open(outfilename, 'w')

    GeneDict={}
    linelist=open(refFlat)
    for line in linelist:
        fields=line.split('\t')
        gene=fields[0]
        chr=fields[2]
        ID=fields[1]
        strand=fields[3]
        left=int(fields[4])
        right=int(fields[7])
        if GeneDict.has_key(chr):
            pass
        else:
            GeneDict[chr]={}
        if GeneDict[chr].has_key(gene):
            pass
        else:
            GeneDict[chr][gene]={}
            GeneDict[chr][gene]['coordinates']=[]
            GeneDict[chr][gene]['strand']=strand
            GeneDict[chr][gene]['ID']=[]
        GeneDict[chr][gene]['coordinates'].append(left)
        GeneDict[chr][gene]['coordinates'].append(right)
        GeneDict[chr][gene]['ID'].append(ID)

    for chr in GeneDict.keys():
        for gene in GeneDict[chr].keys():
            left=min(GeneDict[chr][gene]['coordinates'])
            right=max(GeneDict[chr][gene]['coordinates'])
            GeneDict[chr][gene]['coordinates']=(left,right)

    outlineList=[]  
    for chr in GeneDict.keys():
        print chr
        for gene1 in GeneDict[chr].keys():
            (left1,right1) = GeneDict[chr][gene1]['coordinates']
            Cleared=True
            if math.fabs(left1-right1) < radius:
                continue
            for gene2 in GeneDict[chr].keys():
                (left2,right2) = GeneDict[chr][gene1]['coordinates']
                if math.fabs(left1-right2) < radius or math.fabs(right1-left2) < radius:
                    Cleared=False
                    break
            if Cleared:
                outlineList.append((chr,left1,right1,strand,gene1))

    outlineList.sort()
    for (chr,left1,right1,strand,gene) in outlineList:
        outline= '%s\t%s\t%s\t%s\t%s\t' % (gene,chr,left1,right1,strand)
        for ID in GeneDict[chr][gene]['ID']:
            outline=outline+ID+','
        outfile.write(outline[0:-1]+'\n')

    outfile.close()
   
run()
