##################################
#                                #
# Last modified 8/10/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import operator
import string
from sets import Set


# Optional speed-up: enable the psyco JIT when it is installed.
# Only a missing/unsupported psyco is ignored; a bare ``except`` would also
# swallow KeyboardInterrupt, SystemExit and genuine errors.
try:
    import psyco
    psyco.full()
except ImportError:
    pass

def _introns_from_line(line):
    """Return the introns described by one tab-separated gene row.

    The row is read positionally: column 1 is the chromosome, column 8 the
    comma-separated exon starts and column 9 the comma-separated exon ends.
    An intron spans from the end of one exon to the start of the next, so a
    row with N exons yields N-1 ``(chrom, intron_start, intron_end)`` tuples.

    Raises IndexError if the row has fewer than ten columns and ValueError
    if a paired coordinate is not an integer.
    """
    fields = line.rstrip('\r\n').split('\t')
    chrom = fields[1]
    # Drop empty entries so the lists work with or without a trailing comma.
    exon_starts = [s for s in fields[8].split(',') if s.strip()]
    exon_ends = [e for e in fields[9].split(',') if e.strip()]
    # Pair each exon end with the following exon start.  Values are converted
    # only once paired, so a header row (a single non-numeric entry per
    # column) produces no introns instead of failing.
    return [(chrom, int(end), int(start))
            for end, start in zip(exon_ends, exon_starts[1:])]


def run():
    """Write the unique, sorted introns of a gene table to a file.

    Command line: ``python <script> knownGenesfilename outfilename``.

    Reads the tab-separated table named by ``sys.argv[1]``, derives the
    introns of every row, removes duplicates, sorts them by
    (chromosome, start, end) and writes one ``chrom<TAB>start<TAB>end`` line
    per intron to the file named by ``sys.argv[2]``.

    Prints a usage message and exits with status 1 when either file name is
    missing.
    """
    # Both the input and the output file name are required.
    if len(sys.argv) < 3:
        print('usage: python %s knownGenesfilename outfilename' % sys.argv[0])
        sys.exit(1)

    inputfilename = sys.argv[1]
    outputfilename = sys.argv[2]

    # Collect into a set so introns shared by several transcripts appear once.
    introns = set()
    with open(inputfilename) as infile:
        for line in infile:
            if not line.strip():
                continue  # tolerate blank lines (e.g. at end of file)
            introns.update(_introns_from_line(line))

    # The output is opened only after the input was read successfully, so a
    # bad input does not truncate an existing output file.
    with open(outputfilename, 'w') as outfile:
        # Plain tuple ordering sorts by chromosome, then start, then end.
        for chrom, start, end in sorted(introns):
            outfile.write(chrom + '\t' + str(start) + '\t' + str(end) + '\n')

# Run only when executed as a script, so that importing this module does not
# trigger argument parsing and file I/O.
if __name__ == '__main__':
    run()

