##################################
#                                #
# Last modified 5/15/2009        # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from cistematic.core import Genome
from cistematic.core.geneinfo import geneinfoDB
from commoncode import *

def run():
    """Write a tab-separated table of per-gene extents for one genome.

    Command-line arguments (read from ``sys.argv``):
        genome          -- genome name passed to ``Genome`` and used in the
                           ``(genome, geneID)`` key for ``getGeneInfo``
        outputfilename  -- path of the file to (over)write

    One header line is written, followed by one row per gene ID returned by
    ``Genome(genome).getallGeneFeatures()``: gene ID, gene name, chromosome
    (prefixed with ``chr``), smallest feature start, largest feature end and
    orientation.  Chromosome and orientation are taken from the gene's first
    feature.  A gene for which ``getGeneInfo`` returns ``[]`` is named
    ``LOC<geneID>``.

    Prints a usage message and exits with status 1 when either argument is
    missing.
    """
    # argv[0] is the script name, so two user arguments mean len(argv) == 3.
    if len(sys.argv) < 3:
        print('usage: python %s genome outputfilename  ' % sys.argv[0])
        sys.exit(1)

    genome = sys.argv[1]
    outfilename = sys.argv[2]

    outfile = open(outfilename, 'w')
    try:
        hg = Genome(genome)
        idb = geneinfoDB()
        featDict = hg.getallGeneFeatures()
        total = len(featDict)

        outfile.write('GeneID\tGeneName\tChr\tStart\tEnd\tOrientation\n')
        for count, geneID in enumerate(featDict):
            # Progress indicator: number of genes still to be processed.
            if count % 1000 == 0:
                print(total - count)

            # Query once per gene; an empty list means no annotation is known.
            info = idb.getGeneInfo((genome, geneID))
            if info == []:
                name = 'LOC' + str(geneID)
            else:
                name = info[0]

            features = featDict[geneID]
            chrom = 'chr' + str(features[0][1])
            orientation = str(features[0][4])
            rmin = min([int(feature[2]) for feature in features])
            rmax = max([int(feature[3]) for feature in features])

            # The trailing tab before the newline is kept so the output stays
            # byte-identical to what earlier versions of this script wrote.
            outfile.write('%s\t%s\t%s\t%s\t%s\t%s\t\n'
                          % (geneID, name, chrom, rmin, rmax, orientation))
    finally:
        # Close the file even when a lookup or conversion above fails.
        outfile.close()
   
# Script entry point: run() is called unconditionally, so it executes both
# when this file is run directly and when it is imported.
run()
