##################################
#                                #
# Last modified 6/14/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from cistematic.core import Genome
from cistematic.core.geneinfo import geneinfoDB

# Optional speed-up: enable psyco when it is installed, otherwise carry on
# without it.
try:
    import psyco
    psyco.full()
except Exception:
    # Best effort only. Exception (rather than a bare except) is caught so
    # that KeyboardInterrupt and SystemExit are not silently swallowed.
    pass

def run():
    """Write the gene feature coordinates of a genome to a BED-like file.

    Arguments are read from sys.argv:

        genome outfilename [-dropPseudo] [-dropnongenes listofgenesfilename]

    genome                 name passed to Genome() and to the geneinfoDB calls
    outfilename            path of the tab-delimited output file
    -dropPseudo            skip genes whose first feature is of type 'PSEUDO'
    -dropnongenes FILE     keep only genes whose name appears in the first
                           tab-delimited column of FILE; a gene with no
                           geneinfo entry is looked up as 'LOC' + its ID

    Each output line is 'chromosome<TAB>start<TAB>stop<TAB>', sorted by
    chromosome name and then by start.  Segments are keyed by start within
    a chromosome, so when several features share a start coordinate only
    the last one processed is written.

    Progress (a counter every 1000 genes, 'pass' for every gene dropped by
    -dropnongenes, and each chromosome name while writing) goes to stdout.
    Exits with status 1 after printing the usage line when a required
    argument is missing.
    """
    # NOTE: print is always called with one parenthesised argument so the
    # output is the same under the Python 2 print statement.
    usage = 'usage: python %s genome outfilename [-dropPseudo] [-dropnongenes listofgenesfilename]' % sys.argv[0]

    # Both positional arguments are required (script name + 2 = 3 entries).
    if len(sys.argv) < 3:
        print(usage)
        sys.exit(1)

    genome = sys.argv[1]
    outfilename = sys.argv[2]

    dropPSEUDO = '-dropPseudo' in sys.argv
    dropNonGene = '-dropnongenes' in sys.argv

    genelist = set()
    if dropNonGene:
        flagIndex = sys.argv.index('-dropnongenes')
        # The flag must be followed by the gene list file name.
        if flagIndex + 1 >= len(sys.argv):
            print(usage)
            sys.exit(1)
        listofgenesfile = open(sys.argv[flagIndex + 1])
        try:
            for line in listofgenesfile:
                # The gene name is the first tab-delimited field.
                genelist.add(line.split('\t')[0].strip())
        finally:
            listofgenesfile.close()

    # bedsegments[chromosome][start] = (chromosome, start, stop)
    bedsegments = {}
    hg = Genome(genome)
    idb = geneinfoDB()
    # NOTE(review): the result of this call was never used; the call itself
    # is kept in case it has side effects (e.g. caching) -- confirm and drop.
    idb.getallGeneInfo(genome)
    featDict = hg.getallGeneFeatures()
    geneIDs = featDict.keys()

    for i, k in enumerate(geneIDs):
        if i % 1000 == 0:
            print(i)
        if dropNonGene:
            # Look the gene up once; an empty list means no geneinfo entry.
            geneinfo = idb.getGeneInfo((genome, k))
            if geneinfo == []:
                name = 'LOC' + str(k)
            else:
                name = geneinfo[0]
            if name not in genelist:
                print('pass')
                continue
        features = featDict[k]
        # The type of the first feature decides whether the gene is a
        # pseudogene; a gene without features has nothing to emit anyway.
        if dropPSEUDO and features and features[0][0] == 'PSEUDO':
            continue
        for feature in features:
            # Only the part of feature[1] before the first '|' is used.
            chromosome = 'chr' + str(feature[1]).split('|')[0]
            segments = bedsegments.setdefault(chromosome, {})
            segments[feature[2]] = (chromosome, feature[2], feature[3])

    outfile = open(outfilename, 'w')
    try:
        for chromosome in sorted(bedsegments):
            print(chromosome)
            segments = bedsegments[chromosome]
            for pos in sorted(segments):
                (chrom, start, stop) = segments[pos]
                # The trailing tab is kept to leave the output unchanged.
                outfile.write('%s\t%s\t%s\t' % (chrom, start, stop) + '\n')
    finally:
        outfile.close()

# Script entry point: run() is called unconditionally at module level, so it
# executes both when the file is run as a script and when it is imported.
run()

