##################################
#                                #
# Last modified 5/6/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from cistematic.core import Genome
from cistematic.core.geneinfo import geneinfoDB

try:
	import psyco
	psyco.full()
except:
	pass

def run():

    if len(sys.argv) < 3:
        print 'usage: python %s genome distancefromgetallgenesfilename outfilename [-radius bp]' % sys.argv[0]
        sys.exit(1)

    genome = sys.argv[1]
    infilename = sys.argv[2]
    outfilename = sys.argv[3]
    doRadius=False
    if '-radius' in sys.argv:
        doRadius=True
        radius = int(sys.argv[sys.argv.index('-radius')+1])

    genes = {}
    hg = Genome(genome)
    idb = geneinfoDB()
    geneinfoDict = idb.getallGeneInfo(genome)
    featDict = hg.getallGeneFeatures()
    geneIDs = featDict.keys()
    i=0
    for k in featDict.keys():
#        print i
        i+=1
        start=0
        stop=0
        if idb.getGeneInfo((genome,k))==[]:
            name = 'LOC'+str(k)
        else:
            name = idb.getGeneInfo((genome,k))[0]
        genes[name]={}
        leftPos=[]
        rightPos=[]
        for feature in featDict[k]:
            leftPos.append(int(feature[2]))
            rightPos.append(int(feature[3]))
#        print name
        genes[name]['geneID']=k
        genes[name]['name']=name
        genes[name]['chromosome']= 'chr'+str(featDict[k][0][1])
        genes[name]['leftPos']=min(leftPos)
        genes[name]['rightPos']=max(rightPos)
        genes[name]['orientation']=str(featDict[k][0][4])
        genes[name]['sites'] = []

    print 'Finished parsing gene models'

    outfile = open(outfilename, 'w')
    outfile.write('Gene ID\tGene Name\tChromosome\tGene Start\tGene End\tGene Orientation\tGene Length\tNumber of sites\t Distances to TSS\n')

    listoflines = open(infilename)
    lineslist = listoflines.readlines()
    lineslist.remove(lineslist[0])
    lineslist.remove(lineslist[0])
    for line in lineslist:
        fields=line.split('\n')[0].split('\t')
        genename = fields[0].split(' ')[0]
        if doRadius:
            if math.fabs(int(fields[12]))<=radius:
                genes[genename]['sites'].append(fields[12])
        else:
            genes[genename]['sites'].append(fields[12])

    for genename in genes.keys():
        outfile.write(genes[genename]['geneID'])
        outfile.write('\t')
        outfile.write(genes[genename]['name'])
        outfile.write('\t')
        outfile.write(genes[genename]['chromosome'])
        outfile.write('\t')
        outfile.write(str(genes[genename]['leftPos']))
        outfile.write('\t')
        outfile.write(str(genes[genename]['rightPos']))
        outfile.write('\t')
        outfile.write(genes[genename]['orientation'])
        outfile.write('\t')
        outfile.write(str(int(math.fabs(genes[genename]['rightPos']-genes[genename]['leftPos']))))
        outfile.write('\t')
        outfile.write(str(len(genes[genename]['sites'])))
        outfile.write('\t')
        for site in genes[genename]['sites']:
            outfile.write(site)
            outfile.write(', ')
        outfile.write('\n')

    outfile.close()

run()

