##################################
#                                #
# Last modified 5/6/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from cistematic.core import Genome
from cistematic.core.geneinfo import geneinfoDB

# psyco is an optional JIT accelerator: use it when it is installed and
# works, otherwise run at normal interpreter speed.
try:
    import psyco
    psyco.full()
except Exception:
    # Best-effort only. `except Exception` (rather than a bare `except:`)
    # keeps KeyboardInterrupt/SystemExit from being silently swallowed.
    pass

def run():

    if len(sys.argv) < 3:
        print 'usage: python %s genome listofsmiRNAs outfilename' % sys.argv[0]
        sys.exit(1)

    cachePages = 500000

    genome = sys.argv[1]
    listofmiRNAs = sys.argv[2]
    outfilename = sys.argv[3]

    hg = Genome(genome)
    idb = geneinfoDB()
    geneinfoDict = idb.getallGeneInfo(genome)
    featDict = hg.getallGeneFeatures()
    geneIDs = featDict.keys()

    miRNAs = {}

    listofmiRNAs = open(listofmiRNAs)
    lineslist = listofmiRNAs.readlines()
    for line in lineslist:
        fields = line.split('\n')[0].split('\t')
        miRNAs[fields[0]]={}
        miRNAs[fields[0]]['ID']=fields[0]
        miRNAs[fields[0]]['chromosome']=fields[1]
        miRNAs[fields[0]]['leftPos']=int(fields[2])
        miRNAs[fields[0]]['rightPos']=int(fields[3])
        miRNAs[fields[0]]['orientation']=fields[4]

    print 'done parsing miRNA'

    outfile = open(outfilename, 'w')
    outfile.write('miRNA\tchromosome\tleftPos\trightPos\torientation\tgeneID\tgeneName\t\intron\tGeneOrientaiton\n')

    genes={}

    t=len(geneIDs)
    for k in geneIDs:
        genes[k]={}
        if idb.getGeneInfo((genome,k))==[]:
            genes[k]['geneName'] = 'LOC'+str(k)
        else:
            genes[k]['geneName']=idb.getGeneInfo((genome,k))[0]
        genes[k]['chromosome'] = 'chr' + str(featDict[k][0][1])
        genes[k]['left'] = []
        genes[k]['right'] = []
        for feature in featDict[k]:
            genes[k]['left'].append(feature[2])
            genes[k]['right'].append(feature[3])
        genes[k]['leftPos'] = min(genes[k]['left'])
        genes[k]['rightPos'] = max(genes[k]['right'])
        genes[k]['orientation'] = str(featDict[k][0][4])
        print t
        t-=1

    print 'done parsing gene database'

    for miR in miRNAs.keys():
        for k in genes.keys():
            if miRNAs[miR]['chromosome']!=genes[k]['chromosome']:
                continue
            elif miRNAs[miR]['leftPos']>genes[k]['rightPos'] or miRNAs[miR]['rightPos']<genes[k]['leftPos']:
                continue
            if miRNAs[miR]['leftPos']>genes[k]['leftPos'] and miRNAs[miR]['rightPos']<genes[k]['rightPos']:
                miRNAs[miR]['geneID']=k
                miRNAs[miR]['geneName']=genes[k]['geneName']
                miRNAs[miR]['intron']='check manually'
                for i in range(len(genes[k]['left'])-1):
                    if miRNAs[miR]['leftPos']>genes[k]['right'][i] and miRNAs[miR]['leftPos'] < genes[k]['left'][i+1]:
                        intron = 'intron' + str(i)
                        miRNAs[miR]['intron']=intron
                    if miRNAs[miR]['leftPos']>genes[k]['left'][i] and miRNAs[miR]['leftPos'] < genes[k]['right'][i]:
                        intron = 'exon' + str(i)
                        miRNAs[miR]['intron']=intron
                outfile.write(miRNAs[miR]['ID'])
                outfile.write('\t')
                outfile.write(miRNAs[miR]['chromosome'])
                outfile.write('\t')
                outfile.write(str(miRNAs[miR]['leftPos']))
                outfile.write('\t')
                outfile.write(str(miRNAs[miR]['rightPos']))
                outfile.write('\t')
                outfile.write(miRNAs[miR]['orientation'])
                outfile.write('\t')
                outfile.write(miRNAs[miR]['geneID'])
                outfile.write('\t')
                outfile.write(miRNAs[miR]['geneName'])
                outfile.write('\t')
                outfile.write(miRNAs[miR]['intron'])
                outfile.write('\t')
                outfile.write(genes[k]['orientation'])
                outfile.write('\n')

    for miR in miRNAs.keys():
        if 'geneName' not in miRNAs[miR].keys():
            outfile.write(miRNAs[miR]['ID'])
            outfile.write('\t')
            outfile.write(miRNAs[miR]['chromosome'])
            outfile.write('\t')
            outfile.write(str(miRNAs[miR]['leftPos']))
            outfile.write('\t')
            outfile.write(str(miRNAs[miR]['rightPos']))
            outfile.write('\t')
            outfile.write(miRNAs[miR]['orientation'])
            outfile.write('\t-\t-\t-\t-\n')            
            
    outfile.close()

# Script entry point: run() is invoked unconditionally, i.e. whenever this
# module is executed or imported.
# NOTE(review): consider an `if __name__ == '__main__':` guard if this file
# is ever imported as a module.
run()