##################################
#                                #
# Last modified 3/19/2009        # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from cistematic.core import Genome
from cistematic.core.geneinfo import geneinfoDB

try:
	import psyco
	psyco.full()
except:
	pass

def getSequence(hg,chromosome,start,stop,sense):
    
    DNA = {'A':'T','T':'A','G':'C','C':'G','N':'N','a':'t','t':'a','g':'c','c':'g','n':'n'}
    chromosome = chromosome[3:len(chromosome)]
    if sense=='F' or sense=='+':
        sequence = hg.sequence(chromosome,start,stop-start)
    if sense=='R' or sense=='-':
        preliminarysequence = hg.sequence(chromosome,start,stop-start)
        sequence=''
        for i in range(len(preliminarysequence)):
            sequence=sequence+DNA[preliminarysequence[len(preliminarysequence)-i-1]]
    
    return sequence

def run():

    if len(sys.argv) < 7:
        print 'usage: python %s <genome>  <junciton files> <Upstream Exonic bp> <Upstream Intronic bp> <Downstream Intronic bp> <Downstream Exonic bp> <outfilename prefix> ' % sys.argv[0]
        sys.exit(1)

    genome = sys.argv[1]
    inputfilename = sys.argv[2]
    UpEx = int(sys.argv[3])
    UpIn = int(sys.argv[4])
    DownIn = int(sys.argv[5])
    DownEx = int(sys.argv[6])
    outputfilenameUpEx = sys.argv[7]+'UpstreamExonic.'+str(UpEx)+'bp.fasta'
    outputfilenameUpIn = sys.argv[7]+'UpstreamExonic.'+str(UpIn)+'bp.fasta'
    outputfilenameDownIn = sys.argv[7]+'UpstreamExonic.'+str(DownIn)+'bp.fasta'
    outputfilenameDownEx = sys.argv[7]+'UpstreamExonic.'+str(DownEx)+'bp.fasta'

    hg = Genome(genome)

    outfileUpEx = open(outputfilenameUpEx, 'w')
    outfileUpIn = open(outputfilenameUpIn, 'w')
    outfileDownIn = open(outputfilenameDownIn, 'w')
    outfileDownEx = open(outputfilenameDownEx, 'w')

    listoflines = open(inputfilename)
    TranscriptDict={}
    i=0
    print 'Inputting annotation'
    for line in listoflines:
        i+=1
        if i % 10000 == 0:
            print i, 'lines processed'
        fields=line.split('\t')
        chr=fields[0]
        orientation=fields[3]
        if orientation == '+':
            Up=int(fields[1])
            Down=int(fields[2])
            sequenceUpEx=getSequence(hg,chr,Up-UpEx,Up,'+')
            sequenceUpIn=getSequence(hg,chr,Up,Up+UpIn,'+')
            sequenceDownIn=getSequence(hg,chr,Down-DownIn,Down,'+')
            sequenceDownEx=getSequence(hg,chr,Down,Down+DownEx,'+')
        if orientation == '-':
            Up=int(fields[2])
            Down=int(fields[1])
            sequenceUpEx=getSequence(hg,chr,Up,Up+UpEx,'-')
            sequenceUpIn=getSequence(hg,chr,Up-UpIn,Up,'-')
            sequenceDownIn=getSequence(hg,chr,Down,Down+DownIn,'-')
            sequenceDownEx=getSequence(hg,chr,Down-DownEx,Down,'-')
        outline='>'+chr+':'+fields[1]+'-'+fields[2]+fields[3]
        outfileUpEx.write(outline+'\n')
        outfileUpIn.write(outline+'\n')
        outfileDownIn.write(outline+'\n')
        outfileDownEx.write(outline+'\n')
        outfileUpEx.write(sequenceUpEx+'\n')
        outfileUpIn.write(sequenceUpIn+'\n')
        outfileDownIn.write(sequenceDownIn+'\n')
        outfileDownEx.write(sequenceDownEx+'\n')
        
        outline=outline+'\t'+ORF+'\t'+sequence
        outfile.write(outline+'\n')

    outfileUpEx.close()
    outfileUpIn.close()
    outfileDownIn.close()
    outfileDownEx.close()

run()

