##################################
#                                #
# Last modified 01/14/2012       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import pygr
from pygr import worldbase

def getReverseComplement(DNA,sequence):
    
    revsequence = ''
    for i in range(len(sequence)):
        revsequence =revsequence + DNA[sequence[len(sequence)-i-1]]
    
    return revsequence

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s  worldbase_species_name genome_version inputfilename outpurfilename [-seqradius bp] [-usepeak fieldID] [-chrfield fieldID] [-first number] [-name fieldID] [-strand fieldID]' % sys.argv[0]
        print '       example: python /woldlab/castor/data00/home/georgi/code/getfastaPYGR.py input outfile.fasta -seqradius 50 -usepeak 3 -chrfield 0' 
        sys.exit(1)

    genome_name = sys.argv[1]
    genome_version = sys.argv[2]
    inputfilename = sys.argv[3]
    outfilename = sys.argv[4]

    DNA = {'A':'T','T':'A','G':'C','C':'G','N':'N','a':'t','t':'a','g':'c','c':'g','n':'n'}

    fieldID=1
    if '-chrfield' in sys.argv:
        fieldID = int(sys.argv[sys.argv.index('-chrfield') + 1])
    
    doName = False
    if '-name' in sys.argv:
        doName = True
        nameField = int(sys.argv[sys.argv.index('-name') + 1])

    doStrand = False
    if '-strand' in sys.argv:
        doStrand = True
        strandField = int(sys.argv[sys.argv.index('-strand') + 1])

    doPeak = False
    if '-usepeak' in sys.argv:
        doPeak = True
        peakFieldID = int(sys.argv[sys.argv.index('-usepeak') + 1])
    if '-seqradius' in sys.argv:
        radius = int(sys.argv[sys.argv.index('-seqradius') + 1])

    doTop=False   
    if '-first' in sys.argv: 
        doTop=True
        top = int(sys.argv[sys.argv.index('-top') + 1])

    exec('genome = worldbase.Bio.Seq.Genome.' + genome_name + '.' + genome_version + '()')

    outfile = open(outfilename, 'w')

    inputdatafile = open(inputfilename)
    i=0
    for line in inputdatafile:
        i+=1
        if doTop and i >= top:
            continue
        if line[0]=='#':
            continue
        fields = line.strip().split('\t')
        chr = fields[fieldID].strip()
        if doPeak:
            peak = int(fields[peakFieldID].strip())
            start = peak-radius
            stop = peak+radius
        else:
            start = int(fields[fieldID+1].strip())
            stop = int(fields[fieldID+2].strip())
        strand = '+'
        sequence=str(genome[chr][start:stop])
        if doStrand:
            strand=fields[strandField]
            if strand == 'F' or strand == '+':
                sense='+'
            if strand == 'R' or strand == '-':
                sense='+'
                sequence = getReverseComplement(DNA,sequence)
        if doName:
            name=fields[nameField]
            outfile.write('>'+name+'\n')
        else:
            outfile.write('>'+chr+':'+str(start)+'-'+str(stop)+'\n')
        outfile.write(sequence + '\n')
   
run()
