##################################
#                                #
# Last modified 5/6/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from cistematic.core import Genome
from cistematic.core import complement

def run():

    if len(sys.argv) < 3:
        print 'usage: python %s genome list-of-sites-file (chr-tab-start-tab-stop) outfilename' % sys.argv[0]
        sys.exit(1)

    genome = sys.argv[1]
    sitefilename = sys.argv[2]
    outfilename = sys.argv[3]

    hg = Genome(genome)
    
    outfile = open(outfilename, 'w')

    sitefile = open(sitefilename)
    siteslist = sitefile.readlines()
#    outfile.write(siteslist[0].split('\n')[0])
#    outfile.write('\tsequence\n')
#    siteslist.remove(siteslist[0])
    for line in siteslist:
        print line
        fields = line.split('\n')[0].split('\t')
        chromosome = fields[0]
        chromosome = chromosome[3:len(chromosome)]
        start = int(fields[1])
        stop = int(fields[2])
        sequence = string.lower(hg.sequence(chromosome,start,stop-start))
        k=0
        mot1 = 'caggtg'
        mot2 = 'cagctg'
        cmot1 = complement(mot1)
        cmot2 = complement(mot2)

        positionsfound = []
        for i in range(len(sequence)-6):
            if sequence[i:i+6]==mot1 or sequence[i:i+6]==mot2 or sequence[i:i+6]==cmot1 or sequence[i:i+6]==cmot2:
                positionsfound.append(i)

        positionsfound.append(len(sequence))

        if len(positionsfound)==0:
            outfile.write(line.split('\n')[0])
            outfile.write('\t')
            outfile.write(sequence)
            outfile.write('\n')
        else:
            positionsfound.sort()
            outfile.write(line.split('\n')[0])
            outfile.write('\t')
            outfile.write(sequence[0:positionsfound[0]])
            for i in range(len(positionsfound)-1):
                outfile.write(string.upper(sequence[positionsfound[i]:positionsfound[i]+6]))            
                outfile.write(sequence[positionsfound[i]+6:positionsfound[i+1]])             
        outfile.write('\n')
    outfile.close()     

run()
