##################################
#                                #
# Last modified 04/04/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from cistematic.core.geneinfo import geneinfoDB
from cistematic.genomes import Genome

try:
	import psyco
	psyco.full()
except:
	pass

def getSequence(hg,chromosome,start,stop):
    
    chromosome = chromosome[3:len(chromosome)]
    sequence = string.upper(hg.sequence(chromosome,start,stop-start))

    return sequence

def run():

    if len(sys.argv) < 2:
        print 'usage: python %s inputfilename outfilename [-nomulti] [-polyA length genome]' % sys.argv[0]
        sys.exit(1)

    inputfilename = sys.argv[1]
    outputfilename = sys.argv[2]
    doMulti=True
    if '-nomulti' in sys.argv:
        doMulti=False
    doPolyA=True
    if '-polyA' in sys.argv:
        doPolyA=True
        genome=sys.argv[sys.argv.index('-polyA')+2]
        polyAlength=int(sys.argv[sys.argv.index('-polyA')+1])
        Atail=''
        Atailcomplement=''
        for i in range(polyAlength):
            Atail=Atail+'A'
            Atailcomplement=Atailcomplement+'T'
        hg = Genome(genome)

    outfile = open(outputfilename, 'w')

    lineslist = open(inputfilename)
    trackType=inputfilename.split('bowtie')[0]
    outfile.write('track name="%s" visibility=4 itemRgb="On"\n' % (trackType))
    i=0
    for line in lineslist:
        fields = line.strip().split('\t')
        score=1000
        i+=1
        if i % 100000 == 0:
            print i, 'alignments processed'
        print i
        if fields[6] != '0':
            if doMulti:
                score=200
            else:
                continue
        chrom=fields[2]     
        start=int(fields[3])
        stop=int(fields[3])+len(fields[4])
        orientation=fields[1]     
        name=fields[0]     
        if doPolyA:
            if orientation=='+':
                try:
                    sequence=getSequence(hg,chrom,stop,stop+polyAlength)
                except:
                    pass
                if sequence==Atail:
                    continue
            if orientation=='-':
                try:
                    sequence=getSequence(hg,chrom,start-polyAlength,start)
                except:
                    pass
                if sequence==Atailcomplement:
                    continue
        outline = '%s\t%s\t%s\t%s\t%s\t%s\n' % (chrom, start, stop, name, score, orientation)
        outfile.write(outline)
    print i, 'alignments processed'
    outfile.close()

run()

