##################################
#                                #
# Last modified 06/24/2012       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

# A trailing run of at least this many 'A's is treated as a poly-A tail.
_POLYA_MIN_RUN = 10
# Number of residues written per sequence line in the output FASTA.
_FASTA_LINE_WIDTH = 50


def _normalize_id(header_line):
    """Return the transcript ID taken from a '>' header line.

    The ID is the text between the leading '>' and the next '>' (if any).
    An ID without a ':' separator is rewritten as 'ID:ID'.
    """
    name = header_line.strip().split('>')[1]
    if ':' not in name:
        name = name + ':' + name
    return name


def _trim_polya(sequence):
    """Shorten a trailing run of 10 or more 'A's.

    NOTE(review): this keeps 9 trailing 'A's, which is exactly what the
    original one-character-at-a-time loop produced, although the usage text
    says 10 are kept. Output is left unchanged on purpose; confirm which
    count is intended before altering it.
    """
    body = sequence.rstrip('A')
    if len(sequence) - len(body) >= _POLYA_MIN_RUN:
        return body + 'A' * (_POLYA_MIN_RUN - 1)
    return sequence


def _write_record(outfile, name, chunks):
    """Write one FASTA record: header, then the cleaned sequence wrapped at 50."""
    sequence = _trim_polya(''.join(chunks).upper())
    outfile.write('>' + name + '\n')
    for start in range(0, len(sequence), _FASTA_LINE_WIDTH):
        outfile.write(sequence[start:start + _FASTA_LINE_WIDTH] + '\n')


def run():
    """Clean a transcriptome FASTA file named on the command line.

    Reads sys.argv[1] (input FASTA) and writes sys.argv[2] (output FASTA).
    For every record the ID gets a ':' separator if it lacks one, the
    sequence is upper-cased, a long trailing poly-A run is shortened, and the
    sequence is re-wrapped at 50 residues per line.

    Prints the usage text and exits with status 1 when either file argument
    is missing. Lines that appear before the first '>' header are ignored,
    and an input without any header produces an empty output file.
    """
    # Both the input and the output path are required, hence 3 (not 2).
    if len(sys.argv) < 3:
        print('usage: python %s transcriptome_fasta outputfilename' % sys.argv[0])
        print(' the transcript will also fix any transcripts for which the name does not have the ":" separator')
        print(' the transcript will remove all A-resides at the end of a transcript except for the first 10')
        sys.exit(1)

    fasta = sys.argv[1]
    outfilename = sys.argv[2]

    # Open the input first so that a missing input file does not leave a
    # truncated output file behind; 'with' guarantees both files are closed.
    with open(fasta) as inputdatafile:
        with open(outfilename, 'w') as outfile:
            name = None     # ID of the record being accumulated, if any
            chunks = []     # sequence lines of the current record
            for line in inputdatafile:
                if line.startswith('>'):
                    # A new header ends the previous record.
                    if name is not None:
                        _write_record(outfile, name, chunks)
                    name = _normalize_id(line)
                    chunks = []
                elif name is not None:
                    chunks.append(line.strip())
            # Flush the last record, which no following header terminates.
            if name is not None:
                _write_record(outfile, name, chunks)
   
# Run the conversion only when this file is executed as a script, so that
# importing the module does not read sys.argv or touch any files.
if __name__ == '__main__':
    run()
