##################################
#                                #
# Last modified 2016/09/11       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():
    """Re-wrap the sequences of a FASTA file to a fixed line width.

    Arguments are taken from ``sys.argv``:

        fasta blockSize outputfilename [-replaceXwithN]
        [-replaceNames correspondence_file] [-splitNamesSpace]

    * ``-replaceXwithN`` replaces ``X``/``x`` in sequences with ``N``/``n``.
    * ``-replaceNames`` reads ``<old name><tab><new name>`` pairs; records
      whose (full) header name has no entry in that file are skipped.
    * ``-splitNamesSpace`` keeps only the part of the written name that
      precedes the first space.

    Exits via ``sys.exit`` when the arguments are unusable or the
    correspondence file contains a malformed line.
    """

    # outputfilename is read from sys.argv[3], so four entries are required.
    # print(...) with a single parenthesised argument behaves the same under
    # Python 2 and Python 3.
    if len(sys.argv) < 4:
        print('usage: python %s fasta blockSize outputfilename [-replaceXwithN] [-replaceNames correspondence_file] [-splitNamesSpace]' % sys.argv[0])
        print('\t format of correspondence_file for sequence name replacement: <old name> <tab> <new name>')
        print('\t note: if the sequence name replacement option is used, sequences for which no new name is specified will be skipped')
        sys.exit(1)

    fasta = sys.argv[1]
    blocksize = int(sys.argv[2])
    outfilename = sys.argv[3]

    # A zero step makes range() raise and a negative one yields no blocks at
    # all (silently dropping the sequence), so reject both up front.
    if blocksize < 1:
        sys.exit('blockSize must be a positive integer')

    doSNS = '-splitNamesSpace' in sys.argv
    doXtoN = '-replaceXwithN' in sys.argv

    # None means "no renaming requested"; a dict (possibly empty) means
    # "rename, and skip every record that is not listed".
    replacements = None
    if '-replaceNames' in sys.argv:
        print('will replace sequence names')
        flag_pos = sys.argv.index('-replaceNames')
        if flag_pos + 1 >= len(sys.argv):
            sys.exit('-replaceNames requires a correspondence_file argument')
        replacements = {}
        with open(sys.argv[flag_pos + 1]) as linelist:
            for line in linelist:
                stripped = line.strip()
                if not stripped:
                    continue
                fields = stripped.split('\t')
                if len(fields) < 2:
                    sys.exit('malformed line in correspondence_file '
                             '(expected <old name><tab><new name>): %r' % stripped)
                replacements[fields[0]] = fields[1]

    with open(fasta) as inputdatafile, open(outfilename, 'w') as outfile:

        def write_record(name, chunks):
            # Emit one record, applying renaming/skipping, name splitting
            # and X->N replacement as requested on the command line.
            if replacements is not None:
                if name not in replacements:
                    return
                name = replacements[name]
            if doSNS:
                name = name.split(' ')[0]
            sequence = ''.join(chunks)
            if doXtoN:
                sequence = sequence.replace('X', 'N').replace('x', 'n')
            outfile.write('>' + name + '\n')
            for start in range(0, len(sequence), blocksize):
                outfile.write(sequence[start:start + blocksize] + '\n')

        # None (rather than '') marks "no header seen yet", so a record with
        # an empty name is still treated as a record.
        current_name = None
        chunks = []
        for line in inputdatafile:
            if line.startswith('>'):
                if current_name is not None:
                    write_record(current_name, chunks)
                # Drop only the leading '>' so names containing '>' survive.
                current_name = line.strip()[1:]
                chunks = []
            elif current_name is not None:
                chunks.append(line.strip())
            # Lines before the first header belong to no record; ignore them.

        # Flush the final record (absent when the input had no header).
        if current_name is not None:
            write_record(current_name, chunks)

# Script entry point: run() is invoked unconditionally at module level.
# NOTE(review): there is no `if __name__ == '__main__'` guard, so importing
# this file also executes run() with the importing process's sys.argv.
run()
