##################################
#                                #
# Last modified 02/16/2015       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os

def _parse_clustal(lines):
    """Collect the per-row IDs and concatenated sequences from CLUSTAL lines.

    ``lines`` is any iterable of text lines (e.g. an open file).

    Returns a tuple ``(row_ids, row_seqs, total_length, max_id_length)``:
      * ``row_ids``       -- dict mapping 1-based row number -> sequence ID
      * ``row_seqs``      -- dict mapping 1-based row number -> all alignment
                             chunks of that row joined in file order
      * ``total_length``  -- summed chunk length of the first row of each block
      * ``max_id_length`` -- length of the longest ID seen
    """
    row_ids = {}
    row_chunks = {}
    max_id_length = 0
    total_length = 0
    separators = 0
    # Initialised up front so a sequence line that appears before any
    # separator line does not hit an unbound counter.
    row = 0

    for line in lines:
        if line.startswith('CLUSTAL multiple sequence alignment by MUSCLE'):
            continue
        # Blank lines and lines opening with a long run of spaces are counted
        # as separators (presumably the latter is the conservation line under
        # each block -- confirm against real input).
        if line.strip() == '' or line.startswith('                         '):
            separators += 1
            # Blocks are expected to be separated by two such lines, so the
            # row counter restarts after every second one.
            if separators % 2 == 0:
                row = 0
            continue

        fields = line.strip().split(' ')
        seq_id = fields[0]
        alignment = fields[-1]
        max_id_length = max(max_id_length, len(seq_id))

        row += 1
        if row == 1:
            # Only the first row of each block contributes to the total width.
            total_length += len(alignment)
        row_ids[row] = seq_id
        # Create the row on first sight instead of relying on the separator
        # count being exactly two when the first block is read.
        row_chunks.setdefault(row, []).append(alignment)

    row_seqs = dict((number, ''.join(chunks))
                    for number, chunks in row_chunks.items())
    return row_ids, row_seqs, total_length, max_id_length


def run():
    """Re-block a CLUSTAL alignment file to a new block width.

    Command-line arguments (read from ``sys.argv``):
      1. path of the input CLUSTAL file
      2. new block size (converted with ``int``)
      3. path of the output file

    Prints a usage message and exits with status 1 when fewer than three
    arguments are supplied.  While writing, the start offset of every output
    block is printed to stdout.  Each output line is the sequence ID padded
    with spaces to the longest ID, a tab, and the slice of the sequence for
    that block; every block is followed by two empty lines.
    """
    # Three positional arguments are consumed below, so argv needs 4 entries.
    if len(sys.argv) < 4:
        print('usage: python %s clw blockSize outfile' % sys.argv[0])
        print('\t The script assumend the following:')
        print('\t the first line begins with "CLUSTAL multiple sequence alignment"')
        print('\t there are two empty lines separating each alignment block')
        sys.exit(1)

    infilename = sys.argv[1]
    block_size = int(sys.argv[2])
    outfilename = sys.argv[3]

    with open(infilename) as infile:
        row_ids, row_seqs, total_length, max_id_length = _parse_clustal(infile)

    with open(outfilename, 'w') as outfile:
        for start in range(0, total_length, block_size):
            print(start)
            end = min(start + block_size, total_length)
            # Iterate over every row that was parsed rather than the parser's
            # last counter value, which is reset to zero when the file ends
            # with an even number of separator lines.
            for number in sorted(row_ids):
                padded_id = row_ids[number].ljust(max_id_length)
                outfile.write(padded_id + '\t' + row_seqs[number][start:end] + '\n')
            outfile.write('\n')
            outfile.write('\n')


# Only execute the conversion when the file is run as a script; importing the
# module must not consume the importer's sys.argv or call sys.exit().
if __name__ == '__main__':
    run()

