##################################
#                                #
# Last modified 06/01/2015       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def tabline(line):
    """Return *line* with every run of spaces turned into a single tab.

    Runs of space characters are collapsed to one separator, surrounding
    whitespace (including the trailing newline) is stripped, and each
    remaining space is replaced by a tab. Tabs already present in *line*
    are left untouched unless they sit at either end.
    """
    # Splitting on a literal space and discarding the empty pieces collapses
    # consecutive spaces without touching any other whitespace character.
    pieces = [piece for piece in line.split(' ') if piece]
    collapsed = ' '.join(pieces)
    return collapsed.strip().replace(' ', '\t')

def _iter_domain_rows(lines):
    """Yield one list of 15 output columns for every domain row in *lines*.

    Each yielded list is: query name, query length, domain name, first word
    of the domain description, followed by the 11 numeric columns of the
    domain table row (score ... acc).

    Raises:
        ValueError: a line inside a domain table has fewer than 12 columns.
    """
    # Parser stages: outside any table / after a '>>' line / after the
    # column header (the next line is the dashed rule) / reading rows.
    IDLE, DOMAIN, HEADER, ROWS = range(4)
    # Significance flags and alignment-boundary markers removed from rows.
    # The order matches the original chained replace() calls.
    markers = ('!', '?', '.]', '..', '[]', '[.')

    stage = IDLE
    query = ''
    length = ''
    domain = ('', '')

    for lineno, line in enumerate(lines, 1):
        if line.startswith('#'):
            continue

        stripped = line.strip()
        if stripped == '':
            # A blank line closes the domain table; without this, reports
            # lacking an 'Alignments for each domain:' line would have
            # unrelated text parsed as domain rows.
            if stage == ROWS:
                stage = IDLE
            continue

        if stripped == '//':
            # End of the current query record.
            query = ''
            stage = IDLE
            continue

        if line.startswith('Query:'):
            fields = line.split()
            query = fields[1]
            length = fields[2].split('[L=')[1].split(']')[0]
            continue

        if line.startswith('>>'):
            fields = line.split()
            # Only the first word of the description is kept, and a model
            # without any description yields an empty column.
            domain = (fields[1], fields[2] if len(fields) > 2 else '')
            stage = DOMAIN
            continue

        if stage == IDLE:
            continue

        if ' hmmfrom ' in line:
            stage = HEADER
            continue

        if stage == HEADER:
            # The dashed rule directly under the column header.
            stage = ROWS
            continue

        if stage == ROWS:
            if stripped.startswith('Alignments for each domain:'):
                stage = IDLE
                continue

            cleaned = line
            for marker in markers:
                cleaned = cleaned.replace(marker, '')
            fields = cleaned.split()
            if len(fields) < 12:
                raise ValueError(
                    'line %d: expected 12 columns in domain row, got %d: %r'
                    % (lineno, len(fields), stripped))
            # fields[0] is the per-model domain number, which is not output.
            yield [query, length, domain[0], domain[1]] + fields[1:12]


def run():
    """Convert a per-domain PFAM search report into a tab-delimited table.

    Command line: ``python <script> PFAM output`` where ``PFAM`` is the
    report to read (``-`` reads standard input) and ``output`` is the file
    to write. With fewer than two arguments the usage text is printed and
    the process exits with status 1.

    The report is scanned line by line:

    * ``Query:`` lines supply the protein name and its length (``[L=...]``).
    * ``>>`` lines supply the domain name and the first word of its
      description.
    * The line containing `` hmmfrom `` is the domain table header; the
      line after it is skipped and every following line is a domain row
      until a blank line, an ``Alignments for each domain:`` line, a new
      ``>>`` line or ``//``.

    NOTE(review): these markers look like HMMER hmmscan text output --
    confirm against the files this script is actually fed.

    Raises:
        ValueError: a domain table row has fewer than 12 columns.
    """
    # Both the input and the output argument are required.
    if len(sys.argv) < 3:
        print('usage: python %s PFAM output' % sys.argv[0])
        print('\tuse - for stdin')
        sys.exit(1)

    source = sys.argv[1]
    outfilename = sys.argv[2]

    header = '#Protein\tLength\tDomain\tDomain_description\tscore\tbias\tc-Evalue\ti-Evalue\thmmfrom\thmmto\talifrom\talito\tenvfrom\tenvto\tacc'

    if source == '-':
        lineslist = sys.stdin
    else:
        lineslist = open(source)

    try:
        with open(outfilename, 'w') as outfile:
            outfile.write(header + '\n')
            for row in _iter_domain_rows(lineslist):
                outfile.write('\t'.join(row) + '\n')
    finally:
        # Never close the interpreter's own stdin.
        if lineslist is not sys.stdin:
            lineslist.close()

# Only run the conversion when executed as a script, so that importing this
# module does not read sys.argv or touch any files.
if __name__ == '__main__':
    run()