##################################
#                                #
# Last modified 10/15/2012       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def run():
    """Write the top hit of every query in a text report to a tab-separated file.

    Command-line arguments (read from sys.argv):
        sys.argv[1] -- path of the report to parse
        sys.argv[2] -- path of the output file (created or truncated)

    The report is scanned line by line.  A line starting with 'Query='
    opens a query record; within a record, a line starting with
    'Sequences producing significant alignments:' marks the hit table.
    The second line after that marker is taken as the top hit, and one
    output line is written for it:

        <query> TAB <first field> TAB <4th field from end> TAB <last field>

    where fields are obtained by splitting the stripped line on single
    spaces.  After a hit is written the record is closed, and everything up
    to the next 'Query=' line is skipped.
    (The markers look like BLAST plain-text output -- assumption, the input
    format is not documented in this file.)

    Exits with status 1 after printing a usage message when fewer than two
    arguments are supplied.
    """
    # Both the input and the output path are required; checking for "< 2"
    # would let a missing output path through and crash on sys.argv[2].
    if len(sys.argv) < 3:
        print('usage: python %s input outfilename' % sys.argv[0])
        sys.exit(1)

    datafilename = sys.argv[1]
    outfilename = sys.argv[2]

    in_query = False
    in_alignments = False
    current_query = ''
    lines_after_header = 0

    # The output is opened first (as before), so it is created/truncated even
    # when the input cannot be opened.  'with' guarantees both handles are
    # closed, including when parsing raises.
    with open(outfilename, 'w') as outfile:
        with open(datafilename) as infile:
            for line_number, line in enumerate(infile, 1):
                if line_number % 1000000 == 0:
                    print('%d lines processed' % line_number)

                if line.startswith('Query='):
                    in_query = True
                    current_query = line.strip().replace('Query= ', '')
                    continue
                if not in_query:
                    continue

                if line.startswith('Sequences producing significant alignments:'):
                    in_alignments = True
                    lines_after_header = 0
                    continue
                if not in_alignments:
                    continue

                lines_after_header += 1
                if lines_after_header != 2:
                    continue

                # Splitting on a single space turns every run of spaces into
                # empty fields, so the index -4 depends on the exact column
                # spacing of the report.
                # NOTE(review): presumably the score and the e-value are
                # separated by exactly three spaces in the expected input --
                # confirm before switching to whitespace splitting.
                fields = line.strip().split(' ')
                top_hit = fields[0]
                top_hit_evalue = fields[-1]
                top_hit_score = fields[-4]
                outfile.write('\t'.join((current_query, top_hit,
                                         top_hit_score, top_hit_evalue)) + '\n')

                # Only the first hit is wanted: close the record and ignore
                # everything until the next 'Query=' line.
                in_alignments = False
                in_query = False
                current_query = ''
        
# Script entry point: executed unconditionally at module load (there is no
# __name__ == '__main__' guard), so importing this file also calls run().
run()

