##################################
#                                #
# Last modified 05/14/2012       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys

# psyco is an optional third-party speed-up module; the script does not
# otherwise reference it, so any failure to load or enable it is ignored.
try:
    import psyco
    psyco.full()
except Exception:
    # Best effort only.  Unlike a bare ``except:``, this does not swallow
    # KeyboardInterrupt or SystemExit.
    pass

def _write_record(outfile, header, sequence):
    """Write one record: the header line, then the sequence on one line."""
    outfile.write(header + '\n')
    outfile.write(''.join(sequence) + '\n')


def run():
    """Copy the FASTA records whose header mentions a species to a new file.

    Arguments are read from ``sys.argv``:

        1. inputfilename -- file to read; lines starting with '>' are
           treated as record headers, all other lines as sequence data.
        2. species_name  -- text looked for inside each header; every
           underscore is replaced by a space before matching.
        3. outfilename   -- file to write (opened in 'w' mode, so any
           existing content is overwritten).

    Each matching record is written as two lines: the stripped header,
    then all of its sequence lines stripped and joined into one line.

    If fewer than three arguments are given, the usage text is printed and
    the process exits with status 1.

    NOTE: the usage text advertises ``-minSeqLen``, but this function does
    not read that option.
    """
    # Three positional arguments are read below, so argv needs 4 entries.
    if len(sys.argv) < 4:
        print('usage: python %s inputfilename species_name outfilename [-minSeqLen number]' % sys.argv[0])
        print(' use _ instead of intervals in the species names, the script will convert them back to interval')
        sys.exit(1)

    inputfilename = sys.argv[1]
    species = sys.argv[2].replace('_', ' ')
    outputfilename = sys.argv[3]

    # Open the input first so that an unreadable input does not truncate
    # the output file.  Nested ``with`` blocks close both files even when
    # an error occurs part-way through.
    with open(inputfilename) as infile:
        with open(outputfilename, 'w') as outfile:
            keep = False
            header = None
            sequence = []
            for line in infile:
                if line.startswith('>'):
                    # A new header ends the previous record; emit it if it
                    # was selected.
                    if keep:
                        _write_record(outfile, header, sequence)
                    header = line.strip()
                    keep = species in header
                    sequence = []
                elif keep:
                    sequence.append(line.strip())

            # The final record has no following header to trigger the
            # write above, so flush it explicitly.
            if keep:
                _write_record(outfile, header, sequence)

# Run the filter only when this file is executed as a script, so that
# importing the module does not trigger argument parsing or file I/O.
if __name__ == '__main__':
    run()

