##################################
#                                #
# Last modified 07/25/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():
    """Filter '>'-headed records in a list of files down to selected species.

    Command-line arguments (read from ``sys.argv``):
        1. path to a text file listing one input file per line; only the
           first tab-separated field of each line is used as the path
        2. comma-separated species names to keep
        3. output folder (must already exist; it is not created here)
        4. suffix appended to each output file name

    For every input file an output file named
    ``<output folder>/<input basename>.<suffix>`` is written.  A header line
    (one starting with '>') is kept when the text between '>' and the first
    '|' is one of the requested species; the line directly following a kept
    header is kept as well.

    Prints a usage message and exits with status 1 when fewer than four
    arguments are supplied.
    """
    # Four positional arguments are read below (argv[1]..argv[4]), so argv
    # must hold at least five entries including the script name.
    if len(sys.argv) < 5:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('usage: python %s <list of files> <species to keep> <output folder> <outfile suffix>' % sys.argv[0])
        print('      the species to keep parameter should be comma-separated')
        print('      one file per line for the list of files')
        sys.exit(1)

    listPath = sys.argv[1]
    # Dict used as a set of species names; the values are unused placeholders.
    speciesDict = dict.fromkeys(sys.argv[2].split(','), '')
    outFolder = sys.argv[3]
    outSuffix = sys.argv[4]

    print(speciesDict)

    processed = 0
    with open(listPath) as listFile:
        for entry in listFile:
            MAF = entry.strip().split('\t')[0]
            if not MAF:
                # Blank line in the file list: nothing to open.
                continue
            outPath = outFolder + '/' + MAF.split('/')[-1] + '.' + outSuffix
            with open(MAF) as inFile:
                with open(outPath, 'w') as outFile:
                    keepLine = False
                    for line in inFile:
                        if line.startswith('>'):
                            species = line.strip().split('>')[1].split('|')[0]
                            keepLine = species in speciesDict
                            if keepLine:
                                outFile.write(line)
                        else:
                            if keepLine:
                                outFile.write(line)
                            # Only the single line right after a header is
                            # treated as belonging to it; later lines are
                            # dropped until the next header.
                            # NOTE(review): a sequence wrapped over several
                            # lines would be cut to its first line - confirm
                            # the inputs are one line per sequence.
                            keepLine = False
            processed += 1
            if processed % 1000 == 0:
                print('%d files processed' % processed)
   
# Execute the filter only when this file is run as a script, so that
# importing the module does not parse sys.argv or exit the interpreter.
if __name__ == '__main__':
    run()
