##################################
#                                #
# Last modified 2019/12/01       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import random

def _read_wanted_ids(wanted, fieldID):
    """Return the set of IDs in column *fieldID* (0-based) of the file *wanted*.

    The file is tab-delimited; lines starting with '#' and blank lines are
    skipped.  Raises IndexError if a data line has too few columns.
    """
    wanted_ids = set()
    with open(wanted) as handle:
        for line in handle:
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if not stripped:
                # A blank line carries no ID; skip it rather than index into it.
                continue
            wanted_ids.add(stripped.split('\t')[fieldID])
    return wanted_ids


def _filter_fasta(fasta, wanted_ids, prefix, doSS, outfile):
    """Copy the records of *fasta* whose header ID is in *wanted_ids* to *outfile*.

    *prefix* is inserted right after the '>' of every header written.  When
    *doSS* is true, only the part of the header before the first space is
    used for matching and written out.
    """
    with open(fasta) as handle:
        keep = False
        for line in handle:
            if not line.startswith('>'):
                # Sequence line: belongs to the most recent header.
                if keep:
                    outfile.write(line)
                continue

            # Everything after the leading '>' is the ID (a later '>' inside
            # the header must not truncate it).
            seq_id = line.strip()[1:]
            if doSS:
                seq_id = seq_id.split(' ')[0]
            keep = seq_id in wanted_ids
            if not keep:
                continue

            if doSS:
                # seq_id is already stripped, so exactly one newline is
                # written even when the header contains no space.
                outfile.write('>' + prefix + seq_id + '\n')
            else:
                outfile.write('>' + prefix + line[1:])


def run():
    """Extract wanted sequences from one or more FASTA files into one output.

    Command line: ``config outputfilename [-splitspace]``.

    Each non-blank line of *config* has four tab-separated fields:
    the path of a tab-delimited file listing wanted sequences, the 0-based
    column of that file holding the sequence ID, a prefix to prepend to
    every written header, and the path of the FASTA file to filter.

    With ``-splitspace`` headers are cut at the first space, both for
    matching and in the output.  Exits with status 1 after printing usage
    when fewer than two arguments are given.
    """
    # Both the config and the output filename are required.
    if len(sys.argv) < 3:
        print('usage: python %s config outputfilename [-splitspace]' % sys.argv[0])
        print('\tconfig format: <wanted sequences> <tab> fieldID  <tab> <prefix> <tab> <fasta>')
        sys.exit(1)

    config = sys.argv[1]
    outfilename = sys.argv[2]
    doSS = '-splitspace' in sys.argv

    with open(outfilename, 'w') as outfile:
        with open(config) as linelist:
            for LINE in linelist:
                entry = LINE.strip()
                if not entry:
                    # Tolerate blank lines (e.g. a trailing newline) in the config.
                    continue
                fields = entry.split('\t')
                print(fields)
                wanted = fields[0]
                fieldID = int(fields[1])
                prefix = fields[2]
                fasta = fields[3]
                print(fasta)
                wanted_ids = _read_wanted_ids(wanted, fieldID)
                _filter_fasta(fasta, wanted_ids, prefix, doSS, outfile)
   
# Run the command-line tool only when executed as a script, so that importing
# this module does not parse sys.argv or exit the interpreter.
if __name__ == '__main__':
    run()
