##################################
#                                #
# Last modified 04/29/2015       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os
from sets import Set

def _parse_wanted(spec):
    """Parse a 'domain1:maxE,domain2:maxE,...' string.

    Returns a dict mapping each domain name to its maximum allowed E-value
    (as a float). A domain listed more than once keeps its last cutoff.
    """
    cutoffs = {}
    for entry in spec.split(','):
        parts = entry.split(':')
        cutoffs[parts[0]] = float(parts[1])
    return cutoffs


def _read_best_evalues(tblout):
    """Collect the best (lowest) E-value per protein and domain.

    Reads the tab-separated file `tblout`, taking the domain name from
    column 0, the protein ID from column 2 and the E-value from column 4.
    Lines starting with '#' and blank lines are skipped.

    Returns a dict of the form {protein_id: {domain: lowest_e_value}}.
    """
    best = {}
    with open(tblout) as infile:
        for line in infile:
            if line.startswith('#') or not line.strip():
                continue
            fields = line.strip().split('\t')
            domain = fields[0]
            protein = fields[2]
            evalue = float(fields[4])
            hits = best.setdefault(protein, {})
            # Keep the strongest hit if the same protein/domain pair shows
            # up on more than one row, so a later weaker row cannot mask an
            # earlier passing one.
            if domain not in hits or evalue < hits[domain]:
                hits[domain] = evalue
    return best


def run():
    """Filter a domain-hit table down to proteins carrying all wanted domains.

    Command-line arguments (read from sys.argv):
        1: input tab-separated table (presumably a tab-converted HMMER/Pfam
           tblout file -- the code only relies on columns 0, 2 and 4)
        2: comma-separated 'domain:maxE' requirements
        3: output file name

    A protein is kept when, for every requested domain, it has a hit whose
    E-value is less than or equal to that domain's cutoff. The output file
    receives every '#' comment line plus every row (for any domain) that
    belongs to a kept protein, in the original order.

    Prints a usage message and exits with status 1 when fewer than three
    arguments are supplied.
    """
    # Three positional arguments are required, i.e. argv must hold 4 items.
    if len(sys.argv) < 4:
        print('usage: python %s PFAM.tblout.tab doman1:maxE(,doman2:maxE,....,domann:maxE) output' % sys.argv[0])
        sys.exit(1)

    tblout = sys.argv[1]
    wanted = _parse_wanted(sys.argv[2])
    outfilename = sys.argv[3]

    best = _read_best_evalues(tblout)

    # Proteins that satisfy every requested domain cutoff.
    keep = set()
    for protein, hits in best.items():
        if all(domain in hits and hits[domain] <= cutoff
               for domain, cutoff in wanted.items()):
            keep.add(protein)

    # Second pass: copy comments and the rows of kept proteins verbatim.
    with open(tblout) as infile:
        with open(outfilename, 'w') as outfile:
            for line in infile:
                if line.startswith('#'):
                    outfile.write(line)
                    continue
                if not line.strip():
                    continue
                fields = line.strip().split('\t')
                if fields[2] in keep:
                    outfile.write(line)

# Run the filter only when this file is executed as a script, so that
# importing it does not trigger command-line parsing.
if __name__ == '__main__':
    run()

