##################################
#                                #
# Last modified 2016/12/21       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import copy
from sets import Set

def run():
    """Annotate significant CoPAP edges with the names of both domains.

    Command-line arguments (read from ``sys.argv``):
        1. CoPAP-input  -- tab-separated edge table.  A line starting with
           'posA' is treated as the header.  In data rows, columns 0 and 1
           are integer domain indices and column 6 is the p-value.
        2. PFam_order   -- tab-separated file; column 0 is an integer index
           and column 1 the domain name.
        3. max_p_val    -- only rows with p-value strictly below this
           threshold are written.
        4. outfilename  -- path of the output file (overwritten).

    Each retained row is written unchanged with the two domain names
    appended as extra tab-separated columns.

    Raises:
        KeyError: if an edge row references an index missing from the
            PFam_order file (checked before the p-value filter).
        ValueError / IndexError: on malformed rows.
    """
    # Four positional arguments are required, so argv must hold at least
    # five entries; the previous "< 4" check let a missing outfilename
    # through and crashed with IndexError on sys.argv[4].
    if len(sys.argv) < 5:
        # Single parenthesised argument: prints identically whether print
        # is a statement or a function.
        print('usage: python %s CoPAP-input PFam_order max_p_val outfilename' % sys.argv[0])
        print('\tNote: the script will output only a single COG match per species, and it will be the best hit; hits other than perfect matches will be printed as ? signs')
        print('\t\tuse the [-useALL] option if you want all hits as 1 characters')
        sys.exit(1)

    edges = sys.argv[1]
    PFamorder = sys.argv[2]
    maxPV = float(sys.argv[3])
    outfilename = sys.argv[4]

    # Map (index from file + 1) -> domain name.  The +1 shift presumably
    # converts 0-based order-file indices to the 1-based positions used in
    # the edge table -- TODO confirm against the CoPAP output format.
    PfamorderDict = {}
    with open(PFamorder) as order_file:
        for line in order_file:
            fields = line.strip().split('\t')
            PfamorderDict[int(fields[0]) + 1] = fields[1]

    # Output is opened first (as before) so it is created/truncated even if
    # the edge file turns out to be unreadable.
    with open(outfilename, 'w') as outfile:
        with open(edges) as edge_file:
            for line in edge_file:
                if line.startswith('posA'):
                    # NOTE(review): this header names six extra columns
                    # while data rows below append only two; left as-is to
                    # keep the output format unchanged.
                    outfile.write('#' + line.strip() + '\tCOG1\tname\tdefinition\tCOG2\tname\tdefinition' + '\n')
                    continue
                fields = line.strip().split('\t')
                # Resolve both domains before filtering so an unknown index
                # is reported regardless of the row's p-value.
                dom1 = PfamorderDict[int(fields[0])]
                dom2 = PfamorderDict[int(fields[1])]
                pval = float(fields[6])
                # Keep strictly-significant rows only (NaN is also dropped,
                # matching the original if/else form).
                if not pval < maxPV:
                    continue
                outfile.write(line.strip() + '\t' + dom1 + '\t' + dom2 + '\n')
   
# Run the command-line entry point only when executed as a script, so that
# importing this module does not parse sys.argv or exit the interpreter.
if __name__ == '__main__':
    run()
