##################################
#                                #
# Last modified 2016/12/21       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import copy
from sets import Set

def _readCOGdefinitions(COGdef, doArcCOG):
    """Load the COG definition table into a dict of COG -> (name, definition).

    The file is read as tab-separated text with the COG identifier in the
    first column.  In arCOG mode the name is taken from the third column and
    the definition from the fourth; otherwise the name is left empty and the
    definition is taken from the third column.  Blank lines are skipped.
    """
    COGdict = {}
    with open(COGdef) as linelist:
        for line in linelist:
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            if doArcCOG:
                name = fields[2]
                definition = fields[3]
            else:
                name = ''
                definition = fields[2]
            COGdict[fields[0]] = (name, definition)
    return COGdict


def _readCOGorder(COGorder):
    """Load the COG order file into a dict of position -> COG.

    Each non-blank line is tab-separated: an integer in the first column and
    the COG identifier in the second.  The dictionary key is that integer
    plus one, which is the value the edge file's first two columns are
    looked up with (presumably 0-based order file vs 1-based edge positions;
    confirm against the producer of the edge file).
    """
    COGorderDict = {}
    with open(COGorder) as linelist:
        for line in linelist:
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            COGorderDict[int(fields[0]) + 1] = fields[1]
    return COGorderDict


def run():
    """Annotate an edge list with COG identifiers, names and definitions.

    Command-line arguments (read from sys.argv):
        1: edge file; tab-separated, the first two columns are integer
           positions, and a line starting with 'posA' is treated as header
        2: COG order file (see _readCOGorder)
        3: COG definition file (see _readCOGdefinitions)
        4: output file name
        -arCOG (optional, anywhere): read the definition file in arCOG layout

    Every edge line is written to the output followed by six extra columns:
    COG, name and definition for each of the two positions.  The header line
    is written prefixed with '#' and extended with the new column names.

    Exits with status 1 after printing usage when fewer than four positional
    arguments are supplied.  Raises KeyError when a position or COG is
    missing from the lookup tables.
    """
    # Four positional arguments are required: sys.argv[4] is read below, so
    # the script name plus four arguments means at least five entries.
    if len(sys.argv) < 5:
        print('usage: python %s significantCorrelations.isNormForBr.0.pairsAboveBH.txt COG_order COGdef.tab outfilename [-arCOG]' % sys.argv[0])
        # NOTE(review): the two notes below mention a -useALL option that this
        # script never checks for; the text is kept unchanged.
        print('\tNote: the script will output only a single COG match per species, and it will be the best hit; hits other than perfect matches will be printed as ? signs')
        print('\t\tuse the [-useALL] option if you want all hits as 1 characters')
        sys.exit(1)

    edges = sys.argv[1]
    COGorder = sys.argv[2]
    COGdef = sys.argv[3]
    outfilename = sys.argv[4]

    doArcCOG = '-arCOG' in sys.argv

    COGdict = _readCOGdefinitions(COGdef, doArcCOG)
    COGorderDict = _readCOGorder(COGorder)

    # Context managers guarantee both handles are closed even if a lookup
    # fails part-way through the edge file.
    with open(outfilename, 'w') as outfile:
        with open(edges) as linelist:
            for line in linelist:
                stripped = line.strip()
                if not stripped:
                    continue
                if line.startswith('posA'):
                    outfile.write('#' + stripped + '\tCOG1\tname\tdefinition\tCOG2\tname\tdefinition' + '\n')
                    continue
                fields = stripped.split('\t')
                COG1 = COGorderDict[int(fields[0])]
                COG2 = COGorderDict[int(fields[1])]
                name1, definition1 = COGdict[COG1]
                name2, definition2 = COGdict[COG2]
                outline = '\t'.join([stripped, COG1, name1, definition1, COG2, name2, definition2])
                outfile.write(outline + '\n')
   
# Script entry point: run() is called unconditionally when this file is
# executed, and also if it is imported as a module.
run()
