##################################
#                                #
# Last modified 2017/02/01       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import copy
from sets import Set

def _read_cog_definitions(path, is_arcog):
    """Load the COG definition table.

    The file is tab-separated with the COG identifier in column 0.  In the
    default layout the definition is taken from column 2 and the name is left
    empty; with ``is_arcog`` the name is column 2 and the definition column 3.

    Returns a dict mapping COG identifier -> (name, definition).
    """
    definitions = {}
    with open(path) as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no record; indexing them would fail.
                continue
            fields = stripped.split('\t')
            if is_arcog:
                definitions[fields[0]] = (fields[2], fields[3])
            else:
                definitions[fields[0]] = ('', fields[2])
    return definitions


def _build_header(order_path, definitions):
    """Build the '#species' header line (without trailing newline).

    The order file is tab-separated with the COG identifier in column 1; one
    'COG:definition' column is emitted per non-blank line, in file order.
    COGs missing from ``definitions`` get an empty definition and a warning
    is printed to stdout.
    """
    columns = ['#species']
    with open(order_path) as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue
            cog = stripped.split('\t')[1]
            if cog in definitions:
                columns.append(cog + ':' + definitions[cog][1])
            else:
                columns.append(cog + ':')
                print('%s not found in defintiion file' % cog)
    return '\t'.join(columns)


def _write_species_rows(fasta_path, outfile):
    """Write one tab-separated row per FASTA record to ``outfile``.

    Each row is the species name (text between the leading '>' and the next
    '>' on the header line) followed by every character of the record's
    sequence as its own column.  Sequence lines belonging to the same record
    are concatenated, so wrapped FASTA yields a single row per species.
    Sequence text that appears before the first header is ignored, and
    records without any sequence characters produce no row.
    """
    species = None
    residues = []
    with open(fasta_path) as handle:
        for line in handle:
            if line.startswith('>'):
                # A new header closes the previous record.
                if species is not None and residues:
                    outfile.write('\t'.join([species] + residues) + '\n')
                species = line.strip().split('>')[1]
                residues = []
            elif species is not None:
                residues.extend(line.strip())
    # The last record has no following header to trigger the write.
    if species is not None and residues:
        outfile.write('\t'.join([species] + residues) + '\n')


def run():
    """Convert a per-species COG FASTA into a tab-separated matrix.

    Command-line arguments (read from ``sys.argv``):
        1. FASTA file: '>species' headers followed by sequence characters,
           one character per COG column.
        2. COG order file: tab-separated, COG identifier in column 1.
        3. COG definition file: tab-separated, see ``-arCOG`` below.
        4. Output file name.
        ``-arCOG`` (optional, anywhere): read the definition file with the
        name in column 2 and the definition in column 3 instead of the
        definition in column 2.

    Prints the usage line and exits with status 1 when fewer than four
    positional arguments are supplied.
    """
    # Four positional arguments are required, so argv needs five entries.
    if len(sys.argv) < 5:
        print('usage: python %s GOGfasta COG_order COG_def outfilename [-arCOG]' % sys.argv[0])
        sys.exit(1)

    fasta, COGorder, COGdef, outfilename = sys.argv[1:5]
    doArcCOG = '-arCOG' in sys.argv

    COGdict = _read_cog_definitions(COGdef, doArcCOG)
    header = _build_header(COGorder, COGdict)

    with open(outfilename, 'w') as outfile:
        outfile.write(header + '\n')
        _write_species_rows(fasta, outfile)
   
# Run the conversion only when executed as a script, so that importing this
# module does not trigger argument parsing and file I/O.
if __name__ == '__main__':
    run()
