##################################
#                                #
# Last modified 04/12/2016       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os
from sets import Set

def _read_orf_species(config_path):
    """Read the ORF-fasta-config file and index every FASTA header by species.

    Each non-blank config line is ``species_ID <tab> ORF.faa``. Every line of
    the FASTA file starting with '>' contributes one ID (the text after '>').

    Returns a pair ``(orf_to_species, species_groups)``: the first maps each
    FASTA ID to its species, the second holds one empty dict per species.
    """
    orf_to_species = {}
    species_groups = {}
    with open(config_path) as config:
        for line in config:
            # Blank lines would otherwise fail on fields[1].
            if not line.strip():
                continue
            fields = line.strip().split('\t')
            species = fields[0]
            fasta = fields[1]
            with open(fasta) as records:
                species_groups[species] = {}
                for record in records:
                    if record.startswith('>'):
                        orf_id = record.strip().split('>')[1]
                        orf_to_species[orf_id] = species
    return orf_to_species, species_groups


def _read_groups(og_path, orf_to_species, species_groups):
    """Read the orthologous-groups file and record group membership.

    Lines starting with '#' and blank lines are skipped. The first
    tab-separated field is the group name; each remaining field is split on
    ':' and its second piece is used as the ORF ID.

    Fills ``species_groups[species][group]`` in place and returns
    ``(og_list, assigned)``: group names in file order and the set of ORF IDs
    that belong to some group.

    Raises KeyError if an ORF ID is not present in ``orf_to_species``.
    """
    og_list = []
    assigned = set()
    with open(og_path) as groups:
        for line in groups:
            if line.startswith('#') or not line.strip():
                continue
            fields = line.strip().split('\t')
            og = fields[0]
            og_list.append(og)
            for member in fields[1:]:
                orf_id = member.split(':')[1]
                species = orf_to_species[orf_id]
                species_groups[species][og] = orf_id
                assigned.add(orf_id)
    return og_list, assigned


def _write_table(outfilename, og_list, species_groups, orf_to_species, assigned):
    """Write the species-by-group table to ``outfilename``.

    The header is '#OG' followed by the sorted species names. Each group row
    repeats the group name under every species that has a member and '-'
    elsewhere. Every ORF that is in no group then gets its own 'NoOrth<N>' row,
    marked only in the column of the species it came from.
    """
    species_list = sorted(species_groups)
    with open(outfilename, 'w') as outfile:
        outfile.write('\t'.join(['#OG'] + species_list) + '\n')

        for og in og_list:
            row = [og]
            for species in species_list:
                row.append(og if og in species_groups[species] else '-')
            outfile.write('\t'.join(row) + '\n')

        counter = 0
        for orf_id in orf_to_species:
            if orf_id in assigned:
                continue
            counter += 1
            label = 'NoOrth' + str(counter)
            row = [label]
            for species in species_list:
                row.append(label if species == orf_to_species[orf_id] else '-')
            outfile.write('\t'.join(row) + '\n')


def run():
    """Build a species-by-orthologous-group table from the command line.

    Reads three positional arguments from ``sys.argv``: the ORF-fasta-config
    file, the orthologous-groups file and the output file name. Prints the
    usage message and exits with status 1 when any of them is missing.

    NOTE: the usage text advertises ``-maxSize M``, but nothing in this
    function reads it; any arguments after the third are ignored.
    """
    # Three positional arguments are required, so argv needs four entries.
    if len(sys.argv) < 4:
        print('usage: python %s ORF-fasta-config OrthologousGroups.txt outfilename [-maxSize M]' % sys.argv[0])
        print('Note: ORF-fasta-config format: species_ID <tab> ORF.faa')
        sys.exit(1)

    ORFfaConfig = sys.argv[1]
    OGtxt = sys.argv[2]
    outfilename = sys.argv[3]

    ORFfaDictToSpecies, SpeciesOGDict = _read_orf_species(ORFfaConfig)
    OGlist, assigned = _read_groups(OGtxt, ORFfaDictToSpecies, SpeciesOGDict)

    # The output file is only created once both inputs parsed successfully.
    _write_table(outfilename, OGlist, SpeciesOGDict, ORFfaDictToSpecies, assigned)

# Script entry point: run() is invoked unconditionally, so it also executes
# (and reads sys.argv) if this file is imported as a module.
run()
