##################################
#                                #
# Last modified 04/01/2016       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os
from sets import Set

def _hog_label(fasta_path):
    """Return the HOG label for a FASTA file path.

    The label is the file's base name cut at the first '.fa', so
    'dir/HOG0001.fa' and 'dir/HOG0001.fasta' both yield 'HOG0001'.
    """
    return os.path.basename(fasta_path).split('.fa')[0]


def _count_species(fasta_path):
    """Count the sequences per species in one FASTA file.

    Only header lines (starting with '>') are examined. The species is
    read from the bracketed tag of the header, e.g. '>id [Homo sapiens]'.

    Returns a dict mapping species name -> number of headers seen.
    Prints a diagnostic and exits with status 1 if a header carries no
    '[species]' tag, because the table cannot be built reliably then.
    """
    counts = {}
    with open(fasta_path) as fasta_file:
        for line in fasta_file:
            if not line.startswith('>'):
                continue
            header = line.strip()
            try:
                # Text before the first ']', then the part after the '['.
                species = header.split(']')[0].split('[')[1]
            except IndexError:
                print('no [species] tag in header of %s, exiting' % fasta_path)
                print(header)
                sys.exit(1)
            counts[species] = counts.get(species, 0) + 1
    return counts


def run():
    """Build a HOG-by-species table of sequence counts.

    Command line: list_of_HOG_fasta_files outfilename

    The list file holds one FASTA path per line; only the first
    tab-separated field of each line is used and blank lines are
    skipped. The output is tab-delimited: a '#HOG' header row naming
    every species seen (sorted), then one row per HOG (sorted) giving
    the number of sequences of each species, with 0 where absent.

    Exits with status 1 on missing arguments, on two list entries that
    map to the same HOG label, or on a FASTA header without a species
    tag.
    """
    # Both positional arguments are required (argv[0] is the script name).
    # NOTE: the usage text advertises [-HOG_renamed], but nothing in this
    # function reads that option.
    if len(sys.argv) < 3:
        print('usage: python %s list_of_HOG_fasta_files outfilename [-HOG_renamed]' % sys.argv[0])
        sys.exit(1)

    list_path = sys.argv[1]
    outfilename = sys.argv[2]

    hog_counts = {}
    all_species = set()

    with open(list_path) as listing:
        for entry in listing:
            fasta = entry.strip().split('\t')[0]
            if not fasta:
                # Blank line in the list file: nothing to open.
                continue
            # Derive the label from the parsed path, not the raw line, so
            # newlines and extra columns cannot leak into it.
            hog = _hog_label(fasta)
            if hog in hog_counts:
                print('duplicate file names detected, exiting')
                print(hog)
                sys.exit(1)
            hog_counts[hog] = _count_species(fasta)
            all_species.update(hog_counts[hog])

    species_list = sorted(all_species)

    with open(outfilename, 'w') as outfile:
        outfile.write('\t'.join(['#HOG'] + species_list) + '\n')
        for hog in sorted(hog_counts):
            per_species = hog_counts[hog]
            row = [hog] + [str(per_species.get(sp, 0)) for sp in species_list]
            outfile.write('\t'.join(row) + '\n')

# Run only when executed as a script, so that importing this module does
# not parse sys.argv, touch files, or call sys.exit as a side effect.
if __name__ == '__main__':
    run()

