##################################
#                                #
# Last modified 2023/11/08       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():
    """Write one '<label>.config' domain-layout file per label.

    Command-line arguments (read from ``sys.argv``):

    * ``sys.argv[1]`` -- files_config: tab-delimited rows of
      ``label<TAB>protein_fasta<TAB>pfam_table``.  Rows that share a label
      are merged into the same output file.
    * ``sys.argv[2]`` -- domain_colors_config: tab-delimited rows of
      ``domain<TAB>color``.  Only domains listed here are reported.  The
      file must also contain a ``protein`` row; its color is used for the
      full-length line of every sequence.

    For each Pfam table, lines starting with ``#`` are skipped and the
    remaining lines are split on tabs; column 0 is taken as the domain
    name, column 3 as the sequence ID, and columns 17 and 18 as the left
    and right coordinates (0-based column indexes).
    NOTE(review): this looks like a tab-converted HMMER domain table --
    confirm against the producer of the ``Pfam32.dom.tab`` files.

    Output, written to ``<label>.config`` in the current directory:

    * one line per sequence:  ``ID  ID  0  <sequence length>  <protein color>``
    * one line per kept hit:  ``ID  domain  left  right  <domain color>``

    FASTA records are keyed by the text following the first ``>`` on the
    header line (up to a second ``>`` if there is one).  Records whose ID
    is empty are ignored.

    Raises:
        SystemExit: with status 1, after printing usage, when fewer than
            two arguments are supplied.
        KeyError: when a kept Pfam row names a sequence ID that is not in
            the matching FASTA file, or when the colors config has no
            ``protein`` entry.
    """

    # Both positional arguments are required; sys.argv[2] is read below.
    if len(sys.argv) < 3:
        print('usage: python %s files_config domain_colors_config' % sys.argv[0])
        print('\t# domain_colors_config format:')
        print('\t\tdomain\tcolor')
        print('\t# files_config format:')
        print('\t\tlabel\tprotein_fasta\tPfam32.dom.tab')
        sys.exit(1)

    files_config = sys.argv[1]
    colors_config = sys.argv[2]

    # domain name -> color string
    ColorDict = {}
    with open(colors_config) as colors_file:
        for line in colors_file:
            if not line.strip():
                # tolerate blank (e.g. trailing) lines
                continue
            fields = line.strip().split('\t')
            ColorDict[fields[0]] = fields[1]

    print('finished inputting colors')
    print(ColorDict)

    # label -> sequence ID -> {'sequence': str, 'domains': [(domain, left, right)]}
    DomainDict = {}

    with open(files_config) as config_file:
        for line in config_file:
            if not line.strip():
                continue
            fields = line.strip().split('\t')
            label = fields[0]
            fasta = fields[1]
            PFAM = fields[2]
            entries = DomainDict.setdefault(label, {})

            # Parse the FASTA file.  The accumulator is reset for every file
            # so nothing leaks over from a previously parsed FASTA.
            ID = ''
            sequence = []
            with open(fasta) as fasta_file:
                for LL in fasta_file:
                    if LL[0] == '>':
                        if ID != '':
                            entries[ID] = {'sequence': ''.join(sequence),
                                           'domains': []}
                        ID = LL.strip().split('>')[1]
                        sequence = []
                    else:
                        sequence.append(LL.strip())
            # Flush the last record; skipped when the file held no usable
            # header, matching how empty IDs are treated inside the loop.
            if ID != '':
                entries[ID] = {'sequence': ''.join(sequence), 'domains': []}

            # Attach the domain hits whose domain has a configured color.
            with open(PFAM) as pfam_file:
                for LL in pfam_file:
                    if LL.startswith('#'):
                        continue
                    fields = LL.strip().split('\t')
                    DD = fields[0]
                    if DD not in ColorDict:
                        continue
                    ID = fields[3]
                    left = fields[17]
                    right = fields[18]
                    # KeyError here means the hit refers to a sequence that
                    # is absent from the FASTA file.
                    entries[ID]['domains'].append((DD, left, right))

    print('finished inputting files')

    for label in DomainDict:
        print(label)
        with open(label + '.config', 'w') as outfile:
            for ID in DomainDict[label]:
                entry = DomainDict[label][ID]
                # Full-length line for the protein itself.
                outline = '\t'.join([ID, ID, '0',
                                     str(len(entry['sequence'])),
                                     ColorDict['protein']])
                outfile.write(outline + '\n')
                for (domain, left, right) in entry['domains']:
                    outline = '\t'.join([ID, domain, left, right,
                                         ColorDict[domain]])
                    outfile.write(outline + '\n')

# Run the pipeline only when executed as a script, so that importing this
# module does not parse sys.argv, write files, or call sys.exit().
if __name__ == '__main__':
    run()

