##################################
#                                #
# Last modified 2025/01/12       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os

def _parse_config_line(line):
    """Split one config line into ``(files, TF, celltype, accession)``.

    The line is tab-delimited; the first column is a comma-separated list
    of PNG paths. Returns None for blank (whitespace-only) lines. A line
    with fewer than four columns raises IndexError.
    """
    stripped = line.strip()
    if stripped == '':
        return None
    fields = stripped.split('\t')
    return fields[0].split(','), fields[1], fields[2], fields[3]


def _read_submotifs(path):
    """Read the submotif config at *path*.

    Returns a dict mapping ``(TF, celltype, accession)`` to the list of PNG
    paths given for that combination, in file order (several lines may
    contribute to the same key).
    """
    submotifs = {}
    with open(path) as handle:
        for line in handle:
            record = _parse_config_line(line)
            if record is None:
                continue
            files, TF, celltype, accession = record
            submotifs.setdefault((TF, celltype, accession), []).extend(files)
    return submotifs


def _write_lines(outfile, lines):
    """Write each string in *lines* to *outfile*, newline-terminated."""
    for text in lines:
        outfile.write(text + '\n')


def _write_main_figure(outfile, files, TF, celltype, accession):
    """Emit the LaTeX for one main-config entry.

    Every PNG except the last goes on its own page as a bare
    ``\\includegraphics``; the last PNG is wrapped in a captioned,
    labelled ``figure*`` environment.
    """
    main_png = files[-1]
    for extra_png in files[:-1]:
        _write_lines(outfile, [
            r'\clearpage',
            r'\includegraphics[width=18cm]{' + extra_png + '}',
        ])

    # Raw strings throughout: in a normal literal '\b' is a backspace, which
    # would corrupt \begin and \bf in the generated LaTeX.
    _write_lines(outfile, [
        r'\clearpage',
        '',
        r'\begin{figure*}[!ht]',
        r'\begin{center}',
        r'\includegraphics[width=18cm]{' + main_png + '}',
        r'\end{center}',
        r'\captionsetup{singlelinecheck=off,justification=justified}',
        r'\caption{',
        r'{\bf Properties and genomic distribution of motifs identified for the '
        + TF + ' transcription factor in ' + celltype
        + ' (accession ' + accession + ')}. ',
        '}',
        # The label is the PNG path minus its last four characters
        # (i.e. a ".png"-style extension).
        r'\label{' + main_png[:-4] + '}',
        r'\end{figure*}',
        '',
        '',
    ])


def _write_subpatterns(outfile, main_png, pngs, TF, celltype, accession):
    """Emit the sub-pattern pages for one TF/cell line/accession entry.

    Each PNG in *pngs* gets a centred bold title (pattern id, n and pattern
    type are parsed out of the PNG file name) followed by the image; a
    caption-only ``figure*`` labelled ``<main_png stem>-subpatterns``
    closes the section.
    """
    outfile.write(r'\clearpage' + '\n')

    for PNG in pngs:
        # File names are expected to contain 'pattern_<id>.', '.n_<count>.'
        # and the pattern type as the second-to-last dot-separated token.
        pattern = PNG.split('pattern_')[-1].split('.')[0]
        patternN = PNG.split('.n_')[-1].split('.')[0]
        patterntype = PNG.split('.')[-2]
        _write_lines(outfile, [
            r'\begin{center}',
            r'{\bf TF-MoDisCO pattern\_' + pattern + ' n=' + patternN
            + ', ' + patterntype + '}',
            r'\end{center}',
            r'\begin{center}',
            r'\includegraphics[width=18.5cm]{' + PNG + '}',
            r'\end{center}',
        ])

    _write_lines(outfile, [
        r'\begin{figure*}[!ht]',
        r'\begin{center}',
        r'\end{center}',
        r'\captionsetup{singlelinecheck=off,justification=justified}',
        r'\caption{',
        r'{\bf TF-MoDisCO subpatterns for the ' + TF
        + ' transcription factor in ' + celltype
        + ' (accession ' + accession + ')}. ',
        '}',
        r'\label{' + main_png[:-4] + '-subpatterns}',
        r'\end{figure*}',
        '',
        '',
    ])
    outfile.write(r'\clearpage' + '\n')


def run():
    """Generate LaTeX figure markup from a main and a submotif config.

    Command line (read from ``sys.argv``)::

        python script.py main_config submotif_config outfile

    Both configs are tab-delimited with the columns
    ``PNG(s) <tab> TF name <tab> cell line <tab> ENCODE ID``; blank lines
    are skipped. For every main-config line a captioned figure is written
    to *outfile*; if the submotif config has entries for the same
    (TF, cell line, ENCODE ID) triple, their PNGs are appended (sorted by
    path) as sub-pattern pages.

    Prints usage and exits with status 1 when fewer than three arguments
    are supplied. Written to run unchanged under Python 2 and Python 3.
    """
    # Three positional arguments are required, so argv needs four entries.
    if len(sys.argv) < 4:
        print('usage: python %s main_config submotif_config outfile' % sys.argv[0])
        print('\tMain config format:')
        print('\t\tPNG (comma-separated) <tab> TF name <tab> cell line <tab> ENCODE ID ')
        print('\tSubmotif config format:')
        print('\t\tPNG <tab> TF name <tab> cell line <tab> ENCODE ID ')
        print('\tNote: there can be more than one PNG per TF/cell/ENCODE combination')
        sys.exit(1)

    TFlist = sys.argv[1]
    Submotif = sys.argv[2]
    outfilename = sys.argv[3]

    print('%s %s %s' % (TFlist, Submotif, outfilename))

    SubmotifDict = _read_submotifs(Submotif)

    # Open the main config first so an unreadable input does not truncate
    # an existing output file.
    with open(TFlist) as main_config:
        with open(outfilename, 'w') as outfile:
            for line in main_config:
                record = _parse_config_line(line)
                if record is None:
                    continue
                files, TF, celltype, accession = record

                _write_main_figure(outfile, files, TF, celltype, accession)

                pngs = SubmotifDict.get((TF, celltype, accession))
                if pngs is None:
                    continue
                _write_subpatterns(outfile, files[-1], sorted(pngs),
                                   TF, celltype, accession)


# Only generate output when executed as a script; importing this module
# must not read sys.argv or write files as a side effect.
if __name__ == '__main__':
    run()

