##################################
#                                #
# Last modified 2019/04/03       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import os
import string

def _load_motif_names(meme_path):
    """Return a dict mapping motif ID -> TF label parsed from a MEME file.

    Only lines starting with 'MOTIF ' are considered.  The second
    space-separated token is the motif ID; the third token, when present,
    is the TF label, otherwise the motif ID itself is used as the label.
    Any ':' in the label is replaced with '_'.
    """
    names = {}
    with open(meme_path) as handle:
        for line in handle:
            if not line.startswith('MOTIF '):
                continue
            tokens = line.strip().split(' ')
            if len(tokens) < 2:
                # 'MOTIF' with no identifier at all: nothing to index.
                continue
            motif_id = tokens[1]
            label = tokens[2] if len(tokens) > 2 else motif_id
            names[motif_id] = label.replace(':', '_')
    return names


def _iter_fimo_lines(path):
    """Yield the raw lines of a FIMO file, decompressing .gz/.bz2 input.

    Compressed files are streamed through the external 'gunzip'/'bzip2'
    tools; the command is passed as an argument list (no shell), so file
    names containing spaces or shell metacharacters are handled safely.
    A path of '-' reads from standard input.

    Raises IOError if the decompression tool exits with a non-zero status.
    """
    if path.endswith('.bz2'):
        cmd = ['bzip2', '-cd', '--', path]
    elif path.endswith('.gz'):
        cmd = ['gunzip', '-c', '--', path]
    else:
        cmd = None

    if cmd is None:
        if path == '-':
            # The original 'cat -' read from stdin; keep that working.
            for line in sys.stdin:
                yield line
            return
        handle = open(path)
        try:
            for line in handle:
                yield line
        finally:
            handle.close()
        return

    import subprocess

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for line in proc.stdout:
            yield line
    finally:
        proc.stdout.close()
        status = proc.wait()
    if status != 0:
        # Without this check a corrupt archive gives silently truncated output.
        raise IOError('%s exited with status %d while reading %s'
                      % (cmd[0], status, path))


def run():
    """Append a TF-name column to every FIMO hit and print to stdout.

    Command line: ``FIMO.txt motifs.meme`` (read from sys.argv).  The FIMO
    file may be plain text, .gz or .bz2.

    For each FIMO line:
      * the 'motif_id' header, blank lines and the '# FIMO (Find',
        '# The format' and '# fimo' comment lines are dropped;
      * any other line starting with '#' is echoed with '\\tTF' appended;
      * every remaining line is echoed with the TF label of its first
        column (looked up in the MEME file) appended as a new last column.

    Exits with status 1 after printing usage if fewer than two arguments
    are given.  Raises KeyError if a FIMO motif ID is absent from the MEME
    file.
    """
    emit = sys.stdout.write

    # Two positional arguments are required (sys.argv[1] and sys.argv[2]).
    if len(sys.argv) < 3:
        emit('usage: python %s FIMO.txt motifs.meme' % sys.argv[0] + '\n')
        emit('\tNote: the fimo.txt file can be .gz or .bz2' + '\n')
        emit('\tNote: the script will print to stdout' + '\n')
        sys.exit(1)

    FIMO = sys.argv[1]
    JASPAR = sys.argv[2]

    JASPARDict = _load_motif_names(JASPAR)

    # Header/banner lines that are dropped from the output entirely.
    dropped_prefixes = ('motif_id\t', '# FIMO (Find', '# The format', '# fimo')

    for line in _iter_fimo_lines(FIMO):
        if line.startswith(dropped_prefixes):
            continue
        record = line.strip()
        if not record:
            continue
        if line.startswith('#'):
            emit(record + '\tTF' + '\n')
            continue
        motifID = record.split('\t')[0]
        try:
            TF = JASPARDict[motifID]
        except KeyError:
            raise KeyError('motif ID %r from %s not found in %s'
                           % (motifID, FIMO, JASPAR))
        emit(record + '\t' + TF + '\n')

# Executed unconditionally when this file is run (or imported): run() reads
# its two arguments from sys.argv and prints the annotated lines to stdout.
run()

