##################################
#                                #
# Last modified 2019/10/12       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math

def _load_tss(TSSfile):
    """Read the TSS annotation file into a {gene: [(chrom, TSS), ...]} dict.

    Lines starting with '#' are skipped.  On every other line the first
    tab-separated field is used as the chromosome, the second as the integer
    TSS coordinate, and the fifth as a comma-separated list of gene names.
    The (chromosome, coordinate) pair is appended under each listed gene, so
    a gene that appears on several lines collects several positions.
    """
    TSSDict = {}
    with open(TSSfile) as tss_lines:
        for line in tss_lines:
            if line.startswith('#'):
                continue
            fields = line.strip().split('\t')
            chrom = fields[0]
            TSS = int(fields[1])
            for gene in fields[4].split(','):
                TSSDict.setdefault(gene, []).append((chrom, TSS))
    return TSSDict


def run():
    """Append a window around each gene's TSS to every line of a table.

    Command-line arguments (read from sys.argv):
        1. file         -- tab-delimited input table
        2. geneNameID   -- 0-based index of the column holding the gene name
        3. TSS-0bp.bed  -- TSS annotation (chromosome, position, ..., genes)
        4. radius       -- integer half-width of the window
        5. outfilename  -- path of the file to write

    Input lines starting with '#' are copied to the output unchanged.  For
    every other line, one output line is written per TSS known for its gene:
    the stripped input line followed by chromosome, TSS - radius and
    TSS + radius.  Lines whose gene is absent from the annotation are
    reported on stdout and dropped.

    Exits with status 1 after printing a usage message when fewer than five
    arguments are supplied.
    """
    # Five arguments are read below (sys.argv[1] .. sys.argv[5]), so argv
    # needs at least six entries; the former "< 5" check let a call without
    # the output filename fall through to an IndexError.
    if len(sys.argv) < 6:
        print('usage: python %s file geneNameID TSS-0bp.bed radius outfilename' % sys.argv[0])
        sys.exit(1)

    infilename = sys.argv[1]
    geneFieldID = int(sys.argv[2])
    TSSfile = sys.argv[3]
    radius = int(sys.argv[4])
    outfilename = sys.argv[5]

    TSSDict = _load_tss(TSSfile)

    # Context managers close both files even if a malformed line raises.
    with open(outfilename, 'w') as outfile:
        with open(infilename) as table_lines:
            for line in table_lines:
                if line.startswith('#'):
                    outfile.write(line)
                    continue
                stripped = line.strip()
                gene = stripped.split('\t')[geneFieldID]
                if gene not in TSSDict:
                    # Single-argument form prints the same text under
                    # Python 2 and Python 3.
                    print('%s not found in TSS anottation, skipping' % gene)
                    continue
                for (chrom, TSS) in TSSDict[gene]:
                    window = [stripped, chrom, str(TSS - radius), str(TSS + radius)]
                    outfile.write('\t'.join(window) + '\n')

# Script entry point: run() is invoked unconditionally (there is no
# __main__ guard), so it also executes if this module is imported.
run()
