##################################
#                                #
# Last modified 2017/12/15       #
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import random
from sets import Set
import time

def run():
    """Annotate GuideScan sgRNA rows with the gene information of their TSS.

    Command-line arguments (read from ``sys.argv``):
        1. guidescan_input -- tab-separated GuideScan output. Lines starting
           with 'chromosome' are skipped as column headers. A line with a
           single field that contains ':' and '-' names the region that all
           following guide rows belong to. Every other non-blank line is a
           guide row whose column 10 (0-based) holds the gRNA label.
        2. TSS_bed -- tab-separated file; lines starting with '#' are
           skipped. Columns 0-2 form the region key 'chr:left-right' and
           columns 4, 5 and 6 are stored as gene name, gene ID and
           transcript ID.
        3. outfile -- path of the tab-separated table to write.

    Each guide row is written out with its gRNA label replaced by a
    sequential 'sgRNA_<n>' identifier (numbered across the whole input) and
    with the three gene fields of its region appended.

    Exits with status 1 after printing a usage message when fewer than three
    arguments are supplied.

    Raises:
        KeyError: a guide row belongs to a region missing from TSS_bed, or
            appears before any region line.
        IndexError: a TSS row has fewer than 7 columns, or a guide row has
            fewer than 11 columns.
    """

    # Three positional arguments are read below, so argv needs four entries.
    if len(sys.argv) < 4:
        print('usage: python %s guidescan_input TSS_bed outfile' % sys.argv[0])
        sys.exit(1)

    GS = sys.argv[1]
    TSS = sys.argv[2]
    outfilename = sys.argv[3]

    # Map 'chr:left-right' -> (geneName, geneID, transcriptID)
    TSSDict = {}
    with open(TSS) as tss_file:
        for line in tss_file:
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if not stripped:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            fields = stripped.split('\t')
            chrom = fields[0]
            left = fields[1]
            right = fields[2]
            geneName = fields[4]
            geneID = fields[5]
            transcriptID = fields[6]
            region = chrom + ':' + left + '-' + right
            TSSDict[region] = (geneName, geneID, transcriptID)

    # NOTE(review): the header names the last columns 'geneID, geneName' while
    # the rows below are written as geneName, geneID -- confirm which order is
    # intended before relying on these column names.
    header = 'chromosome\ttarget site start coordinate\ttarget site end coordinate\tgRNA\tcutting efficiency score\tcutting specificity score\tstrand\tofftargets sum\tofftargets summary\tannotation\tgRNA label\tgeneID\tgeneName\ttranscriptID'

    with open(outfilename, 'w') as outfile:
        outfile.write(header + '\n')

        with open(GS) as gs_file:
            currentTSS = ''
            SG = 0
            for line in gs_file:
                if line.startswith('chromosome'):
                    continue
                stripped = line.strip()
                if not stripped:
                    continue
                fields = stripped.split('\t')
                if len(fields) == 1 and ':' in line and '-' in line:
                    # region line: applies to all guide rows until the next one
                    currentTSS = fields[0]
                    continue
                SG += 1
                # Overwrite only the label column; a substring replace on the
                # whole row would also rewrite other columns that happen to
                # contain the label text.
                fields[10] = 'sgRNA_' + str(SG)
                geneName, geneID, transcriptID = TSSDict[currentTSS]
                outfile.write('\t'.join(fields + [geneName, geneID, transcriptID]) + '\n')

# Script entry point: run() is called unconditionally at module level (there
# is no __main__ guard), so it also executes if this file is imported.
run()
