##################################
#                                #
# Last modified 2018/09/30       #
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import os
from sets import Set
import time
import gzip

def run():
    """Trim the gRNA column of a GuideScan CSV file and print the rows to stdout.

    Arguments are read from ``sys.argv``:

    * ``sys.argv[1]`` -- path to the GuideScan file; a name ending in ``.gz``
      is opened with ``gzip``.
    * ``sys.argv[2]`` -- guide length ``GL`` (integer).
    * ``-5pG`` (optional, anywhere on the command line) -- prepend a ``G`` to
      every trimmed guide.

    For every row with at least four comma-separated fields, the gRNA
    (fourth field) is replaced by its bases ``[1:1+GL]`` (the first base is
    dropped), optionally prefixed with ``G``.  Only that column is rewritten;
    all other fields are printed unchanged.  Lines starting with ``#`` and
    rows with fewer than four fields are skipped.

    Raises:
        SystemExit: with status 1, after printing usage, when either
            positional argument is missing.
        ValueError: when the guide length is not an integer.
    """
    # guide_length is read from sys.argv[2], so both positional arguments
    # must be present before going any further.
    if len(sys.argv) < 3:
        # Single-argument print(...) produces the same output whether it is
        # parsed as a print statement or as a function call.
        print('usage: python %s guidescan_input guide_length [-5pG]' % sys.argv[0])
        print('\tthe script will take bases [1:1+GL]')
        print('\tthe script will print to stdout by default')
        print('\tuse the [-5pG] option to add G in the beginning')
        print('\tAssumed GuideScan format:')
        print('\t\tchromosome,target site start coordinate,target site end coordinate,gRNA,cutting efficiency score,cutting specificity score,strand,offtargets sum,offtargets summary,annotation,gRNA label')
        sys.exit(1)

    GS = sys.argv[1]
    GL = int(sys.argv[2])
    do5G = '-5pG' in sys.argv

    opener = gzip.open if GS.endswith('.gz') else open
    # The with-block guarantees the input handle is closed, even on error.
    with opener(GS) as linelist:
        for line in linelist:
            if line.startswith('#'):
                continue
            fields = line.strip().split(',')
            # The gRNA lives at index 3, so at least four fields are needed.
            if len(fields) < 4:
                continue
            newsgRNA = fields[3][1:1 + GL]
            if do5G:
                newsgRNA = 'G' + newsgRNA
            # Rewrite only the gRNA column; a whole-line string replace would
            # also alter any other field that happens to contain the sequence.
            fields[3] = newsgRNA
            print(','.join(fields))

# Script entry point: run() is called unconditionally when this file is executed.
# There is no __main__ guard, so importing this module also triggers it.
run()
