##################################
#                                #
# Last modified 2018/10/19       #
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import random
import gzip
from sets import Set
import time

def run():

    if len(sys.argv) < 6:
        print 'usage: python %s guidescan.csv chr left right minCFD outprefix' % sys.argv[0]
        print '\tassumed GuideScan format:'
        print '\t\tchr8,128343419,128343441,GTGGTTGAAGTAGTTGCGAG,63,0.74993132,+,2,2:0|3:2,*,gRNA_1__1'
        sys.exit(1)

    GS = sys.argv[1]
    CHR = sys.argv[2]
    L = int(sys.argv[3])
    R = int(sys.argv[4])
    minCFD = float(sys.argv[5])
    outprefix = sys.argv[6]

    GuideList = []

    if GS.endswith('.gz'):
        linelist = gzip.open(GS)
    else:
        linelist = open(GS)
    currentTSS = ''
    for line in linelist:
        if line.startswith('chromosome,target site'):
            continue
        if ',' not in line:
            continue
        fields = line.strip().split(',')
        chr = fields[0]
        print chr, CHR, fields
        if chr != CHR:
            continue
        left = int(fields[1])
        right = int(fields[2])
        print left, right, chr, L, R
        if left < L and right < L:
            continue
        if left > R and right > R:
            continue
        gRNA = fields[3]
        cutting_efficiency_score = float(fields[4])
        cutting_specificity_score = float(fields[5])
        if cutting_specificity_score < minCFD:
            continue
        strand =  fields[6]
        offtargets_sum = fields[7]
        offtargets_summary = fields[8]
        guide = (chr,left,right,gRNA,cutting_efficiency_score,cutting_specificity_score,strand,offtargets_sum,offtargets_summary)
        GuideList.append(guide)

    GuideList.sort()

    outfileM = open(outprefix + '.matrix', 'w')
    outfileP = open(outprefix + '.pgRNAs', 'w')

    Matrix = {}
    outline = '#'
    for i in range(L,R):
        outline = outline + '\t' + str(i)
        Matrix[i] = {}
        for j in range(L,R):
            Matrix[i][j] = -1

    outfileM.write(outline + '\n')

    outline = '#ID\tsgRNA1_chr\tsgRNA1_start\tsgRNA1_end\tsgRNA2_chr\tsgRNA2_start\tsgRNA2_end\tsgRNA_1\tsgRNA_2'
    outline = outline + '\tcutting_efficiency_score_sgRNA_1\tcutting_efficiency_score_sgRNA_2\tcutting_specificity_score_sgRNA_1\tcutting_specificity_score_sgRNA_2\tstrand_sgRNA_1\tstrand_sgRNA_2'
    outline = outline + '\tofftargets_sum_sgRNA_1\tofftargets_sum_sgRNA_2\tofftargets_summary_sgRNA_1\tofftargets_summary_sgRNA_2'
    outfileP.write(outline + '\n')

    pgRNA = 0
    for G1 in range(len(GuideList)):
        (chr1,left1,right1,gRNA1,cutting_efficiency_score1,cutting_specificity_score1,strand1,offtargets_sum1,offtargets_summary1) = GuideList[G1]
        if strand1 == '+':
            CS1 = right1 - 3 - 3
        if strand1 == '-':
            CS1 = left1 + 3 + 3
        for G2 in range(G1,len(GuideList)):
            (chr2,left2,right2,gRNA2,cutting_efficiency_score2,cutting_specificity_score2,strand2,offtargets_sum2,offtargets_summary2) = GuideList[G2]
            if strand1 == '+':
                CS2 = right2 - 3 - 3
            if strand1 == '-':
                CS2 = left2 + 3 + 3
            if Matrix.has_key(CS1):
                pass
            else:
                continue
            if Matrix[CS1].has_key(CS2):
                pass
            else:
                continue
            if Matrix.has_key(CS2):
                pass
            else:
                continue
            if Matrix[CS2].has_key(CS1):
                pass
            else:
                continue
            Matrix[CS1][CS2] = cutting_specificity_score1 + cutting_specificity_score2
            Matrix[CS2][CS1] = cutting_specificity_score1 + cutting_specificity_score2
            pgRNA += 1
            outline = 'pgRNA_' + str(pgRNA)
            outline = outline + '\t' + chr1 + '\t' + str(left1) + '\t' + str(right1)
            outline = outline + '\t' + chr2 + '\t' + str(left2) + '\t' + str(right2)
            outline = outline + '\t' + gRNA1 + '\t' + gRNA2
            outline = outline + '\t' + str(cutting_efficiency_score1) + '\t' + str(cutting_efficiency_score2)
            outline = outline + '\t' + str(cutting_specificity_score1) + '\t' + str(cutting_specificity_score2)
            outline = outline + '\t' + strand1 + '\t' + strand2
            outline = outline + '\t' + offtargets_sum1 + '\t' + offtargets_sum2
            outline = outline + '\t' + offtargets_summary1 + '\t' + offtargets_summary2
            outfileP.write(outline + '\n')

    for i in range(L,R):
        outline = str(i)
        for j in range(L,R):
            outline = outline + '\t' + str(Matrix[i][j])
        outfileM.write(outline + '\n')

    outfileP.close()
    outfileM.close()

run()
