##################################
#                                #
# Last modified 2024/02/12       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import gzip
import string
import math
from sets import Set

def run():

    if len(sys.argv) < 3:
        print 'usage: python %s input bin_size outfile ' % sys.argv[0]
        print '\tassumed file format:'
        print '\t\t.	chrM	1	chrM	149	-	+	1	0'
        sys.exit(1)

    input = sys.argv[1]
    BS = int(sys.argv[2])
    outfilename = sys.argv[3]

    InteractionMatrix = {}

    TotalReads = 0.0
    MaxScore = 0

    if input.endswith('.gz'):
        linelist = gzip.open(input)
    else:
        linelist = open(input)
    for line in linelist:
        fields = line.strip().split('\t')
        chr1 = fields[1]
        chr2 = fields[3]
        pos1 = int(fields[2])
        pos2 = int(fields[4])
        pos1 = pos1 - (pos1 % BS)
        pos2 = pos2 - (pos2 % BS)
        TotalReads += 1
        if TotalReads % 1000000 == 0:
            print str(TotalReads/1000000) + 'M alignments reads processed'
        if InteractionMatrix.has_key((chr1,pos1)):
            pass
        else:
            InteractionMatrix[(chr1,pos1)] = {}
        if InteractionMatrix[(chr1,pos1)].has_key((chr2,pos2)):
            pass
        else:
            InteractionMatrix[(chr1,pos1)][(chr2,pos2)] = 0
        InteractionMatrix[(chr1,pos1)][(chr2,pos2)] += 1
        if InteractionMatrix[(chr1,pos1)][(chr2,pos2)] > MaxScore:
            MaxScore = InteractionMatrix[(chr1,pos1)][(chr2,pos2)]

    NormFactor = TotalReads/1000000
    MaxScore = MaxScore/NormFactor
    MSNorm = MaxScore/100

    outfile = open(outfilename,'w')

    print 'NormFactor:', NormFactor
    print 'total aligned read pairs:', TotalReads
#    outline = '#total aligned read pairs:\t' + str(TotalReads)
#    outfile.write(outline + '\n')

    chr1List = InteractionMatrix.keys()
    chr1List.sort()

    for (chr1,pos1) in chr1List:
        chr2List = InteractionMatrix[(chr1,pos1)].keys()
        chr2List.sort()
        for (chr2,pos2) in chr2List:
            score = InteractionMatrix[(chr1,pos1)][(chr2,pos2)]
            score = score/NormFactor
            score = score/MSNorm
            outline = chr1 + '\t' + str(pos1) + '\t' + str(pos1 + BS) + '\t' + chr2 + '\t' + str(pos2) + '\t' + str(pos2 + BS) + '\tthickness=' + str(score)
            outfile.write(outline + '\n')

    outfile.close()
        
run()