##################################
#                                #
# Last modified 2025/05/25       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os
import math
from sets import Set

def _iter_input_lines(path):
    """Yield the raw lines of *path*, decompressing according to its extension.

    '.bz2' is read through ``bzip2 -cd``, '.gz'/'.bgz' through ``zcat`` and
    '.zip' through ``unzip -p``; any other file is opened directly.

    The external tools are started with an argument list (no shell), so file
    names containing spaces or shell metacharacters are passed through
    verbatim.  The pipe is always closed and the child process reaped.
    """
    import subprocess  # local import: only needed for compressed inputs

    if path.endswith('.bz2'):
        cmd = ['bzip2', '-cd', path]
    elif path.endswith(('.gz', '.bgz')):
        cmd = ['zcat', path]
    elif path.endswith('.zip'):
        cmd = ['unzip', '-p', path]
    else:
        with open(path) as handle:
            for line in handle:
                yield line
        return

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for line in proc.stdout:
            yield line
    finally:
        proc.stdout.close()
        status = proc.wait()
    if status != 0:
        # Do not abort (the data read so far may still be usable), but make
        # a failing decompressor visible instead of ignoring it.
        sys.stderr.write('warning: "%s" exited with status %d\n'
                         % (' '.join(cmd), status))


def _parse_matrix(lines):
    """Build ``{i: {j: value}}`` from an iterable of input lines.

    Each data line is tab-separated.  Field 1 (0-based) is the integer
    position ``i``; field 3 is a ';'-separated list of ``offset:value``
    pairs, stored under ``j = i + offset``.  Values are kept as the strings
    found in the file.  Blank lines, lines starting with '#' and empty
    ``offset:value`` entries (e.g. from a trailing ';') are skipped.  A
    position that appears on several lines keeps only its last line.

    Raises:
        IndexError: if a data line has fewer than four fields or an entry
            has no ':' separator.
        ValueError: if a position or an offset is not an integer.
    """
    matrix = {}
    for raw in lines:
        line = raw.strip()
        if not line or line.startswith('#'):
            continue
        fields = line.split('\t')
        i = int(fields[1])
        row = {}
        matrix[i] = row
        for entry in fields[3].split(';'):
            if not entry:
                continue
            parts = entry.split(':')
            row[int(parts[0]) + i] = parts[1]
    return matrix


def _write_matrix(matrix, outfilename):
    """Write *matrix* to *outfilename* as a dense tab-separated table.

    Rows and columns cover ``range(min - 5, max + 5)`` of the row positions
    present in *matrix* (the upper bound is exclusive).  The first line is
    '#' followed by the column positions; every following line starts with
    its row position.  A cell holds ``matrix[i][j]`` if present, otherwise
    ``matrix[j][i]`` (the matrix is treated as symmetric), otherwise 'nan'.
    """
    positions = list(range(min(matrix) - 5, max(matrix) + 5))
    no_row = {}

    with open(outfilename, 'w') as outfile:
        outfile.write('#' + ''.join('\t' + str(p) for p in positions) + '\n')
        for i in positions:
            row_i = matrix.get(i, no_row)
            cells = [str(i)]
            for j in positions:
                if j in row_i:
                    cells.append(row_i[j])
                elif i in matrix.get(j, no_row):
                    cells.append(matrix[j][i])
                else:
                    cells.append('nan')
            outfile.write('\t'.join(cells) + '\n')


def run():
    """Convert a per-position NMI score file into a dense square matrix.

    Command line: ``sys.argv[1]`` is the input file (optionally .bz2, .gz,
    .bgz or .zip compressed) and ``sys.argv[2]`` is the output file name.
    The input is assumed to contain a single chromosome, since only the
    position column is used as the matrix index.

    Exits with status 1 after printing a usage message when fewer than two
    arguments are given, or after an error message when the input contains
    no data lines.
    """
    # Both the input and the output file name are required.
    if len(sys.argv) < 3:
        print('usage: python %s SingleMoleculeCorrelation-NMI-matrix-C-BAM.bed outfilename' % sys.argv[0])
        print('\tNote: it is assumed that only a single chromosome is in the input file')
        sys.exit(1)

    inputfilename = sys.argv[1]
    outfilename = sys.argv[2]

    matrix = _parse_matrix(_iter_input_lines(inputfilename))
    if not matrix:
        # min()/max() of an empty matrix would otherwise fail with an
        # uninformative ValueError.
        sys.stderr.write('error: no data lines found in %s\n' % inputfilename)
        sys.exit(1)

    _write_matrix(matrix, outfilename)
            
# Run the conversion only when executed as a script, so that importing this
# module does not parse sys.argv or call sys.exit().
if __name__ == '__main__':
    run()

