##################################
#                                #
# Last modified 2025/04/18       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import gzip
import numpy as np
import os
from sets import Set

def run():

    if len(sys.argv) < 2:
        print 'usage: python %s methylation_single_base.tsv.bgz(,more files) chrom.sizes' % sys.argv[0]
        print '\tNote: the script will print to stdout'
        sys.exit(1)
    
    metfilenames = sys.argv[1].split(',')
    chromSizes = sys.argv[2]

    print '#chr\tleft\tright\tpercentage_methylated\tmeth\tunmeth'

    lineslist = open(chromSizes)
    for line in lineslist:
        fields = line.strip().split('\t')
        chr = fields[0]
        size = int(fields[1])
        ScoreDict = {}
        for metfilename in metfilenames:
            cmd = 'tabix' + ' ' + metfilename + ' ' + chr + ':0-' + str(size)
            p = os.popen(cmd, "r")
            lline = 'line'
            while lline != '':
                lline = p.readline().strip()
                if lline == '':
                    break
                ffields = lline.strip().split('\t')
                sstart = int(float(ffields[1]))
                sstop = int(float(ffields[2]))
                meth = float(ffields[4])
                unmeth = float(ffields[5])
                if ScoreDict.has_key((sstart,sstop)):
                    ScoreDict[(sstart,sstop)]['m'] += meth
                    ScoreDict[(sstart,sstop)]['u'] += unmeth
                else:
                    ScoreDict[(sstart,sstop)]= {}
                    ScoreDict[(sstart,sstop)]['m'] = meth
                    ScoreDict[(sstart,sstop)]['u'] = unmeth
        positions = ScoreDict.keys()
        positions.sort()
        for (a,b) in positions:
            meth = ScoreDict[(a,b)]['m']
            unmeth = ScoreDict[(a,b)]['u']
            outline = chr + '\t' + str(a) + '\t' + str(b) + '\t' + str(int(100*meth/(meth+unmeth))) + '\t' + str(int(meth))  + '\t' + str(int(unmeth))
            print outline
   
run()









