##################################
#                                #
# Last modified 2018/08/08       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import numpy as np
import random
import os
import math
from sets import Set

def run():

    if len(sys.argv) < 2:
        print 'usage: python %s methylation_reads_all.tsv' % sys.argv[0]
        print '\Note: the script will print to stodut by default'
        print '\Note: the script assumes Tombo 1.3 probabilities'
        sys.exit(1)

    reads = sys.argv[1]

    if reads.endswith('.bz2'):
        cmd = 'bzip2 -cd ' + reads
    elif reads.endswith('.gz') or reads.endswith('.bgz'):
        cmd = 'zcat ' + reads
    elif reads.endswith('.zip'):
        cmd = 'unzip -p ' + reads
    else:
        cmd = 'cat ' + reads
    RN = 0
    P = os.popen(cmd, "r")
    line = 'line'
    while line != '':
        line = P.readline().strip()
        if line == '':
            break
        if line.startswith('#'):
            continue
        fields = line.strip().split('\t')
        left = int(fields[1])
        right = int(fields[2])
        L = right - left
        if len(fields) < 7:
            continue
        loglike = fields[7].split(',')
        if len(loglike) < 2:
            continue
        LL = list(float(y) for y in loglike)
        LL = np.array(LL)
        M = np.sum(LL < 0.5)
        outline = str(L) + '\t' + str(M/(len(LL) + 0.0))
        print outline        
            
run()

