##################################
#                                #
# Last modified 2018/04/28       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import numpy as np
import os
from sets import Set

def run():
    """Convert a methylation table into a 4-column chrom/start/end/score file.

    Command line (read from ``sys.argv``)::

        python script.py inputfilename outputfilename [-minCov N]

    The input is read as plain text, or decompressed through ``bzip2 -cd`` /
    ``zcat`` when the name ends in ``.bz2`` / ``.gz``.  Lines starting with
    ``#``, the ``chrom start end meth unmeth cov`` header line and blank lines
    are skipped.  Every remaining line must carry at least six fields
    (space-separated if the line contains a space, tab-separated otherwise).

    For each position whose coverage (6th field) is at least ``N`` (default 1)
    one output line ``chrom<TAB>start<TAB>end<TAB>meth/cov`` is written.
    Positions with zero coverage are always skipped because the ratio is
    undefined for them.

    Exits with status 1 when the two required arguments are missing or when
    the decompression command fails.
    """
    # Imported locally so the block does not depend on a file-level import.
    import subprocess

    # Both the input and the output filename are required (argv[1], argv[2]).
    if len(sys.argv) < 3:
        print('usage: python %s inputfilename outputfilename [-minCov N]' % sys.argv[0])
        sys.exit(1)

    metfilename = sys.argv[1]
    outfilename = sys.argv[2]

    MC = 1
    if '-minCov' in sys.argv:
        MC = int(sys.argv[sys.argv.index('-minCov') + 1])
        print('will discard positions with coverage less than %s' % MC)

    # Pick the decompressor from the file extension.  The command is passed
    # as an argument list (no shell), so paths containing spaces or shell
    # metacharacters are handled literally.
    if metfilename.endswith('.bz2'):
        cmd = ['bzip2', '-cd', metfilename]
    elif metfilename.endswith('.gz'):
        cmd = ['zcat', metfilename]
    else:
        cmd = None

    if cmd is None:
        proc = None
        infile = open(metfilename)
    else:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                universal_newlines=True)
        infile = proc.stdout

    header = 'chrom\tstart\tend\tmeth\tunmeth\tcov'
    try:
        with open(outfilename, 'w') as outfile:
            j = 0
            for line in infile:
                if line.startswith('#') or line.startswith(header):
                    continue
                if not line.strip():
                    # Blank lines carry no record; skip them instead of
                    # failing on the missing fields.
                    continue
                j += 1
                if j % 1000000 == 0:
                    print(str(j // 1000000) + 'M lines processed')
                if ' ' in line:
                    fields = line.strip().split(' ')
                else:
                    fields = line.strip().split('\t')
                # Numeric fields may be written as floats (e.g. "10.0"),
                # hence the float() round-trip before int().
                start = int(float(fields[1]))
                stop = int(float(fields[2]))
                meth = int(float(fields[3]))
                cov = float(fields[5])
                if cov < MC or cov == 0:
                    # Below the requested coverage, or ratio undefined.
                    continue
                score = meth / cov
                outfile.write(fields[0] + '\t' + str(start) + '\t' +
                              str(stop) + '\t' + str(score) + '\n')
    finally:
        infile.close()
        if proc is not None:
            proc.wait()

    # A failing decompressor would otherwise leave a silently truncated output.
    if proc is not None and proc.returncode != 0:
        sys.stderr.write('%s exited with status %d while reading %s\n'
                         % (cmd[0], proc.returncode, metfilename))
        sys.exit(1)
   
# Script entry point: the conversion is executed unconditionally when this
# file is run (there is no __main__ guard, so importing it runs it as well).
run()
