##################################
#                                #
# Last modified 2017/07/24       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set
import os

def run():
    """Convert a CX report into four tab-separated columns on stdout.

    Command line: ``CXreport context``

    * ``CXreport`` -- path to a tab-separated report.  Names ending in
      ``.bz2`` are read through ``bzip2 -cd``, names ending in ``.gz``
      through ``gunzip -c``, and anything else through ``cat``.
    * ``context`` -- comma-separated list of context labels to keep; each
      label is compared against the sixth column of the report.

    For every kept line whose methylated + unmethylated count is positive
    the function prints ``chrom``, ``pos``, ``pos + 1`` and the percentage
    ``100 * methylated / (methylated + unmethylated)``, taken from columns
    1, 2, 4 and 5 of the report.  Empty lines, lines starting with ``#`` or
    ``track ``, lines in other contexts and zero-coverage positions are
    skipped.

    Exits with status 1 after printing the usage text when either argument
    is missing, and exits with an error message when the reader command
    finishes with a non-zero status.
    """
    # Imported locally because the module-level imports do not provide it.
    import subprocess

    # Both positional arguments are required: sys.argv[2] is read below.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if len(sys.argv) < 3:
        print('usage: python %s CXreport context' % sys.argv[0])
        print("\tcontext format: any combination of CHG, CG and CHH, comma separated")
        print("\tthe script accepts .gz and .bz2 files")
        sys.exit(1)

    path = sys.argv[1]
    # A set gives O(1) membership tests; stripping tolerates "CG, CHG".
    wanted = set(label.strip() for label in sys.argv[2].split(','))

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters from being split or interpreted.
    if path.endswith('.bz2'):
        reader = ['bzip2', '-cd', path]
    elif path.endswith('.gz'):
        reader = ['gunzip', '-c', path]
    else:
        reader = ['cat', path]

    proc = subprocess.Popen(reader, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for line in proc.stdout:
            record = line.strip()
            if not record or record.startswith('#') or record.startswith('track '):
                continue
            fields = record.split('\t')
            if fields[5] not in wanted:
                continue
            chrom = fields[0]
            start = int(fields[1])
            met = int(fields[3])
            unmet = int(fields[4])
            coverage = met + unmet
            if coverage <= 0:
                # No reads at this position: no percentage can be computed.
                continue
            # NOTE(review): the interval is emitted as (pos, pos + 1) using
            # the position exactly as read.  If the report is 1-based and
            # the consumer expects 0-based BED-style starts, this is off by
            # one -- confirm against the input format.
            percent = 100 * met / float(coverage)
            print('\t'.join([chrom, str(start), str(start + 1), str(percent)]))
    finally:
        # Always release the pipe and reap the child, even on a parse error.
        proc.stdout.close()
        status = proc.wait()

    if status != 0:
        # Surface a failed read (missing file, corrupt archive) instead of
        # silently producing truncated or empty output.
        sys.exit("error: '%s' exited with status %d" % (' '.join(reader), status))

# Run the converter only when executed as a script, so that importing this
# module does not parse sys.argv or call sys.exit() as a side effect.
if __name__ == '__main__':
    run()
