##################################
#                                #
# Last modified 2022/11/30       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import os
import numpy as np
from sets import Set

def _read_counts(path):
    """Yield ``(key, count)`` pairs from a tab-delimited two-column file.

    ``key`` is the first column as a string and ``count`` is the second
    column converted to ``int``. Whitespace-only lines are skipped. The file
    is closed once the generator is exhausted.
    """
    with open(path) as handle:
        for row in handle:
            if not row.strip():
                continue
            cols = row.strip().split('\t')
            yield cols[0], int(cols[1])


def run():
    """Write per-dataset read and alignment summary statistics to a TSV file.

    Command line: ``python <script> config outputfilename``.

    ``config`` is a tab-delimited file with one dataset per line, in the
    form ``label<TAB>readLenDist<TAB>chr-hist``. Lines starting with ``#``
    and blank lines are skipped.

    * ``readLenDist`` is the path of a tab-delimited file whose first two
      columns are a read length and the number of reads of that length.
    * ``chr-hist`` is the path of a tab-delimited file whose first two
      columns are a chromosome name and a count; the name ``*`` is counted
      as unaligned, everything else as aligned.

    The output file gets a header line followed by one line per dataset
    with the columns: dataset, total_reads, mean_read_length, alignments,
    unaligned_reads, fraction_aligned. ``fraction_aligned`` is computed as
    ``(total_reads - unaligned_reads) / total_reads``. When a dataset has
    zero total reads, the mean length and the fraction are written as
    ``nan`` instead of failing with a division by zero.

    Each dataset label is also printed to standard output as it is
    processed. If fewer than two command-line arguments are given, a usage
    message is printed and the process exits with status 1.
    """
    # Both the config and the output file name are required, so argv must
    # hold at least three entries (program name + two arguments).
    if len(sys.argv) < 3:
        print('usage: python %s config outputfilename' % sys.argv[0])
        print('\tassumed config format:')
        print('\tlabel\treadLenDist\tchr-hist\t')
        sys.exit(1)

    config = sys.argv[1]
    out_path = sys.argv[2]

    header = '#dataset\ttotal_reads\tmean_read_length\talignments\tunaligned_reads\tfraction_aligned'

    # Read the config directly instead of spawning `cat` through a shell;
    # the context managers close both files even if a dataset fails to parse.
    with open(config) as config_file, open(out_path, 'w') as outfile:
        outfile.write(header + '\n')

        for line in config_file:
            if line.startswith('#') or not line.strip():
                continue
            fields = line.strip().split('\t')
            label = fields[0]
            print(label)
            read_dist_path = fields[1]
            chr_hist_path = fields[2]

            # Accumulate a weighted sum rather than expanding one list entry
            # per read; the resulting mean is the same.
            total_reads = 0
            length_sum = 0
            for length_field, count in _read_counts(read_dist_path):
                total_reads += count
                length_sum += int(length_field) * count

            aligned = 0
            unaligned = 0
            for chrom, count in _read_counts(chr_hist_path):
                if chrom == '*':
                    unaligned += count
                else:
                    aligned += count

            if total_reads:
                # np.float64 keeps the same numeric type (and therefore the
                # same str() formatting) that np.mean returned before.
                mean_length = np.float64(length_sum) / total_reads
                fraction_aligned = (total_reads - unaligned) / (total_reads + 0.0)
            else:
                mean_length = fraction_aligned = float('nan')

            columns = [label, total_reads, mean_length, aligned, unaligned, fraction_aligned]
            outfile.write('\t'.join(str(value) for value in columns) + '\n')

        
# Only run the command-line entry point when this file is executed as a
# script, so that importing the module does not read sys.argv or write files.
if __name__ == '__main__':
    run()

