##################################
#                                #
# Last modified 2019/10/12       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set
import os
import subprocess

# Row labels of the output table, in the order in which they are written.
# NOTE(review): the original code also initialised a 'Below MAPQ Threshold'
# entry but never parsed or wrote it; it is left out here so that the output
# layout stays exactly as before.
_STAT_NAMES = (
    'Sequenced Read Pairs',
    'Normal Paired',
    'Chimeric Paired',
    'Chimeric Ambiguous',
    'Unmapped',
    'Ligation Motif Present',
    'Alignable (Normal+Chimeric Paired)',
    'Unique Reads',
    'PCR Duplicates',
    'Optical Duplicates',
    'Library Complexity Estimate',
    'Intra-fragment Reads',
    'Hi-C Contacts',
    'Hi-C Contacts Ligation Motif Present',
    'Hi-C Contacts 3p Bias (Long Range)',
    'Hi-C Contacts Pair Type (L-I-O-R)',
    'Inter-chromosomal',
    'Intra-chromosomal',
    'Short Range (<20Kb)',
    'Long Range (>20Kb)',
)

# Statistics identified by the text a whitespace-stripped line starts with:
# (line prefix, row label).  The value is everything after the prefix.
_PREFIX_STATS = (
    ('Sequenced Read Pairs:  ', 'Sequenced Read Pairs'),
    ('Normal Paired: ', 'Normal Paired'),
    ('Chimeric Paired: ', 'Chimeric Paired'),
    ('Chimeric Ambiguous: ', 'Chimeric Ambiguous'),
    ('Unmapped: ', 'Unmapped'),
    ('Alignable (Normal+Chimeric Paired): ', 'Alignable (Normal+Chimeric Paired)'),
    ('Unique Reads: ', 'Unique Reads'),
    ('PCR Duplicates: ', 'PCR Duplicates'),
    ('Optical Duplicates: ', 'Optical Duplicates'),
    ('Library Complexity Estimate: ', 'Library Complexity Estimate'),
    ('Intra-fragment Reads: ', 'Intra-fragment Reads'),
    ('Hi-C Contacts: ', 'Hi-C Contacts'),
    ('Inter-chromosomal: ', 'Inter-chromosomal'),
    ('Intra-chromosomal: ', 'Intra-chromosomal'),
    ('Short Range (<20Kb): ', 'Short Range (<20Kb)'),
    ('Long Range (>20Kb): ', 'Long Range (>20Kb)'),
)

# Statistics identified by a marker that may appear anywhere in the line:
# (marker, row label).  The value is everything after the marker.
_MARKER_STATS = (
    ('Bias (Long Range): ', 'Hi-C Contacts 3p Bias (Long Range)'),
    ('(L-I-O-R): ', 'Hi-C Contacts Pair Type (L-I-O-R)'),
)

# This prefix can occur twice in one file; see _parse_stats_file().
_LIGATION_PREFIX = 'Ligation Motif Present: '


def _parse_stats_file(path):
    """Read one statistics file and return a dict of row label -> value.

    Every label in _STAT_NAMES is present in the result; statistics that
    are not found in the file keep the placeholder string 'nan'.  Values
    are returned as the raw text following the field name.
    """
    stats = dict.fromkeys(_STAT_NAMES, 'nan')
    seen_hic_contacts = False

    with open(path) as handle:
        for raw_line in handle:
            text = raw_line.strip()

            # The same 'Ligation Motif Present' prefix is used for two
            # different rows.  An occurrence before the 'Hi-C Contacts'
            # line fills the top-level row, one after it fills the
            # Hi-C Contacts row.  (Previously both rows were overwritten
            # with whichever occurrence came last.)
            if text.startswith(_LIGATION_PREFIX):
                if seen_hic_contacts:
                    key = 'Hi-C Contacts Ligation Motif Present'
                else:
                    key = 'Ligation Motif Present'
                stats[key] = text[len(_LIGATION_PREFIX):]
                continue

            for prefix, name in _PREFIX_STATS:
                if text.startswith(prefix):
                    stats[name] = text[len(prefix):]
                    if name == 'Hi-C Contacts':
                        seen_hic_contacts = True
                    break

            for marker, name in _MARKER_STATS:
                # partition() leaves `found` empty when the marker is
                # absent, so a line lacking the ': ' part is ignored
                # instead of raising IndexError.
                _, found, value = text.partition(marker)
                if found:
                    stats[name] = value

    return stats


def run():
    """Merge several statistics files into one tab-separated table.

    Command line: ``python <script> list_of_inter.txt_files outfile``

    ``sys.argv[1]`` names a text file with one ``label<TAB>file_name``
    entry per line; blank lines are skipped.  Each referenced file is
    parsed with _parse_stats_file().  If a label occurs more than once,
    the last entry wins.

    ``sys.argv[2]`` names the output file.  Its first line is ``#``
    followed by the labels in sorted order; each following line is one
    statistic (in _STAT_NAMES order) followed by its value for each label.

    Prints a usage message and exits with status 1 if fewer than two
    arguments are given.  Raises ValueError for a non-blank list line
    that has no tab-separated file name.
    """
    # Both the list file and the output file are required, so at least
    # three argv entries are needed (the old check of `< 2` let a missing
    # outfile through and crashed on sys.argv[2]).
    if len(sys.argv) < 3:
        # Parenthesised single-argument print works on Python 2 and 3.
        print('usage: python %s list_of_inter.txt_files outfile' % sys.argv[0])
        print('\tlist_of_files format: label <tab> file_name')
        sys.exit(1)

    filelist = sys.argv[1]
    outfilename = sys.argv[2]

    stats_by_label = {}
    with open(filelist) as listing:
        for entry in listing:
            fields = entry.strip().split('\t')
            if fields == ['']:
                continue
            if len(fields) < 2:
                raise ValueError(
                    'malformed line in %s (expected "label<TAB>file_name"): %r'
                    % (filelist, entry)
                )
            stats_by_label[fields[0]] = _parse_stats_file(fields[1])

    labels = sorted(stats_by_label)

    with open(outfilename, 'w') as outfile:
        outfile.write('\t'.join(['#'] + labels) + '\n')
        for name in _STAT_NAMES:
            row = [name] + [stats_by_label[label][name] for label in labels]
            outfile.write('\t'.join(row) + '\n')
        
# Run only when executed as a script, so that importing this module does not
# immediately parse sys.argv and read/write files.
if __name__ == '__main__':
    run()