##################################
#                                #
# Last modified 04/02/2015       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set

def _read_fields(path):
    """Yield the tab-separated fields of each data line in *path*.

    Lines whose first character is '#' and lines that are blank after
    stripping are skipped.  Each remaining line is stripped of surrounding
    whitespace before being split on tabs.  The file is closed once the
    generator is exhausted (or garbage-collected).
    """
    with open(path) as handle:
        for line in handle:
            stripped = line.strip()
            if line.startswith('#') or not stripped:
                continue
            yield stripped.split('\t')


def _load_comparisons(paths):
    """Parse the pairwise comparison files listed in *paths*.

    Every data line must have at least 8 tab-separated columns; the ones used
    are: 0 = repeat name, 1 = sample A, 2 = sample B, 5 = integer read count
    (SSreads), 6 = float value for A, 7 = float value for B.

    Returns a tuple ``(repeat_dict, samples)`` where ``repeat_dict`` maps
    repeat -> {(A, B): (SSreads, valueA, valueB)} with both orientations of
    each pair stored, and ``samples`` is the sorted list of distinct sample
    names seen in columns 1 and 2.

    Raises IndexError / ValueError on lines with too few columns or
    non-numeric values in columns 5-7.
    """
    repeat_dict = {}
    samples = set()
    for path in paths:
        for fields in _read_fields(path):
            repeat, sample_a, sample_b = fields[0], fields[1], fields[2]
            ss_reads = int(fields[5])
            pp_a = float(fields[6])
            pp_b = float(fields[7])
            samples.add(sample_a)
            samples.add(sample_b)
            pairs = repeat_dict.setdefault(repeat, {})
            # Store both orientations so a lookup by (X, Y) always returns
            # X's value first.  A later line for the same pair overwrites
            # both entries, keeping them consistent.
            pairs[(sample_a, sample_b)] = (ss_reads, pp_a, pp_b)
            pairs[(sample_b, sample_a)] = (ss_reads, pp_b, pp_a)
    return repeat_dict, sorted(samples)


def _sorted_sample_depths(pairs):
    """Return, in ascending order, each sample's largest SSreads in *pairs*.

    *pairs* maps (A, B) -> (SSreads, valueA, valueB).  For every sample that
    appears in any key, the maximum SSreads over the comparisons involving it
    is taken; the resulting list has one entry per sample.
    """
    deepest = {}
    for (sample_a, sample_b), (ss_reads, _pp_a, _pp_b) in pairs.items():
        for sample in (sample_a, sample_b):
            if sample not in deepest or deepest[sample] < ss_reads:
                deepest[sample] = ss_reads
    return sorted(deepest.values())


def _row_values(pairs, samples, depth):
    """Return one output string per sample for the given read *depth*.

    For each sample A (in *samples* order) the other samples are scanned in
    order and the value of A from the first comparison whose SSreads equals
    *depth* is used.  If A has no comparison at exactly that depth, 'nan' is
    written so that every row has as many columns as the header.

    Raises KeyError if *pairs* lacks an entry for some pair of distinct
    samples.
    """
    values = []
    for sample_a in samples:
        value = 'nan'
        for sample_b in samples:
            if sample_a == sample_b:
                continue
            ss_reads, pp_a, _pp_b = pairs[(sample_a, sample_b)]
            if ss_reads == depth:
                value = str(pp_a)
                break
        values.append(value)
    return values


def run():
    """Combine pairwise A-vs-B comparison files into per-depth tables.

    Command line: ``python <script> list_files outfile_prefix``.

    ``list_files`` is a text file whose first tab-separated column names one
    comparison file per line ('#' lines and blank lines are ignored).  With N
    distinct samples across all comparison files, N-1 output files named
    ``outfile_prefix + str(n)`` (n = 0 .. N-2) are written.  Each output file
    has a header ``#repeat  reads  <sample>...`` and one row per repeat; the
    ``reads`` column is the n-th smallest of the per-sample maximum SSreads
    for that repeat, and each sample column holds that sample's value from a
    comparison made at exactly that read count, or 'nan' if there is none.

    Prints usage and exits with status 1 when fewer than two arguments are
    given.  Raises KeyError / IndexError if a repeat does not have an entry
    for every pair of samples.
    """
    # Both positional arguments are required (argv[1] and argv[2]).
    if len(sys.argv) < 3:
        print('usage: python %s list_files outfile_prefix' % sys.argv[0])
        print('\tthe list of files should of A-vs-B files that are the output of ping-pong-sampling-two-samples-TS-CI.py')
        sys.exit(1)

    config = sys.argv[1]
    outfileprefix = sys.argv[2]

    paths = [fields[0] for fields in _read_fields(config)]
    repeat_dict, samples = _load_comparisons(paths)

    # The sorted per-sample depths depend only on the repeat, not on which
    # output file is being written, so compute them once.
    depths_by_repeat = {}
    for repeat, pairs in repeat_dict.items():
        depths_by_repeat[repeat] = _sorted_sample_depths(pairs)

    header = '\t'.join(['#repeat', 'reads'] + samples)

    for n in range(len(samples) - 1):
        with open(outfileprefix + str(n), 'w') as outfile:
            outfile.write(header + '\n')
            for repeat in repeat_dict:
                depth = depths_by_repeat[repeat][n]
                row = [repeat, str(depth)]
                row.extend(_row_values(repeat_dict[repeat], samples, depth))
                outfile.write('\t'.join(row) + '\n')
        
# Script entry point: run() is invoked unconditionally at module level, so it
# executes (and reads sys.argv) whenever this file is run or imported.
run()

