##################################
#                                #
# Last modified 07/20/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set

def _count_ase_events(path, max_pvalue):
    """Tally significant ASE events in one tab-separated per-gene table.

    The table must contain a header line starting with '#gene'; the parent
    names are taken from header columns 6 and 7 (0-based 5 and 6) with the
    '_collapsed_reads' suffix removed.  On data lines, those same columns
    hold the integer read counts of the two parents and column 10
    (0-based 9) holds the p-value.  Rows whose p-value exceeds max_pvalue
    are ignored.  Every header line resets the tallies, as in the original
    script.

    Returns a tuple
        (parent1, parent2, parent1ASE, parent2ASE, parent1ASEN0, parent2ASEN0)
    where the *ASE values count rows in which that parent has strictly more
    reads, and the *ASEN0 values count rows in which the other parent has
    zero reads.

    Raises ValueError if the file contains no '#gene' header line.
    """
    parent1 = None
    parent2 = None
    parent1_ase = 0
    parent2_ase = 0
    parent1_only = 0
    parent2_only = 0

    with open(path) as table:
        for row in table:
            if not row.strip():
                # Blank lines carry no data (they would otherwise fail
                # on the column lookups below).
                continue
            fields = row.strip().split('\t')
            if row.startswith('#gene'):
                parent1 = fields[5].split('_collapsed_reads')[0]
                parent2 = fields[6].split('_collapsed_reads')[0]
                parent1_ase = 0
                parent2_ase = 0
                parent1_only = 0
                parent2_only = 0
                continue
            if float(fields[9]) > max_pvalue:
                continue
            count1 = int(fields[5])
            count2 = int(fields[6])
            if count1 > count2:
                parent1_ase += 1
            elif count1 < count2:
                parent2_ase += 1
            if count1 == 0 and count2 > 0:
                parent2_only += 1
            elif count2 == 0 and count1 > 0:
                parent1_only += 1

    if parent1 is None:
        # Without the header the output column names are unknown; fail
        # loudly instead of reusing names/counts from a previous sample.
        raise ValueError("no '#gene' header line found in %s" % path)

    return parent1, parent2, parent1_ase, parent2_ase, parent1_only, parent2_only


def run():
    """Command-line entry point.

    Usage: python <script> input p-value outfile [-Bonferroni]

    'input' is a text file with one 'label <tab> file' entry per line.
    For every entry, the referenced table is scanned and two lines are
    written to 'outfile': a '#Sample' header naming the two parents and a
    line with the sample label followed by the four event counts.

    With '-Bonferroni' the p-value threshold is divided by the number of
    entries in 'input'.
    NOTE(review): the divisor is the number of samples, not the number of
    tests within each table - confirm that this is the intended correction.

    Exits with status 1 after printing the usage text when fewer than the
    three required positional arguments are given.
    """
    # sys.argv[3] is read below, so at least four entries are required.
    if len(sys.argv) < 4:
        print('usage: python %s input p-value outfile [-Bonferroni]' % sys.argv[0])
        print('     input format: label <tab> file')
        sys.exit(1)

    listfilename = sys.argv[1]
    minpvalue = float(sys.argv[2])
    outfilename = sys.argv[3]

    # Read the sample list once; it is needed both for the Bonferroni
    # divisor and for the main loop.  Blank lines are not entries.
    with open(listfilename) as listfile:
        samples = [entry.strip().split('\t') for entry in listfile if entry.strip()]

    if '-Bonferroni' in sys.argv:
        print('will apply Bonferroni multiple hypothesis testing correction')
        if samples:
            # Guarded so an empty list file does not divide by zero.
            minpvalue = minpvalue / len(samples)

    with open(outfilename, 'w') as outfile:
        for sample in samples:
            label = sample[0]
            tablepath = sample[1]
            (parent1, parent2,
             parent1ASE, parent2ASE,
             parent1ASEN0, parent2ASEN0) = _count_ase_events(tablepath, minpvalue)

            outline = '#Sample\t' + parent1 + '_ASE_events\t' + parent2 + '_ASE_events\t' + parent1 + '_complete_ASE_events\t' + parent2 + '_complete_ASE_events\t'
            outfile.write(outline + '\n')
            outline = label + '\t' + str(parent1ASE) + '\t' + str(parent2ASE) + '\t' + str(parent1ASEN0) + '\t' + str(parent2ASEN0)
            outfile.write(outline + '\n')

# Run only when executed as a script, so that importing this module does not
# parse sys.argv or exit the interpreter.
if __name__ == '__main__':
    run()