##################################
#                                #
# Last modified 11/23/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set

def _find_min_signals(npeaks_path, idr_threshold):
    """Look up the per-replicate minimum values for an IDR threshold.

    Rows of the npeaks file are space-separated; the header row (the one
    starting with '"IDR.cutoff"') is skipped.  Column 0 holds the IDR cutoff
    and columns 3 and 4 hold the values returned for replicate 1 and 2.

    The first row whose cutoff is >= idr_threshold, while the previous row's
    cutoff (0.0 before the first row) is <= idr_threshold, is selected.

    Returns:
        (fields, min_rep1, min_rep2) for the selected row, or None when no
        row brackets the threshold.
    """
    previous_idr = 0.0
    with open(npeaks_path) as npeaks_file:
        for row in npeaks_file:
            if row.startswith('"IDR.cutoff"'):
                continue
            if not row.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = row.strip().split(' ')
            idr = float(fields[0])
            if previous_idr <= idr_threshold <= idr:
                return fields, float(fields[3]), float(fields[4])
            previous_idr = idr
    return None


def _count_peaks(idr_path, min_rep1, min_rep2):
    """Count elements in a tab-separated IDR-formatted file.

    Column 1 is read as the replicate 1 value and column 3 as the
    replicate 2 value.

    Returns:
        (rep1, rep2, overlapping, passing) where rep1/rep2 count rows with a
        positive value in the respective replicate, overlapping counts rows
        positive in both, and passing counts rows with
        rep1 value >= min_rep1 and rep2 value >= min_rep2.
    """
    rep1 = rep2 = overlapping = passing = 0
    with open(idr_path) as idr_file:
        for row in idr_file:
            if not row.strip():
                continue
            fields = row.strip().split('\t')
            rep1_value = float(fields[1])
            rep2_value = float(fields[3])
            if rep1_value > 0:
                rep1 += 1
            if rep2_value > 0:
                rep2 += 1
            if rep1_value > 0 and rep2_value > 0:
                overlapping += 1
            if rep1_value >= min_rep1 and rep2_value >= min_rep2:
                passing += 1
    return rep1, rep2, overlapping, passing


def run():
    """Summarize IDR results for every dataset listed in the input file.

    Command line (read from sys.argv):
        inputfilename IDR-threshold outputfilename

    Each non-blank line of the input file is
    '<label> TAB <IDR-formatted filename> TAB <npeaks.txt filename>'.
    One tab-separated row per label (sorted by label) is written to the
    output file after a '#Label...' header row.

    Raises:
        SystemExit: when fewer than three arguments are given (usage is
            printed first).
        ValueError: when an npeaks file has no row bracketing the requested
            IDR threshold.
    """
    print(sys.argv)

    # Three positional arguments are required, so argv needs 4 entries;
    # checking for fewer than 3 let a missing output name raise IndexError.
    if len(sys.argv) < 4:
        print('usage: python %s inputfilename IDR-threshold outputfilename' % sys.argv[0])
        print('       Input file format:')
        print('       <label> <tab> <IDR-formatted filename> <tab> <npeaks.txt filename>')
        sys.exit(1)

    list_filename = sys.argv[1]
    idr_threshold = float(sys.argv[2])
    out_filename = sys.argv[3]

    results = {}
    with open(list_filename) as list_file:
        for entry in list_file:
            if not entry.strip():
                continue
            entry_fields = entry.strip().split('\t')
            label = entry_fields[0]
            print(label)
            idr_path = entry_fields[1]
            npeaks_path = entry_fields[2]

            match = _find_min_signals(npeaks_path, idr_threshold)
            if match is None:
                # Previously this fell through to a NameError (first label)
                # or silently reused the previous label's minimum values.
                raise ValueError(
                    'IDR threshold %s not found in %s (label %s)'
                    % (idr_threshold, npeaks_path, label))
            matched_fields, min_rep1, min_rep2 = match
            print(matched_fields)

            results[label] = _count_peaks(idr_path, min_rep1, min_rep2)

    # The output is opened only after all inputs were processed so that a
    # failure does not leave behind a truncated/empty output file.
    with open(out_filename, 'w') as outfile:
        outfile.write('#Label\tRep1_elements\tRep2_elements\tOverlapping\tPassingIDR\n')
        for label in sorted(results):
            counts = [str(count) for count in results[label]]
            outfile.write('\t'.join([label] + counts) + '\n')
            
# Script entry point: run() is invoked unconditionally at module level
# (there is no __main__ guard, so importing this file also executes it).
run()

