##################################
#                                #
# Last modified 08/11/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def run():
    """Tally rows per (type, match) pair above/below an FPKM_conf_lo threshold.

    Command-line arguments (read from ``sys.argv``):
        1. inputfilename   -- tab-delimited text file.  Lines starting with
           '#' and blank lines are skipped.  Column index 4 is used as the
           type, index 5 as the match, and index 7 is parsed as the float
           FPKM_conf_lo value.
        2. minFPKM_conf_lo -- float threshold; a row counts as "above" when
           its FPKM_conf_lo is >= this value, otherwise as "below".
        3. outfilename     -- output path.  Receives two '#' header lines
           followed by one ``type<TAB>match<TAB>above<TAB>below`` row per
           pair, in sorted key order.

    Prints a usage message and exits with status 1 when fewer than three
    arguments are given.  A data line with fewer than eight columns raises
    IndexError; a non-numeric threshold or FPKM_conf_lo raises ValueError.
    """
    # Three positional arguments are required, i.e. argv must have 4 entries.
    if len(sys.argv) < 4:
        print('usage: python %s inputfilename minFPKM_conf_lo outfilename' % sys.argv[0])
        sys.exit(1)

    datafilename = sys.argv[1]
    minFPKM = float(sys.argv[2])
    outfilename = sys.argv[3]

    # (type, match) -> [rows at/above threshold, rows below threshold]
    counts = {}
    infile = open(datafilename)
    try:
        for line in infile:
            if line[0] == '#' or not line.strip():
                continue
            # Strip only the line terminator: a full strip() would also drop
            # leading tabs and shift every column when the first one is empty.
            fields = line.rstrip('\r\n').split('\t')
            key = (fields[4], fields[5])
            FPKM_conf_lo = float(fields[7])
            tally = counts.setdefault(key, [0, 0])
            if FPKM_conf_lo >= minFPKM:
                tally[0] += 1
            else:
                tally[1] += 1
    finally:
        infile.close()

    # Open the output only after the input parsed cleanly, so a failure above
    # does not leave a truncated, header-only result file behind.
    outfile = open(outfilename, 'w')
    try:
        outfile.write('#FPKM_conf_lo threshold=' + str(minFPKM) + '\n')
        # "below_treshold" (sic) is kept as-is: it is part of the emitted
        # file format and consumers may match on the exact header text.
        outfile.write('#type\tmatch\tabove_threshold\tbelow_treshold\n')
        for key in sorted(counts):
            above, below = counts[key]
            outfile.write('%s\t%s\t%d\t%d\n' % (key[0], key[1], above, below))
    finally:
        outfile.close()
        
# Script entry point.  NOTE: this call is not guarded by a __main__ check,
# so run() (which reads sys.argv) also executes if this module is imported.
run()

