##################################
#                                #
# Last modified 12/15/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def _read_fpkm_table(path, id_field, fpkm_field, conf_lo_field):
    """Parse a tab-delimited file into ``{ID: (FPKM, FPKM_conf_lo)}``.

    ``id_field``, ``fpkm_field`` and ``conf_lo_field`` are 0-based column
    indices.  Lines whose FPKM / lower-confidence columns are not numeric
    (e.g. a header row) or that have too few columns (e.g. blank lines) are
    skipped.  If an ID occurs more than once, the last occurrence wins.
    """
    table = {}
    with open(path) as handle:
        for line in handle:
            fields = line.strip().split('\t')
            try:
                entry_id = fields[id_field]
                fpkm = float(fields[fpkm_field])
                conf_lo = float(fields[conf_lo_field])
            except (IndexError, ValueError):
                # Only skip lines that are unparsable; anything else
                # (e.g. KeyboardInterrupt) must propagate.
                continue
            table[entry_id] = (fpkm, conf_lo)
    return table


def _passes(value1, value2, threshold):
    """Return True if a pair of values is counted at ``threshold``.

    * ``threshold > 0``:  both values must be >= threshold.
    * ``threshold == 0``: at least one value must be > 0.
    * ``threshold < 0``:  never counted (neither rule applies).
    """
    if threshold > 0:
        return value1 >= threshold and value2 >= threshold
    if threshold == 0:
        return value1 > 0 or value2 > 0
    return False


def run():
    """Count IDs passing FPKM thresholds in two tab-delimited FPKM files.

    Command-line arguments (read from ``sys.argv``):
        1. FPKM1       - path to the first input file
        2. FPKM2       - path to the second input file
        3. IDfieldID   - 0-based column index of the ID
        4. FPKMfieldID - 0-based column index of the FPKM value
        5. FPKM_conf_lo_fieldID - 0-based column index of the lower
           confidence bound
        6. thresholds  - comma-separated list of numeric thresholds
        7. outfilename - path of the table to write

    For every threshold (processed in ascending order) two counts are
    produced, one using the FPKM column and one using the lower-confidence
    column; see ``_passes`` for the counting rule.  Each result row is
    printed to stdout and written to the output file beneath a header line.

    Only IDs present in both inputs are compared; a warning is printed when
    the two ID sets differ.  Exits with status 1 after printing usage when
    too few arguments are given.
    """
    # Seven arguments plus the script name: sys.argv[7] is read below, so
    # anything shorter than 8 entries is an incomplete command line.
    if len(sys.argv) < 8:
        print('usage: python %s FPKM1 FPKM2 IDfieldID FPKMfieldID FPKM_conf_lo_filedID thresholds outfilename' % sys.argv[0])
        print('threshold should be comma-separated')
        sys.exit(1)

    FPKM1 = sys.argv[1]
    FPKM2 = sys.argv[2]
    IDfield = int(sys.argv[3])
    FPKMfield = int(sys.argv[4])
    FPKM_conf_lo_field = int(sys.argv[5])
    ThresholdList = sorted(float(t) for t in sys.argv[6].split(','))
    outputfilename = sys.argv[7]

    FPKM1Dict = _read_fpkm_table(FPKM1, IDfield, FPKMfield, FPKM_conf_lo_field)
    FPKM2Dict = _read_fpkm_table(FPKM2, IDfield, FPKMfield, FPKM_conf_lo_field)

    FPKM1_keys = sorted(FPKM1Dict)
    if FPKM1_keys != sorted(FPKM2Dict):
        print('elements sets not the same!!!!!!!!!!')

    # Compare only IDs found in both files so that a mismatch produces the
    # warning above instead of a KeyError part-way through counting.
    shared_ids = [ID for ID in FPKM1_keys if ID in FPKM2Dict]

    OutputDict = {}
    for threshold in ThresholdList:
        fpkm_count = 0
        fpkm_lo_count = 0
        for ID in shared_ids:
            fpkm1, lo1 = FPKM1Dict[ID]
            fpkm2, lo2 = FPKM2Dict[ID]
            if _passes(fpkm1, fpkm2, threshold):
                fpkm_count += 1
            if _passes(lo1, lo2, threshold):
                fpkm_lo_count += 1
        OutputDict[threshold] = (fpkm_count, fpkm_lo_count)

    with open(outputfilename, 'w') as outfile:
        outfile.write('FPKM_threshold\tFPKM\tFPKM_lo\n')
        for threshold in ThresholdList:
            fpkm_count, fpkm_lo_count = OutputDict[threshold]
            outline = '\t'.join([str(threshold), str(fpkm_count), str(fpkm_lo_count)])
            print(outline)
            outfile.write(outline + '\n')

# Script entry point: run() is invoked unconditionally at module level, so it
# executes both when this file is run as a script and when it is imported.
run()

