##################################
#                                #
# Last modified 12/14/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def _gene_name(transcript_id):
    """Return the part of transcript_id that precedes its last '-'.

    An ID containing no '-' yields the empty string.
    """
    return transcript_id.rpartition('-')[0]


def _read_fpkm(filename, name_col, fpkm_col, fpkm_lo_col):
    """Yield (gene name, ID, (FPKM, FPKM_lo)) for each data line of filename.

    Lines starting with '#' and blank lines are skipped. The remaining lines
    are stripped, split on tabs, and indexed with the 0-based column numbers
    given. Raises IndexError if a line has too few columns and ValueError if
    an FPKM column is not a number.
    """
    with open(filename) as handle:
        for line in handle:
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if not stripped:
                # A blank line (e.g. at the end of the file) carries no data.
                continue
            fields = stripped.split('\t')
            transcript_id = fields[name_col]
            values = (float(fields[fpkm_col]), float(fields[fpkm_lo_col]))
            yield _gene_name(transcript_id), transcript_id, values


def _format_row(transcript_id, reps, label):
    """Return one tab-separated, newline-terminated output row."""
    rep1 = reps['rep1']
    rep2 = reps['rep2']
    columns = [transcript_id, str(rep1[0]), str(rep1[1]),
               str(rep2[0]), str(rep2[1]), label]
    return '\t'.join(columns) + '\n'


def _write_classification(outfile, expression, cutoff):
    """Write the header and the 'single-replicate' / 'switch' rows to outfile.

    expression maps gene name -> ID -> {'rep1': (FPKM, FPKM_lo),
    'rep2': (FPKM, FPKM_lo)}. Genes with exactly one ID are ignored. Raises
    KeyError if an ID of a gene with several IDs lacks a replicate entry.
    """
    outfile.write('ID\trep1_FPKM\trep1_FPKM_lo\trep2_FPKM\trep2_FPKM_lo\tclass\n')
    for transcripts in expression.values():
        if len(transcripts) == 1:
            continue
        candidates = []
        any_rep1_only = False
        any_rep2_only = False
        for transcript_id, reps in transcripts.items():
            rep1_lo = reps['rep1'][1]
            rep2_lo = reps['rep2'][1]
            rep1_only = rep1_lo >= cutoff and rep2_lo == 0
            rep2_only = rep2_lo >= cutoff and rep1_lo == 0
            if rep1_only or rep2_only:
                candidates.append(transcript_id)
                any_rep1_only = any_rep1_only or rep1_only
                any_rep2_only = any_rep2_only or rep2_only
        if len(candidates) == 1:
            label = 'single-replicate'
        elif any_rep1_only and any_rep2_only:
            # A "switch" needs one ID passing only in rep1 and one passing
            # only in rep2; the two checks are independent, so two flags
            # replace a pairwise comparison of all candidates.
            label = 'switch'
        else:
            continue
        for transcript_id in candidates:
            outfile.write(_format_row(transcript_id, transcripts[transcript_id], label))


def run():
    """Compare two FPKM tables and report replicate-specific transcripts.

    Command-line arguments (sys.argv[1:]):
        FPKM1, FPKM2        tab-separated input files
        IDfield             0-based column index of the transcript ID
        FPKM_field          0-based column index of the FPKM value
        FPKM_conf_lo_field  0-based column index of the FPKM_lo value
        cutoff              threshold applied to the FPKM_lo values
        outfilename         output file to create

    IDs are grouped by gene name, i.e. the ID text before its last '-'.
    Within every gene that has more than one ID, an ID is a candidate when
    its FPKM_lo is >= cutoff in one file and exactly 0 in the other. A gene
    with a single candidate gets one 'single-replicate' row; a gene with
    candidates in both directions gets a 'switch' row per candidate.

    Prints the usage text and exits with status 1 when fewer than seven
    arguments are given. Raises KeyError if FPKM2 contains an ID that FPKM1
    does not, or if an ID of a multi-ID gene is missing from FPKM2.
    """
    # All of sys.argv[1]..sys.argv[7] are read below, so eight entries are needed.
    if len(sys.argv) < 8:
        print('usage: python %s FPKM1 FPKM2 IDfield FPKM_field FPKM_conf_lo_field cutoff outfilename ' % sys.argv[0])
        sys.exit(1)

    FPKM1 = sys.argv[1]
    FPKM2 = sys.argv[2]
    NameID = int(sys.argv[3])
    FPKM_ID = int(sys.argv[4])
    FPKM_lo_ID = int(sys.argv[5])
    cutoff = float(sys.argv[6])
    outputfilename = sys.argv[7]

    with open(outputfilename, 'w') as outfile:
        ExpressionDict = {}
        for geneName, ID, values in _read_fpkm(FPKM1, NameID, FPKM_ID, FPKM_lo_ID):
            ExpressionDict.setdefault(geneName, {}).setdefault(ID, {})['rep1'] = values

        for geneName, ID, values in _read_fpkm(FPKM2, NameID, FPKM_ID, FPKM_lo_ID):
            # Only IDs already seen in FPKM1 are accepted; others raise KeyError.
            ExpressionDict[geneName][ID]['rep2'] = values

        _write_classification(outfile, ExpressionDict, cutoff)

# Run the analysis only when this file is executed as a script, so that
# importing it as a module does not read sys.argv or call sys.exit.
if __name__ == '__main__':
    run()

