#
#  combineRPKMS.py
#  ENRAGE
#

# Announce the tool name and version as soon as the module is loaded.
print("combineRPKMs: version 1.1")

# psyco is an optional accelerator: use it when it is installed and works,
# otherwise carry on without it. Catching Exception (rather than a bare
# except) keeps this best-effort while still letting KeyboardInterrupt and
# SystemExit propagate.
try:
    import psyco
    psyco.full()
except Exception:
    pass

import sys
import optparse
import string
from commoncode import getConfigParser, getConfigBoolOption


def main(argv=None):
    """Command-line entry point: parse the arguments and combine the RPKM files.

    argv: full argument vector with the program name at index 0; sys.argv is
          used when argv is None or empty.

    Four positional arguments are required, in this order: firstRPKM,
    expandedRPKM, finalRPKM and combinedOutfile. When fewer are given the
    usage string is printed and the process exits with status 1.
    """
    if not argv:
        argv = sys.argv

    usage = "usage: python %prog firstRPKM expandedRPKM finalRPKM combinedOutfile [--withmultifraction]"
    parser = makeParser(usage)
    (options, args) = parser.parse_args(argv[1:])

    # The output file name is read from args[3], so all four positionals must
    # be present; checking for only three let a missing output file surface
    # as an IndexError instead of the usage message.
    if len(args) < 4:
        print(usage)
        sys.exit(1)

    firstfile = args[0]
    expandedfile = args[1]
    finalfile = args[2]
    outfile = args[3]

    combineRPKMs(firstfile, expandedfile, finalfile, outfile, options.doFraction)


def makeParser(usage=""):
    """Build the optparse parser used by this script.

    usage: usage text handed to optparse.OptionParser.

    The parser knows a single flag, --withmultifraction, which stores True in
    options.doFraction. The default for doFraction is whatever
    getConfigBoolOption returns for the "doFraction" entry of the
    "combineRPKMs" section (False is passed as its fallback argument).

    Returns the configured optparse.OptionParser.
    """
    optionParser = optparse.OptionParser(usage=usage)
    optionParser.add_option("--withmultifraction", action="store_true", dest="doFraction")

    # Look up the configured default before registering it on the parser.
    configuredDefault = getConfigBoolOption(getConfigParser(), "combineRPKMs", "doFraction", False)
    optionParser.set_defaults(doFraction=configuredDefault)

    return optionParser


def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False):
    """Merge the first, expanded and final RPKM files into one tab-separated table.

    firstfileName:    RPKM file read with getRPKMDict; supplies firstRPKM.
    expandedfileName: RPKM file read with getRPKMDict; supplies the gid and
                      expandedRPKM for every gene.
    finalfileName:    whitespace-separated file whose columns are gene, RNAkb,
                      finalRPKM and, when doFraction is set, the multiread
                      fraction in the fourth column.
    outfileName:      path of the combined table; overwritten if it exists.
    doFraction:       when true, a fractionMulti column is appended.

    One output row is written per non-blank line of the final file, preceded
    by a header row. A gene missing from the first file gets an empty
    firstRPKM. A gene missing from the expanded file raises KeyError, since
    its gid cannot be determined.
    """
    firstDict = getRPKMDict(firstfileName)
    gidDict, expandedDict = getRPKMDict(expandedfileName, getGIDDict=True)

    if doFraction:
        header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\tfractionMulti\n"
    else:
        header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\n"

    # try/finally guarantees both handles are closed even when a malformed
    # line or an unknown gene aborts the loop part-way through.
    outfile = open(outfileName, "w")
    try:
        outfile.write(header)

        finalfile = open(finalfileName)
        try:
            for line in finalfile:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no record.
                    continue

                gene = fields[0]
                rnakb = fields[1]
                finalRPKM = fields[2]
                firstRPKM = firstDict.get(gene, "")
                outputFields = [gidDict[gene], rnakb, gene, firstRPKM, expandedDict[gene], finalRPKM]

                if doFraction:
                    outputFields.append(fields[3])

                outfile.write("%s\n" % "\t".join(outputFields))
        finally:
            finalfile.close()
    finally:
        outfile.close()


def getRPKMDict(rpkmFileName, getGIDDict=False):
    """Read an RPKM file into a dictionary keyed by its second column.

    rpkmFileName: path of a whitespace-separated text file.
    getGIDDict:   when true, also build a dictionary from the second column
                  to the first column.

    For every non-blank line the second field is the key and the last field
    is the value; values are kept as strings. If a key occurs more than once
    the last occurrence wins. Blank lines are skipped. A line with a single
    field still raises IndexError.

    Returns rpkmDict, or the tuple (gidDict, rpkmDict) when getGIDDict is true.
    """
    gidDict = {}
    rpkmDict = {}
    rpkmFile = open(rpkmFileName)
    try:
        for line in rpkmFile:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue

            rpkmDict[fields[1]] = fields[-1]
            if getGIDDict:
                gidDict[fields[1]] = fields[0]
    finally:
        # Close the handle even when a malformed line raises.
        rpkmFile.close()

    if getGIDDict:
        return gidDict, rpkmDict
    else:
        return rpkmDict


# When executed as a script (not imported), pass the process command line to main().
if __name__ == "__main__":
    main(sys.argv)
