#
#  transcripts.py
#  ENRAGE
#
#  Created by Ali Mortazavi on 1/25/08.
#
""" usage: python %s rpkmFile outFile [--transcriptome size] [--cells count] [--efficiency fraction]
           where transcriptome size is in Gbp, cell count is in arbitrary units and efficiency is a fraction
"""

import sys
import optparse
from commoncode import getConfigParser, getConfigFloatOption

def main(argv=None):
    """Command-line entry point: parse the arguments and run the conversion.

    argv is the full argument vector, including the program name in
    position 0; when it is None or empty, sys.argv is used instead.

    Prints the usage message and exits with status 1 when fewer than two
    positional arguments (rpkmFile, outFile) are supplied.
    """
    if not argv:
        argv = sys.argv

    # The single-argument parenthesized form prints the same text under
    # Python 2 and is also valid syntax under Python 3.
    print("transcripts: version 3.1")
    usage = "usage: python %prog rpkmFile outFile [options]"

    parser = makeParser(usage)
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 2:
        # Let optparse render the usage so that %prog is expanded to the
        # program name instead of being printed literally.
        parser.print_usage()
        sys.exit(1)

    infile = args[0]
    outfile = args[1]

    transcripts(infile, outfile, options.tSize, options.cellCount, options.efficiency)


def makeParser(usage=""):
    """Build the optparse parser for the transcripts command line.

    Registers the float options --transcriptome (dest tSize), --cells
    (dest cellCount) and --efficiency (dest efficiency).  Their defaults are
    whatever getConfigFloatOption returns for the "transcripts" section;
    200000.0, 1e6 and 0.3 are passed to it as the last argument (presumably
    the fallback used when the configuration has no such entry -- confirm
    against commoncode).

    Returns the configured optparse.OptionParser.
    """
    parser = optparse.OptionParser(usage=usage)
    # %default is expanded by optparse when the help is rendered, so the
    # help text always shows the default actually in effect (including one
    # coming from the configuration) rather than a hard-coded number.
    parser.add_option("--transcriptome", type="float", dest="tSize",
                      help="transcriptome size in Gbp [default %default]")
    parser.add_option("--cells", type="float", dest="cellCount",
                      help="arbitrary units [default %default]")
    parser.add_option("--efficiency", type="float", dest="efficiency",
                      help="fraction [default %default]")

    configParser = getConfigParser()
    section = "transcripts"
    tSize = getConfigFloatOption(configParser, section, "tSize", 200000.0)
    cellCount = getConfigFloatOption(configParser, section, "cellCount", 1e6)
    efficiency = getConfigFloatOption(configParser, section, "efficiency", 0.3)

    parser.set_defaults(tSize=tSize, cellCount=cellCount, efficiency=efficiency)

    return parser


def transcripts(infilename, outfilename, tSize=200000, cellCount=1e6, efficiency=0.3):
    """Convert per-line RPKM values into transcript counts.

    Each non-blank line of infilename is split on whitespace; the first
    field is copied to the output as the identifier and the last field is
    parsed as a float RPKM value.  For every such line one tab-separated
    line is written to outfilename:

        identifier, rpkm * tSize, (rpkm * tSize) / cellCount / efficiency

    with both numbers formatted to one decimal place.  Blank lines in the
    input are skipped.  outfilename is opened for writing, so any existing
    content is replaced.

    Raises ValueError if the last field of a line is not a number, and
    ZeroDivisionError if cellCount or efficiency is zero while there is
    data to convert.  Both files are closed even when an error occurs.
    """
    infile = open(infilename)
    try:
        outfile = open(outfilename, "w")
        try:
            for line in infile:
                fields = line.split()
                if not fields:
                    # Blank (or whitespace-only) line: nothing to convert.
                    continue

                rpkm = float(fields[-1])
                # Named so that it does not shadow this function.
                transcriptCount = rpkm * tSize
                transPerCell = transcriptCount / cellCount / efficiency
                outfile.write("%s\t%.1f\t%.1f\n" % (fields[0], transcriptCount, transPerCell))
        finally:
            outfile.close()
    finally:
        infile.close()

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main(argv=sys.argv)