#
#  makerdsfrombed.py
#  ENRAGE
#
#  Created by Ali Mortazavi on 6/21/08.
#
# psyco is an optional accelerator: use it when it is installed and carry on
# without it otherwise.  Only ImportError is swallowed so that interrupts and
# unrelated failures are no longer hidden by a bare except.
try:
    import psyco
    psyco.full()
except ImportError:
    pass

import sys
import string
import optparse
from commoncode import writeLog, getConfigParser, getConfigIntOption, getConfigBoolOption
import ReadDataset

# Version banner: printed whenever the module is loaded and also written to
# the run log by makerdsfrombed().
verstring = "makerdsfrombed: version 2.2"
print(verstring)


def main(argv=None):
    """Command-line entry point: parse the arguments and run the BED import.

    argv is the full argument vector with the program name in position 0;
    when it is None or empty, sys.argv is used instead.

    The first three positional arguments are the read label, the BED file
    and the output RDS file.  Every positional argument containing "::" is
    also recorded as a propertyName::propertyValue metadata pair.  When fewer
    than three positional arguments are given, the usage message is printed
    and the process exits with status 1.
    """
    if not argv:
        argv = sys.argv

    usage = "usage: python %prog label bedfile outrdsfile [--append] [--index] [propertyName::propertyValue] [--cache numPages]"

    parser = makeParser(usage)
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 3:
        print(usage)
        print("\ntreats all imported reads as uniquely mapped\n")
        sys.exit(1)

    label = args[0]
    filename = args[1]
    outdbname = args[2]

    if options.rnaDataType:
        dataType = "RNA"
    else:
        dataType = "DNA"

    propertyList = []
    for arg in args:
        if "::" in arg:
            # Split on the first "::" only, so that a value which itself
            # contains "::" is kept whole instead of raising ValueError.
            (pname, pvalue) = arg.strip().split("::", 1)
            propertyList.append((pname, pvalue))

    makerdsfrombed(label, filename, outdbname, options.init, dataType, options.doIndex, options.cachePages, propertyList)


def makeParser(usage=""):
    """Build the optparse parser for the makerdsfrombed command line.

    Registers --append, --index, --cache and --RNA.  The default of each
    destination is read from the "makerdsfrombed" section of the object
    returned by getConfigParser(), with fallbacks init=True, RNA=False,
    doIndex=False and cachePages=100000.
    """
    parser = optparse.OptionParser(usage=usage)

    # (flag, add_option keywords) for every supported switch.
    optionTable = (
        ("--append", {"action": "store_false", "dest": "init"}),
        ("--index", {"action": "store_true", "dest": "doIndex"}),
        ("--cache", {"type": "int", "dest": "cachePages"}),
        ("--RNA", {"action": "store_true", "dest": "rnaDataType"}),
    )
    for flag, keywords in optionTable:
        parser.add_option(flag, **keywords)

    config = getConfigParser()
    sectionName = "makerdsfrombed"
    initDefault = getConfigBoolOption(config, sectionName, "init", True)
    rnaDefault = getConfigBoolOption(config, sectionName, "RNA", False)
    indexDefault = getConfigBoolOption(config, sectionName, "doIndex", False)
    cacheDefault = getConfigIntOption(config, sectionName, "cachePages", 100000)

    defaultValues = {
        "init": initDefault,
        "rnaDataType": rnaDefault,
        "doIndex": indexDefault,
        "cachePages": cacheDefault,
    }
    parser.set_defaults(**defaultValues)

    return parser


def makerdsfrombed(label, filename, outdbname, init=True, dataType="DNA", doIndex=False, cachePages=100000, propertyList=[]):
    """Import every read of a BED file into a ReadDataset as a unique read.

    label         prefix of the generated read IDs ("<label>-<n>", n from 0)
    filename      path of the BED file to read
    outdbname     dataset name handed to ReadDataset; the run is also logged
                  to outdbname + ".log"
    init          passed to ReadDataset; when true the "readsize" and
                  "imported_from_bed" metadata are written, when false the
                  existing index is dropped before inserting
    dataType      passed to ReadDataset ("DNA" or "RNA" from the command line)
    doIndex       build the dataset index once all reads are inserted
    cachePages    requested cache size; only applied when it exceeds the
                  dataset's default cache size
    propertyList  (name, value) pairs stored with rds.insertMetadata; the
                  list is not modified here

    Blank lines, "#" comment lines and "track"/"browser" header lines are
    skipped.  Every other line must have at least six whitespace-separated
    columns: chrom, start, stop, two unused columns and the strand.
    """
    readsize = 0
    index = 0
    insertSize = 100000

    writeLog(outdbname + ".log", verstring, " ".join(sys.argv[1:]))

    infile = open(filename, "r")
    try:
        rds = ReadDataset.ReadDataset(outdbname, init, dataType, verbose=True)
        if not init:
            rds.dropIndex()

        # Only override the cache when the request is larger than the
        # dataset's default cache size.
        defaultCacheSize = rds.getDefaultCacheSize()
        if cachePages > defaultCacheSize:
            if init:
                rds.setDBcache(cachePages, default=True)
            else:
                rds.setDBcache(cachePages)

        if propertyList:
            rds.insertMetadata(propertyList)

        insertList = []
        for line in infile:
            fields = line.split()
            # Decide on the first token only: a data line that merely
            # contains the text "track" somewhere must not be dropped.
            if not fields or fields[0] in ("track", "browser") or fields[0].startswith("#"):
                continue

            chrom = fields[0]
            start = int(fields[1])
            stop = int(fields[2])
            sense = fields[5]

            if readsize == 0:
                # NOTE(review): the stored value is |start - stop| + 1, taken
                # from the first read with a non-zero span; kept as in the
                # original - confirm against ReadDataset's coordinate convention.
                readsize = abs(start - stop)
                if init:
                    rds.insertMetadata([("readsize", readsize+1)])
                    rds.insertMetadata([("imported_from_bed", "True")])

            readID = "%s-%s" % (label, str(index))
            insertList.append((readID, chrom, start, stop, sense, 1.0, "", ""))
            index += 1

            # Flush in full batches of insertSize reads, printing one
            # progress dot per batch.
            if len(insertList) >= insertSize:
                rds.insertUniqs(insertList)
                insertList = []
                sys.stdout.write(". ")
                sys.stdout.flush()
    finally:
        # Release the input file even when parsing or insertion fails.
        infile.close()

    if insertList:
        rds.insertUniqs(insertList)

    countString = "%d unique reads" % index
    print(countString)

    writeLog(outdbname + ".log", verstring, countString)

    if doIndex:
        print("building index....")
        if cachePages > defaultCacheSize:
            rds.setDBcache(cachePages)
            rds.buildIndex(cachePages)
        else:
            rds.buildIndex(defaultCacheSize)


# Run the importer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main(argv=sys.argv)