###########################################################################
#                                                                         #
# C O P Y R I G H T   N O T I C E                                         #
#  Copyright (c) 2003-10 by:                                              #
#    * California Institute of Technology                                 #
#                                                                         #
#    All Rights Reserved.                                                 #
#                                                                         #
# Permission is hereby granted, free of charge, to any person             #
# obtaining a copy of this software and associated documentation files    #
# (the "Software"), to deal in the Software without restriction,          #
# including without limitation the rights to use, copy, modify, merge,    #
# publish, distribute, sublicense, and/or sell copies of the Software,    #
# and to permit persons to whom the Software is furnished to do so,       #
# subject to the following conditions:                                    #
#                                                                         #
# The above copyright notice and this permission notice shall be          #
# included in all copies or substantial portions of the Software.         #
#                                                                         #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
# SOFTWARE.                                                               #
###########################################################################
#
# python parent class for experiments
try:
    from pysqlite2 import dbapi2 as sqlite
except:
    from sqlite3 import dbapi2 as sqlite

import os, time, tempfile, string

from cistematic.core import retrieveSeq, retrieveSeqFeatures, fasta
import cistematic.core 
from cistematic.core.motif import matrixRow, Motif
from cistematic.genomes import Genome
from cistematic import version
from os import environ

if environ.get("CISTEMATIC_TEMP"):
    cisTemp = environ.get("CISTEMATIC_TEMP")
else:
    cisTemp = "/tmp"

tempfile.tempdir = cisTemp


class Experiment:
    """ genepool = {(genomeA, geneA1): seq1, (genomeA, geneA2): seq2, (genomeB, geneB1): seq3,...}
        geneFeatures = ((genomeA, geneA1):[feature0, feature1, .....], (genomeB, geneB1):[feature0, feature1,...],....}
        programs = [(program_object1, setting1), (program_object2, setting2), ....]
    """
    genepool = {}
    geneFeatures = {}
    programs = []
    genepoolID = 0
    workdir = ""
    experimentID = ""
    experimentType = "generic"
    expFile = ""
    geneDB = ""
    debugSQL = False


    def __init__(self, expID, expDBFile, geneDBFile=""):
        self.experimentID = expID
        self.expFile = expDBFile
        self.geneDB = geneDBFile
        self.maskLowerCase = False
        self.boundToNextGene = False
        print "cistematic version %s" % version
        self.startDB()


    def __del__(self):
        self.savePrograms()
        self.saveGeneDB()
        self.uncacheGeneDB()
        self.removeWorkdir()


    def setExperimentID(self, expID):
        self.mlog("changing experiment ID from %s to %s" % (self.experimentID, expID))
        self.experimentID = expID


    def setGeneDB(self, geneDBFile=""):
        self.mlog("using %s as the gene database" % geneDBFile)
        self.geneDB = geneDBFile


    def cacheGeneDB(self, genome):
        try:
            cistematic.core.cacheGeneDB(genome)
            self.mlog("cached genome %s" % genome)
        except:
            self.mlog("could not cache genome %s" % genome)


    def uncacheGeneDB(self):
        try:
            cistematic.core.uncacheGeneDB()
        except:
            self.mlog("could not uncache genomes")


    def setMaskLowerCase(self, maskValue):
        if maskValue == True or maskValue == 1 or maskValue == "1":
            self.maskLowerCase = True
            self.setSettings("maskLowerCase", ["1"])
        else:
            self.maskLowerCase = False
            self.setSettings("maskLowerCase", ["0"])


    def setBoundToNextGene(self, boundValue):
        if boundValue == True or boundValue == 1 or boundValue == "1":
            self.boundToNextGene = True
            self.setSettings("boundToNextGene", ["1"])
        else:
            self.boundToNextGene = False
            self.setSettings("boundToNextGene", ["0"])


    def setSeqParameters(self, up=0, cds=1, down=0):
        self.upstream = up
        self.cds = cds
        self.downstream = down
        if cds == 0:
            cdsStatus = "NO "
        else:
            cdsStatus = ""

        self.setSettings("seq_parameters",  ["%d\t%d\t%d" % (up, cds, down)])
        self.mlog("setting sequence retrieval parameters to %d bp upstream, %d bp downstream, and %scds" % (up, down, cdsStatus))


    def getSeqParameters(self):
        return (self.upstream, self.cds, self.downstream)


    def dsetLength(self):
        stmt="SELECT count(*) from dataset where expID = '%s' " % self.experimentID
        res = self.sqlexp(stmt)
        try:
            answer = int(res[0][0])
        except IndexError:
            answer = 0

        return answer


    def resultsLength(self):
        stmt="SELECT count(*) from results where expID = '%s' " % self.experimentID
        res = self.sqlexp(stmt)
        try:
            answer = int(res[0][0])
        except IndexError:
            answer = 0

        return answer


    def checkMotID(self, motID):
        answer = False
        stmt = "SELECT ID from motifs where expID = '%s' and mTagID = '%s' " % (self.experimentID, motID)
        res = self.sqlexp(stmt)
        if len(res) > 0:
            answer = True

        return answer
  

    def findMotif(self,mTagID):
        if self.checkMotID(mTagID):
            return self.makeMotif(mTagID)
    
        self.mlog("could not find %s" % mTagID)

        return ""


    def makeMotif(self, motID):
        mPWM = []
        mseqs = []
        stmt = "SELECT motifSeq, threshold, info from motifs where expID = '%s' and mTagID = '%s' " % (self.experimentID, motID)
        res = self.sqlexp(stmt)
        entry = res[0]
        (seq, threshold, info) = entry
        stmt = "SELECT aFreq, cFreq, gFreq, tFreq from motifPWMs where expID = '%s' and mTagID = '%s' order by position" % (self.experimentID, motID)
        res = self.sqlexp(stmt)
        for entry in res:
            col = [0.0, 0.0, 0.0, 0.0]
            (aFreq, cFreq, gFreq, tFreq) = entry
            col[matrixRow["A"]] = aFreq
            col[matrixRow["C"]] = cFreq
            col[matrixRow["G"]] = gFreq
            col[matrixRow["T"]] = tFreq
            mPWM.append(col)

        stmt = "SELECT sequence from motifSequences where expID = '%s' and mTagID = '%s'  and type = 'instance' " % (self.experimentID, motID)
        res = self.sqlexp(stmt)
        for entry in res:
            seq = entry[0]
            mseqs.append(seq)

        return Motif(motID, seq, mPWM, mseqs, threshold, info)


    def saveMotif(self, mot):
        motID = mot.tagID
        motifSeq = mot.motifSeq
        motifInfo = mot.info
        motifThreshold = mot.threshold
        if self.checkMotID(motID):
            stmt = "DELETE from motifs where expID = '%s' and mTagID = '%s' " % (self.experimentID, motID)
            self.sqlexp(stmt, commit=True)
            stmt = "DELETE from motifPWMs where expID = '%s' and mTagID = '%s' " % (self.experimentID, motID)
            self.sqlexp(stmt, commit=True)
            stmt = "DELETE from motifSequences where expID = '%s' and mTagID = '%s' " % (self.experimentID, motID)
            self.sqlexp(stmt, commit=True)

        stmtList = []
        values = "(NULL, '%s', '%s', '%s', %f, '%s')" % (self.experimentID, motID, motifSeq, motifThreshold, motifInfo)
        stmt = "INSERT into motifs(ID, expID, mTagID, motifSeq, threshold, info) values %s " % values
        self.sqlexp(stmt, commit=True)
        pos = 0
        stmt = "INSERT into motifPWMs(ID, expID, mTagID, position, aFreq, cFreq, gFreq, tFreq) values (NULL, ?, ?, ?, ?, ?, ?, ?)"
        for col in mot.motifPWM:
            aFreq = round(col[matrixRow["A"]],4)
            cFreq = round(col[matrixRow["C"]],4)
            gFreq = round(col[matrixRow["G"]],4)
            tFreq = abs(1.0 - aFreq - cFreq - gFreq)
            stmtList.append((self.experimentID, motID, pos, aFreq, cFreq, gFreq, tFreq))
            pos += 1

        self.batchsqlexp(stmt, stmtList)
        if len(mot.sequences) > 0:
            stmt = "INSERT into motifSequences(ID, expID, mTagID, sequence, type, location) values (NULL, ?, ?, ?, ?, ?) " 
            stmtList = []
            for seq in mot.sequences:
                stmtList.append((self.experimentID, motID, seq, "instance", "-"))

            self.batchsqlexp(stmt, stmtList)


    def exportMotifs(self, directory=".", prefix="", suffix="mot"):
        stmt = "SELECT distinct mTagID from results where expID = '%s'  " % (self.experimentID)
        res = self.sqlexp(stmt)
        if prefix == "":
            prefix = self.experimentID

        for entry in res:
            motID = entry[0]
            mot = self.makeMotif(motID)
            fileName = "%s/%s-%s.%s" % (directory, prefix, motID, suffix)
            self.mlog("exporting %s as %s" % (motID, fileName))
            mot.saveMotif(fileName)


    def exportLogos(self, directory=".", prefix=""):
        stmt = "SELECT distinct mTagID from results where expID = '%s'  " % (self.experimentID)
        res = self.sqlexp(stmt)
        if prefix == "":
            prefix = self.experimentID
        for entry in res:
            motID = entry[0]
            mot = self.makeMotif(motID)
            fileName = "%s/%s-%s" % (directory, prefix, motID)
            self.mlog("saving logo for %s as %s.png" % (motID, fileName))
            mot.saveLogo(fileName)


    def appendResults(self, mot, resultsGroup="-"):
        motID = mot.tagID
        self.saveMotif(mot)
        stmt = "INSERT into results(ID, expID, resultsGroup, mTagID) values (NULL, '%s', '%s', '%s') " % (self.experimentID, resultsGroup, motID)
        self.sqlexp(stmt, "commit")


    def getGeneDB(self, geneDBFile=""):
        return self.geneDB


    def getLog(self):
        answer = []
        stmt = "SELECT timestamp, message from expLog where expID = '%s' order by timestamp" % self.experimentID
        res = self.sqlexp(stmt)
        for entry in res:
            (timestamp, message) = entry
            answer.append((eval(timestamp), message))

        return answer


    def getResults(self):
        answer = []
        stmt = "SELECT distinct mTagID from results where expID = '%s'  " % (self.experimentID)
        res = self.sqlexp(stmt)
        for entry in res:
            motID = entry[0]
            answer.append(self.makeMotif(motID))

        return answer


    def getSettings(self):
        answer = {}
        stmt = "SELECT settingName, data from settings where expID = '%s' " % (self.experimentID)
        res = self.sqlexp(stmt)
        for entry in res:
            (settingName, data) = entry
            if settingName not in answer.keys():
                answer[settingName] = []

            answer[settingName].append(data)

        return answer


    def getSetting(self, settingName):
        answer = []
        stmt = "SELECT data from settings where expID = '%s' and settingName = '%s' " % (self.experimentID, settingName)
        res = self.sqlexp(stmt)
        for entry in res:
            answer.append(entry[0])

        return answer


    def settingsHasKey(self, thekey):
        answer = False
        stmt = "SELECT distinct ID from settings where expID = '%s' and settingName = '%s' " % (self.experimentID, thekey)
        res = self.sqlexp(stmt)
        if len(res) > 0:
            answer = True

        return answer


    def setSettings(self, settingName, settingList):
        """ insert or replace (i.e. delete previous entry) a setting with one or more setting data.
        """
        # delete existing, insert new
        try:
            stmt = "DELETE from settings where expID = '%s' and settingName = '%s'  " % (self.experimentID, settingName)
            res = self.sqlexp(stmt, "commit")
        except:
            pass

        stmtList = []
        stmt = "INSERT into settings (ID, expID, settingName, data) values (NULL, ?, ?, ?)" 
        for entry in settingList:
            stmtList.append((self.experimentID, settingName, entry))

        res = self.batchsqlexp(stmt, stmtList)

        return settingName


    def setSettingsID(self, settingName, data):
        """ return the settingsID for the inserted settingName:data pair in the settings table.
        """
        stmt = "INSERT into settings (ID, expID, settingName, data) values (NULL, '%s', '%s', \"%s\")" % (self.experimentID, settingName, data)
        res = self.sqlexp(stmt, "commit")

        return res


    def getSettingsID(self, settingID):
        """ get a setting by settingsID  in the settings table.
        """
        answer = ""
        stmt = "SELECT settingName, data from  settings where expID = '%s' and ID =  %d" % (self.experimentID, int(settingID))
        res = self.sqlexp(stmt)
        try:
            (name, data) = res[0]
            answer = (name, data)
        except:
            pass

        return answer


    def setRun(self, progName, datasetID, settingsID):
        values = "(NULL, '%s', '%s', '%s', %d, '%s', '%s')" % (self.experimentID, progName, datasetID, settingsID, time.localtime(), "-")
        stmt = "INSERT into runs (ID, expID, progName, datasetGroup, settingsID, timestamp, resultsGroup) values %s" % values
        runID = self.sqlexp(stmt, "commit")
        self.mlog("run %s: program %s with settings %d and dataset %s" % (runID, progName, settingsID, datasetID))

        return int(runID)


    def getRun(self, rID):
        res = ""
        stmt = "SELECT progName, datasetGroup, settingsID, timestamp, resultsGroup from runs where expID = '%s' and ID = %d" % (self.experimentID, rID)
        res = self.sqlexp(stmt)
        (progName, datasetID, settingsID, timestamp, resultsID) = res[0]
        if resultsID == "-":
            resultsID = []
            if datasetID != "chromolist":
                datasetID = int(datasetID)

        return (progName, datasetID, int(settingsID), timestamp, resultsID)


    def getRunsByProg(self, prog):
        runs = self.getRuns()
        matchingRuns = []
        for entry in runs:
            if (runs[entry][0] == prog):
                matchingRuns.append(runs[entry])

        return matchingRuns


    def getRuns(self):
        runs = {}
        res = ''
        stmt = "SELECT ID, progName, datasetGroup, settingsID, timestamp, resultsGroup from runs where expID = '%s' " % (self.experimentID)
        res = self.sqlexp(stmt)
        for entry in res:
            (ID, progName, datasetID, settingsID, timestamp, resultsID) = entry
            runs[int(ID)] = (progName, datasetID, settingsID, eval(timestamp), resultsID)

        return runs


    def appendDataset(self, dset):
        stmtList = []
        stmt = "INSERT into dataset(ID, genome, locus, sequence, expID) values (NULL, ?, ?, ?, ?)  " 
        for (genome, entries) in dset:
            for entry in entries:
                if len(entry) == 2:
                    (locus, sequence) = entry
                else:
                    locus = entry
                    sequence = "-"

                stmtList.append((genome, locus, sequence, self.experimentID))

        res = self.batchsqlexp(stmt, stmtList)


    def getDataset(self):
        dset = []
        stmt = "SELECT genome, locus, sequence from dataset where expID = '%s' " % (self.experimentID)
        res = self.sqlexp(stmt)
        for entry in res:
            (genome, locus, sequence) = entry
            if sequence == "-":
                dset.append((genome, [locus]))
            else:
                dset.append((genome, [locus, sequence]))

        return dset


    def getDatasetNames(self):
        dset = []
        stmt = "select settingName from settings where settingName like 'dataset" + "%' and expID = '" + self.experimentID + "' " 
        res = self.sqlexp(stmt)
        for entry in res:
            dset.append(entry)

        return dset


    def getDatasetIDs(self):
        dset = []
        stmt = "select ID from settings where settingName like 'dataset" + "%' and expID = '" + self.experimentID + "' " 
        res = self.sqlexp(stmt)
        for entry in res:
            dset.append(entry)

        return dset


    def getFeatures(self, geneID):
        results = []
        try:
            res = []
            stmt = "select featureType, start, stop, orientation from sequenceFeatures where seqGenome = '%s' and seqID = '%s' and expID = '%s' " % (geneID[0], geneID[1], self.experimentID) 
            res = self.sqlexp(stmt)
            for entry in res:
                (ftype, start, stop, orientation) = entry
                results.append((ftype, start, stop, orientation))
        except:
            pass

        results.sort()

        return results


    def absoluteLocation(self, match, featureLength, seqparams="", gidCoordinates="", customDB=""):
        """ Returns the absolute location of the start of a match = ((genome, gID), (pos, sense)), given a relative 
            location with respect to the gene and a feature length.
            Can be passed cached sequence parameters (up, cds, down) and geneEntry tuple to avoid hitting database.
        """
        result = ["", 0, "F"]
        (geneID, loc) = match
        (pos, sense) = loc
        pos = int(pos)
        if seqparams == "":
            seqparams = self.getSeqParameters()

        (up, cds, down) = seqparams
        if gidCoordinates == "":
            gidCoordinates = cistematic.core.geneEntry(geneID)

        (gidChrom, gidStart, gidStop, gidLength, gidSense) = gidCoordinates
        result[0] = gidChrom
        if self.boundToNextGene:
            up = cistematic.core.upstreamToNextGene(geneID, up, db=customDB)
            down = cistematic.core.downstreamToNextGene(geneID, down, db=customDB)
    
        if gidSense == "F":
            result[2] = sense
            if pos < up or cds > 0:
                result[1] = gidStart - up + pos
            else:
                result[1] = gidStop - up + pos
        else:
            if gidStart > gidStop:
                temp = gidStart
                gidStart = gidStop
                gidStop = temp

            if sense == "F":
                result[2] = "R"

            if pos < up or cds > 0:
                result[1] = gidStop + up - pos - featureLength
            else:
                result[1] = gidStart + up - pos - featureLength
    
        return result


    def setWorkdir(self, wdir=""):
        self.workdir = wdir
        self.mlog("changed workdir to %s" % (wdir))


    def resetDataset(self):
        stmt = "DELETE from dataset where expID = '%s'" % self.experimentID
        res = self.sqlexp(stmt, "commit")
        self.mlog("log reset")


    def resetLog(self):
        stmt = "DELETE from expLog where expID = '%s'" % self.experimentID
        res = self.sqlexp(stmt, "commit")
        self.mlog("log reset")


    def resetResults(self):
        stmt = "DELETE from results where expID = '%s'" % self.experimentID
        res = self.sqlexp(stmt, "commit")
        self.mlog("results reset")


    def resetRuns(self):
        stmt = "DELETE from expLog where expID = '%s'" % self.experimentID
        res = self.sqlexp(stmt, "commit")
        self.mlog("runs reset")


    def resetPrograms(self):
        self.programs = []
        self.mlog("programs reset")


    def resetSettings(self):
        stmt = "DELETE from expLog where expID = '%s'" % self.experimentID
        res = self.sqlexp(stmt, "commit")
        self.log("settings reset")


    def appendProgram(self, program):
        self.programs.append((program, self.setSettingsID(program.name(), program.getSettings())))
        self.mlog("adding program %s" % program.name())


    def removeProgram(self, progName):
        for index in range(len(self.programs)):
            if self.programs[index][0].name() == progName:
                del self.programs[index]
                self.mlog("removed program %s" % progName)


    def createWorkdir(self):
        if self.workdir == "":
            self.workdir = tempfile.mktemp()

        try:
            os.mkdir(self.workdir)
            self.mlog("created workdir: %s" % (self.workdir))
        except:
            pass


    def removeWorkdir(self):
        try:
            filenames = os.listdir(self.workdir)
            for entry in filenames:
                os.remove("%s/%s" % (self.workdir, entry))
            os.rmdir(self.workdir)
        except:
            self.mlog("could not delete workdir: %s" % (self.workdir))


    def mlog(self,msg):
        print msg
        stmt = "INSERT into expLog(ID, expID, timestamp, message) values (NULL, '%s', '%s', '%s') " % (self.experimentID, time.localtime(),string.replace(msg, "'", '"'))
        self.sqlexp(stmt, "commit")


    def logToString(self):
        response = ""
        for line in self.getLog():
            response +=  time.asctime(line[0]) + ": " + line[1] + "\n"

        return response


    def printLog(self):
        for line in self.getLog():
            print "%s: %s" % (time.asctime(line[0]), line[1])


    def tailLog(self):
        theLog = self.getLog()
        for line in theLog[-10:]:
            print "%s: %s" % (time.asctime(line[0]), line[1])


    def loadPrograms(self):
        self.programs = []
        for progs in self.getSetting("loaded_programs"):
            (progs0, progs1, progs2) = progs.split("\t")
            execString = "from %s import %s" % (progs0, progs1)
            exec execString
            execString = 'self.programs.append((apply(%s), %s))' % (progs1, progs2)
            exec execString


    def savePrograms(self):
        progs = []
        for (program, settingID) in self.programs:
            progs.append("%s\t%s\t%s" % (program.__class__.__module__, program.__class__.__name__, settingID))

        if len(progs) > 0:
            self.setSettings("loaded_programs", progs)

    def saveGeneDB(self):
        if self.geneDB != "":
            self.setSettings("geneDB", [self.geneDB])


    def loadFasta(self, fastaFile, genomeName):
        """ load fasta file into the dataset.
        """
        genIDList = self.loadFastaFromFile(fastaFile, genomeName)

        return [(genomeName, genIDList)]


    def loadFastaFromFile(self, fastaFile, genomeName):
        seqArray = []
        seqLen = 0
        seqName = []
        geneDBPath = "%s.genedb" % genomeName
        self.mlog("Loading fasta file %s into database %s" % (genomeName, geneDBPath))
        aGenome = Genome(genomeName, dbFile=geneDBPath)
        aGenome.createGeneDB()
        inFile = open(fastaFile, "r")
        header = inFile.readline()
        while header != "":
            seqArray = []
            seqLen = 0
            chromID = header.strip()[1:]
            currentLine = inFile.readline()
            while currentLine != "" and currentLine[0] != ">":
                lineSeq = currentLine.strip()
                seqLen += len(lineSeq)
                seqArray.append(lineSeq)
                currentLine = inFile.readline()

            seq = string.join(seqArray, "")
            print "Added sequence %s to database" % chromID
            aGenome.addSequence((genomeName, chromID), seq, "chromosome", str(seqLen))
            aGenome.addChromosomeEntry(chromID, chromID, "db")
            aGenome.addGeneEntry((genomeName, chromID), chromID, 0, seqLen - 1, "F", "IMPORT", "1")
            header = currentLine
            seqName.append(chromID)

        inFile.close()
        aGenome.createIndices()
        self.setGeneDB(geneDBPath)

        return seqName


    def loadGenepool(self):
        self.genepool = {}
        for geneTuple in self.getDataset():
            genome = geneTuple[0]
            geneList = geneTuple[1]
            for gene in geneList:
                try:
                    if len(gene) == 2:
                        tag = gene[0]
                        geneID = (genome, tag)
                        seq = gene[1]
                        # need to deal with masking lowercase
                        self.genepool[geneID] = seq.upper()
                    else:
                        geneID = (genome, gene)
                        # note that we will only keep one copy of a geneid in the genepool, even if we
                        # retrieve it multiple times.
                        self.genepool[geneID] = retrieveSeq(geneID, self.upstream, self.cds, self.downstream, self.geneDB, self.maskLowerCase, self.boundToNextGene)
                except:
                    self.mlog("could not load %s" % (str(geneID)))

        self.genepoolID = self.setSettingsID("genepool", self.genepool.keys())


    def loadGeneFeatures(self):
        stmt = "DELETE from sequenceFeatures where expID = '%s'" % self.experimentID
        res = self.sqlexp(stmt, "commit")
        stmtList = []                
        stmt = "INSERT into sequenceFeatures (ID, expID, seqGenome, seqID, featureType, start, stop, orientation) values (NULL, ?, ?, ?, ?, ?, ?, ?)" 
        for geneTuple in self.getDataset():
            genome = geneTuple[0]
            geneList = geneTuple[1]
            for gene in geneList:
                try:
                    geneID = (genome, gene)
                    results = retrieveSeqFeatures(geneID, self.upstream, self.cds, self.downstream, self.boundToNextGene, self.geneDB)
                    for entry in results:
                        (ftype, start, stop, orientation) = entry
                        stmtList.append((self.experimentID, genome, gene, ftype, start, stop, orientation))
                except:
                    self.mlog("could not find features for %s" % (gene))

        if len(stmtList) > 0:
            self.batchsqlexp(stmt, stmtList)


    def reload(self):
        pass


    def toFile(self, geneIDList, filename, geneDict=[]):
        outFile = open(filename, "w")
        for geneID in geneIDList:
            if geneID in geneDict:
                outFile.write(fasta(geneID, geneDict[geneID]))
            elif geneID in self.genepool:
                outFile.write(fasta(geneID, self.genepool[geneID]))
            else:
                self.mlog("could not write %s to file" % str(geneID))

        outFile.close()


    def createDataFile(self, datasetID=-1, geneIDList=[], geneDict=[]):
        oldtempdir = tempfile.tempdir
        tempfile.tempdir = self.workdir
        dataFile = tempfile.mktemp()
        tempfile.tempdir = oldtempdir
        if datasetID < 0:
            datasetID = self.genepoolID

        if len(geneIDList) < 1:
            settingsList =  self.getSettingsID(datasetID)
            geneIDList = eval(settingsList[1])

        try:
            self.toFile(geneIDList, dataFile, geneDict)
        except:
            self.mlog("could not create dataFile %s" % (dataFile))

        return dataFile


    def initialize(self, dataset=[], workdir=""):
        if workdir != "":
            self.setWorkdir(workdir)

        self.createWorkdir()
        if len(dataset) > 0:
            self.appendDataset(dataset)
            self.loadGenepool()
            self.loadGeneFeatures()


    def run(self):
        if len(self.programs)  == 0:
            self.mlog("Must instantiate one or more programs first")
        elif len(self.genepool) == 0:
            self.mlog("Must have one or more valid sequences in the dataset")


    def sqlexp(self, stmt, commit=""):
        db = sqlite.connect(self.expFile, timeout=60)
        sqlc = db.cursor()
        if self.debugSQL:
            print "sqlexp: %s" % stmt

        sqlc.execute(stmt)
        res = sqlc.fetchall()
        if commit != "":
            db.commit()

        if stmt[0:6] == "INSERT":
            res = sqlc.lastrowid

        sqlc.close()
        db.close()

        return res


    def batchsqlexp(self, stmt, batch):
        """ executes a list of sql statements (usually inserts) stored in the list batch with a single commit.
        """
        res = []
        db = sqlite.connect(self.expFile, timeout=60)        
        sqlc = db.cursor()
        if self.debugSQL:
            print "batchsql: %s" % stmt
            print "batchsql: %s" % str(batch)

        sqlc.executemany(stmt, batch)
        db.commit()
        sqlc.close()
        db.close()

        return res


    def setExternalStatus(self, status):
        self.mlog("Setting status to %s" % status)
        statfile = open("%s.status" % self.expFile, "w")
        statfile.write(status)
        statfile.close()


    def resetExternalStatus(self):
        try:
            os.remove("%s.status" % self.expFile)
        except:
            pass


    def startDB(self):
        if not os.path.exists(self.expFile):
            try:
                db = sqlite.connect(self.expFile, timeout=60)
                sql = db.cursor()
                sql.execute("CREATE table experiment(ID INTEGER PRIMARY KEY, expID varchar, expType varchar, expStatus varchar, timestamp varchar)")
                sql.execute("CREATE table dataset(ID INTEGER PRIMARY KEY, expID varchar, datasetGroup varchar, genome varchar, locus varchar, sequence varchar)")
                sql.execute("CREATE table results(ID INTEGER PRIMARY KEY, expID varchar, resultsGroup varchar, mTagID varchar)")
                sql.execute("CREATE table motifs(ID INTEGER PRIMARY KEY, expID varchar, mTagID varchar, motifSeq varchar, threshold varchar, info varchar)")
                sql.execute("CREATE table motifPWMs(ID INTEGER PRIMARY KEY, expID varchar, mTagID varchar, position int, aFreq float, cFreq float, gFreq float, tFreq float)")
                sql.execute("CREATE table motifSequences(ID INTEGER PRIMARY KEY, expID varchar, mTagID varchar, sequence varchar, type varchar, location varchar)")
                sql.execute("CREATE table settings(ID INTEGER PRIMARY KEY, expID varchar, settingName varchar, data varchar)")
                sql.execute("CREATE table runs(ID INTEGER PRIMARY KEY, expID varchar, progName varchar, datasetGroup varchar, settingsID int, timestamp varchar, resultsGroup varchar)")
                sql.execute("CREATE table expLog(ID INTEGER PRIMARY KEY, expID varchar, timestamp varchar, message varchar)")
                sql.execute("CREATE table sequenceFeatures(ID INTEGER PRIMARY KEY, expID varchar, seqGenome varchar, seqID varchar, featureType varchar, start int, stop int, orientation varchar)")

                sql.execute("CREATE INDEX settingIndex1 on settings(expID, settingName)")
                sql.execute("CREATE INDEX datasetIndex1 on dataset(expID, datasetGroup)")
                sql.execute("CREATE INDEX motifsIndex1 on motifs(expID, mTagID)")
                sql.execute("CREATE INDEX motifPWMsIndex1 on motifPWMs(expID, mTagID)")
                sql.execute("CREATE INDEX motifSequencesIndex1 on motifSequences(expID, mTagID)")
                sql.execute("CREATE INDEX featuresIndex1 on sequenceFeatures(expID, seqGenome, seqID)")
                
                sql.execute("INSERT INTO settings(ID, expID, settingName, data) values (NULL, '%s', 'experimentType', '%s')" % (self.experimentID, self.experimentType))
                
                db.commit()
                sql.close()
                db.close()
                self.mlog("Created experiment database %s" % self.expFile)
            except:
                self.mlog("Could not create experiment database %s" % self.expFile)
        else:
            self.mlog("Using existing experiment database %s" % self.expFile)

        if self.settingsHasKey("loaded_programs"):
            self.loadPrograms()

        if self.settingsHasKey("seq_parameters"):
            res = self.getSetting("seq_parameters")
            (up, cds, down) = res[0].split("\t")
            self.setSeqParameters(int(up), int(cds), int(down))
        else:
            self.setSeqParameters()

        if self.settingsHasKey("maskLowerCase"):
            res = self.getSetting("maskLowerCase")
            self.setMaskLowerCase(res[0])
        else:
            self.setMaskLowerCase(False)

        if self.settingsHasKey("boundToNextGene"):
            res = self.getSetting("boundToNextGene")
            self.setBoundToNextGene(res[0])
        else:
            self.setBoundToNextGene(False)

        if self.settingsHasKey("experimentType"):
            res = self.getSetting("experimentType")
            self.experimentType = res[0]

        if self.settingsHasKey("geneDB"):
            res = self.getSetting("geneDB")
            self.geneDB = res[0]

        if self.dsetLength() > 0:
            self.loadGenepool()

        self.createWorkdir()