import sqlite3 as sqlite
import string
import tempfile
import shutil
import os
import re
import sys
import pysam
from array import array
from commoncode import getReverseComplement, getConfigParser, getConfigOption

# schema version stamped into newly initialized datasets and backfilled
# into the metadata of older files (see ReadDataset.__init__)
currentRDSVersion = "3.0"


class ReadDatasetError(Exception):
    """ Base exception raised for ReadDataset-specific failures. """
    pass


class ReadCounter():
    """ pysam fetch callback that tallies unique, multi and spliced reads.

    multiReadIDCount maps readID -> number of alignments for reads that
    align to more than one location; such reads each contribute
    1/multiplicity to multiReadCount so a multiread sums to 1 overall.
    """

    def __init__(self, multiReadIDCount=None, sense="both"):
        # None default avoids the shared mutable-default-argument pitfall;
        # passing a dict explicitly behaves exactly as before
        if multiReadIDCount is None:
            multiReadIDCount = {}

        self.multiReadIDCount = multiReadIDCount
        self.sense = sense
        # counters are per-instance so separate counters never share totals
        # (they were previously class attributes)
        self.uniqReadCount = 0
        self.multiReadCount = 0.0
        self.spliceReadCount = 0


    def __call__(self, read):
        """ classify one aligned read and bump the matching counter. """
        multiplicity = self.getReadMultiplicity(read)
        if multiplicity > 1:
            # fractional weight: each alignment of an N-fold multiread counts 1/N
            self.multiReadCount += 1.0 / multiplicity
        elif isSpliceEntry(read.cigar):
            self.spliceReadCount += 1
        else:
            self.uniqReadCount += 1


    def getReadMultiplicity(self, read):
        """ return the alignment count for read; 1 when not a known multiread. """
        pairReadSuffix = getPairedReadNumberSuffix(read)
        readID = "%s%s" % (read.qname, pairReadSuffix)
        return self.multiReadIDCount.get(readID, 1)


    def isMulti(self, read):
        """ True when read is recorded as aligning to multiple locations. """
        pairReadSuffix = getPairedReadNumberSuffix(read)
        readID = "%s%s" % (read.qname, pairReadSuffix)
        return readID in self.multiReadIDCount



class MaxCoordFinder(ReadCounter):
    """ pysam fetch callback that tracks the maximum start coordinate seen,
    separately for unique, multi and spliced reads.
    """
    maxUniqueStart = 0
    maxMultiStart = 0
    maxSpliceStart = 0

    def __call__(self, read):
        if self.isMulti(read):
            self.maxMultiStart = max(self.maxMultiStart, read.pos)
        elif isSpliceEntry(read.cigar):
            # bug fix: compare with the running splice max, not maxMultiStart
            self.maxSpliceStart = max(self.maxSpliceStart, getSpliceRightStart(read.pos, read.cigar))
        else:
            # bug fix: compare with the running unique max, not maxMultiStart
            self.maxUniqueStart = max(self.maxUniqueStart, read.pos)



class ReadDataset():
    """ Class for storing reads from experiments. Assumes that custom scripts
    will translate incoming data into a format that can be inserted into the
    class using the insert* methods. Default class subtype ('DNA') includes
    tables for unique and multireads, whereas 'RNA' subtype also includes a
    splices table.
    """

    #TODO: Primary
    def __init__(self, datafile, initialize=False, datasetType="DNA", verbose=False, 
                 cache=False, reportCount=True):
        """ creates an rds datafile if initialize is set to true, otherwise
        will append to existing tables. datasetType can be either 'DNA' or 'RNA'.
        """
        self.dbcon = ""
        self.memcon = ""
        self.dataType = ""
        self.rdsVersion = currentRDSVersion
        self.memBacked = False
        self.memChrom = ""
        self.memCursor = ""
        self.cachedDBFile = ""
        #TODO: declaration of self.readsize, self.totalReadWeight needed for bam conversion
        self.multiReadIDCounts = self.getMultiReadIDCounts(datafile, "rb")
        self.bamfile = pysam.Samfile(datafile, "rb")
        self.readsize = None
        self.totalReadWeight = None

        self.metadata = {"dataType": "RNA"}
        if initialize and datasetType not in ["DNA", "RNA"]:
            raise ReadDatasetError("failed to initialize: datasetType must be 'DNA' or 'RNA'")

        #if cache:
        #    if verbose:
        #        print "caching ...."

        #    self.cacheDB(datafile)
        #    dbFile = self.cachedDBFile
        #else:
        #    dbFile = datafile

        #self.dbcon = sqlite.connect(dbFile)
        #self.dbcon.row_factory = sqlite.Row
        #self.dbcon.execute("PRAGMA temp_store = MEMORY")
        if initialize:
            self.dataType = datasetType
            self.initializeTables(self.dbcon)
        else:
            self.dataType = self.metadata["dataType"]

        try:
            self.rdsVersion = self.metadata["rdsVersion"]
        except KeyError:
            self.insertMetadata([("rdsVersion", float(currentRDSVersion))])

        self.fullReadCounts = self.getFullReadCount()
        if verbose:
            self.printRDSInfo(datafile, reportCount, initialize)


    #TODO: deprecated
    #def __len__(self):
    #    """ return the number of usable reads in the dataset.
    #    """
    #    total = self.getUniqsCount()
    #    total += self.getMultiCount()

    #    if self.dataType == "RNA":
    #        total += self.getSplicesCount()

    #    total = int(total)

    #    return total


    def __len__(self):
        """ return the number of reads in the bam file
            This is not the same as the original as the original returned total weight.
        """
        count = 0
        references = self.bamfile.references
        for reference in references:
            count += self.bamfile.count(reference)

        return count


    def getMultiReadIDCounts(self, samFileName, fileMode):
        try:
            samfile = pysam.Samfile(samFileName, fileMode)
        except ValueError:
            print "samfile index not found"
            sys.exit(1)

        readIDCounts = {}
        for read in samfile.fetch(until_eof=True):
            pairReadSuffix = getPairedReadNumberSuffix(read)
            readName = "%s%s" % (read.qname, pairReadSuffix)
            try:
                readIDCounts[readName] += 1
            except KeyError:
                readIDCounts[readName] = 1

        for readID in readIDCounts.keys():
            if readIDCounts[readID] == 1:
                del readIDCounts[readID]

        return readIDCounts


    def totalReadWeight(self):
        """ return the total weight of usable reads in the bam file.
        """
        if self.totalReadWeight is None:
            total = self.fullReadCounts["uniq"]
            total += self.fullReadCounts["multi"]

            if self.dataType == "RNA":
                total += self.fullReadCounts["splice"]

            self.totalReadWeight = int(total)

        return self.totalReadWeight


    #TODO: key function
    def __del__(self):
        """ cleanup copy in local cache, if present.
        """
        if self.cachedDBFile != "":
            self.uncacheDB()


    #TODO: Secondary to __init__()
    def printRDSInfo(self, datafile, reportCount, initialize):
        if initialize:
            print "INITIALIZED dataset %s" % datafile
        else:
            print "dataset %s" % datafile

        print "metadata:"
        pnameList = self.metadata.keys()
        pnameList.sort()
        for pname in pnameList:
            print "\t" + pname + "\t" + str(self.metadata[pname])

        if reportCount and not initialize:
            self.printReadCounts()

        print "default cache size is %d pages" % self.getDefaultCacheSize()
        #if self.hasIndex():
        #    print "found index"
        #else:
        #    print "not indexed"


    #TODO: Secondary to __init__()
    def printReadCounts(self):
        ucount = self.fullReadCounts["uniq"]
        mcount = self.fullReadCounts["multi"]
        if self.dataType == "DNA":
            print "\n%d unique reads and %d multireads" % (ucount, mcount)
        elif self.dataType == "RNA":
            scount = self.fullReadCounts["splice"]
            print "\n%d unique reads, %d spliced reads and %d multireads" % (ucount, scount, mcount)


    def cacheDB(self, filename):
        """ copy the database at filename into a freshly created file in the
        configured cistematic_temp directory and remember its path in
        self.cachedDBFile.
        """
        configParser = getConfigParser()
        cisTemp = getConfigOption(configParser, "general", "cistematic_temp", default="/tmp")
        # mkstemp creates the file atomically, avoiding the name-prediction
        # race of the previous tempfile.mktemp() call, and passing dir=
        # avoids mutating the module-global tempfile.tempdir
        fileDescriptor, cachePath = tempfile.mkstemp(suffix=".db", dir=cisTemp)
        os.close(fileDescriptor)
        self.cachedDBFile = cachePath
        shutil.copyfile(filename, self.cachedDBFile)


    def saveCacheDB(self, filename):
        """ copy the locally cached database back out to filename
        (the inverse of cacheDB).
        """
        shutil.copyfile(self.cachedDBFile, filename)


    def uncacheDB(self):
        """ delete geneinfoDB from local cache.
        """
        global cachedDBFile
        if self.cachedDBFile != "":
            try:
                os.remove(self.cachedDBFile)
            except:
                print "could not delete %s" % self.cachedDBFile

            self.cachedDB = ""


    def attachDB(self, filename, dbName):
        """ attach another database file to the readDataset under the
        alias dbName.
        """
        self.execute("attach '%s' as %s" % (filename, dbName))


    def detachDB(self, dbName):
        """ detach a previously attached database from the readDataset. """
        self.execute("detach %s" % (dbName))


    def importFromDB(self, dbName, table, ascolumns="*", destcolumns="", flagged=""):
        """ copy rows of table from the attached database dbName into the
        current RDS, selecting ascolumns (default: all) into destcolumns
        (default: all); restrict to rows whose flag equals flagged when given.
        """
        baseStatement = "insert into %s %s select %s from %s.%s" % (table, destcolumns, ascolumns, dbName, table)
        whereClause = ""
        if flagged != "":
            whereClause = " where flag = '%s' " % flagged

        self.executeCommit(baseStatement + whereClause)


    #TODO: only used by combinerds - probably not needed after bam conversion
    def getTables(self, dbName=""):
        """ get a list of table names in a particular database file.
        """
        resultList = []
        sql = self.getSqlCursor()

        if dbName != "":
            dbName = "%s." % dbName

        stmt = "select name from %ssqlite_master where type='table'" % dbName
        sql.execute(stmt)
        results = sql.fetchall()

        for row in results:
            resultList.append(row["name"])

        return resultList


    def getSqlCursor(self):
        if self.memBacked:
            sql = self.getMemCursor()
        else:
            sql = self.getFileCursor()

        return sql


    def getMemCursor(self):
        """ returns a cursor to memory database for low-level (SQL)
        access to the data.  Only meaningful once memcon holds an open
        connection (it is initialized to "" in __init__).
        """
        return self.memcon.cursor()


    def getFileCursor(self):
        """ returns a cursor to file database for low-level (SQL)
        access to the data.  Only meaningful once dbcon holds an open
        connection (it is initialized to "" in __init__).
        """
        return self.dbcon.cursor()


    #TODO: deprecated
    #def hasIndex(self):
    #    """ return True if the RDS file has at least one index.
    #    """
    #    stmt = "select count(*) from sqlite_master where type='index'"
    #    count = int(self.execute(stmt, returnResults=True)[0][0])

    #    return count > 0


    #TODO: Secondary to __init__()
    def initializeTables(self, dbConnection, cache=100000):
        """ creates table schema in a database connection, which is
        typically a database file or an in-memory database.
        """
        dbConnection.execute("PRAGMA DEFAULT_CACHE_SIZE = %d" % cache)
        dbConnection.execute("create table metadata (name varchar, value varchar)")
        dbConnection.execute("insert into metadata values('dataType','%s')" % self.dataType)
        positionSchema = "start int, stop int"
        tableSchema = "(ID INTEGER PRIMARY KEY, readID varchar, chrom varchar, %s, sense varchar, weight real, flag varchar, mismatch varchar)" % positionSchema
        dbConnection.execute("create table uniqs %s" % tableSchema)
        dbConnection.execute("create table multi %s" % tableSchema)
        if self.dataType == "RNA":
            positionSchema = "startL int, stopL int, startR int, stopR int"
            tableSchema = "(ID INTEGER PRIMARY KEY, readID varchar, chrom varchar, %s, sense varchar, weight real, flag varchar, mismatch varchar)" % positionSchema
            dbConnection.execute("create table splices %s" % tableSchema)

            positionSchema = "startL int, stopL int, startR int, stopR int"
            tableSchema = "(ID INTEGER PRIMARY KEY, readID varchar, chrom varchar, %s, sense varchar, weight real, flag varchar, mismatch varchar)" % positionSchema
            dbConnection.execute("create table multisplices %s" % tableSchema)

        dbConnection.commit()


    #TODO: deprecated
    #def getMetadata(self, valueName=""):
    #    """ returns a dictionary of metadata.
    #    """
    #    whereClause = ""
    #    resultsDict = {}

    #    if valueName != "":
    #        whereClause = " where name='%s'" % valueName

    #    sql = self.getSqlCursor()

    #    sql.execute("select name, value from metadata %s" % whereClause)
    #    results = sql.fetchall()

    #    for row in results:
    #        parameterName = row["name"]
    #        parameterValue = row["value"]
    #        if parameterName not in resultsDict:
    #            resultsDict[parameterName] = parameterValue
    #        else:
    #            trying = True
    #            index = 2
    #            while trying:
    #                newName = string.join([parameterName, str(index)], ":")
    #                if newName not in resultsDict:
    #                    resultsDict[newName] = parameterValue
    #                    trying = False

    #                index += 1

    #    return resultsDict


    def getMetadata(self, valueName=""):        
        """ return the in-memory metadata dictionary.

        valueName is accepted for backward compatibility with the old
        SQL-backed implementation but is currently ignored.
        """
        return self.metadata

    #TODO: deprecated
    #def getReadSize(self):
    #    """ returns readsize if defined in metadata.
    #    """
    #    metadata = self.getMetadata()
    #    if "readsize" not in metadata:
    #        raise ReadDatasetError("no readsize parameter defined")
    #    else:
    #        readSize = metadata["readsize"]
    #        if "import" in readSize:
    #            readSize = readSize.split()[0]

    #        readSize = int(readSize)
    #        if readSize < 0:
    #            raise ReadDatasetError("readsize is negative")

    #        return readSize


    def getReadSize(self):
        """ This is following the same model as the original where it assumes that all
            read have the same readsize and that this is reflected by the size of the
            first read.
        """
        if self.readsize is None:
            bamFileIterator = self.bamfile.fetch(until_eof=True)
            for read in bamFileIterator:
                self.calculateReadsizeFromCigar(read.cigar)
                break

        return self.readsize


    def calculateReadsizeFromCigar(self, cigar):
        take = (0, 1) # CIGAR operation (M/match, I/insertion)
        self.readsize = sum([length for op,length in cigar if op in take])


    #TODO: deprecated
    #def getDefaultCacheSize(self):
    #    """ returns the default cache size.
    #    """
    #    return int(self.execute("PRAGMA DEFAULT_CACHE_SIZE", returnResults=True)[0][0])


    def getDefaultCacheSize(self):
        """ returns 0 as cache is going to be deprecated
        (the SQL-backed version read PRAGMA DEFAULT_CACHE_SIZE).
        """
        return 0


    #TODO: deprecated
    #def getChromosomes(self, table="uniqs", fullChrom=True):
    #    """ returns a sorted list of distinct chromosomes in table.
    #    """
    #    statement = "select distinct chrom from %s" % table
    #    sql = self.getSqlCursor()

    #    sql.execute(statement)
    #    results = []
    #    for row in sql:
    #        if fullChrom:
    #            if row["chrom"] not in results:
    #                results.append(row["chrom"])
    #        else:
    #            shortName = row["chrom"][3:]
    #            if  len(shortName.strip()) > 0 and shortName not in results:
    #                results.append(shortName)

    #    results.sort()

    #    return results


    def getChromosomes(self, fullChrom=True):
        """ returns a sorted list of distinct chromosomes in bam file reference list.
        """
        if fullChrom:
            results = list(self.getFullReferenceNames())
        else:
            results = list(self.getShortReferenceNames())

        results.sort()

        return results


    def getFullReferenceNames(self):
        """ returns the set of reference (chromosome) names declared in the
        bam file header.
        """
        return set(self.bamfile.references)


    def getShortReferenceNames(self):
        """ returns a set of bam file reference names after removing first 3 characters.
        """
        results = set()
        references = self.bamfile.references
        for chromosome in references:
            shortName = chromosome[3:]
            if len(shortName.strip()) > 0:
                results.add(shortName)

        return results


    #TODO: deprecated
    #def getMaxCoordinate(self, chrom, doUniqs=True,
    #                     doMulti=False, doSplices=False):
    #    """ returns the maximum coordinate for reads on a given chromosome.
    #    """
    #    maxCoord = 0

    #    if doUniqs:
    #        maxCoord = self.getMaxStartCoordinateInTable(chrom, "uniqs")

    #    if doSplices:
    #        spliceMax = self.getMaxStartCoordinateInTable(chrom, "splices", startField="startR")
    #        maxCoord = max(spliceMax, maxCoord)

    #    if doMulti:
    #        multiMax = self.getMaxStartCoordinateInTable(chrom, "multi")
    #        maxCoord = max(multiMax, maxCoord)

    #    return maxCoord


    def getMaxCoordinate(self, chrom, doUniqs=True,
                         doMulti=False, doSplices=False):
        """ return the maximum start coordinate over the selected read
        classes (unique/multi/splice) on the given chromosome; 0 when no
        class is selected.
        """
        coordFinder = MaxCoordFinder()
        self.bamfile.fetch(reference=chrom, callback=coordFinder)
        candidates = [0]
        if doUniqs:
            candidates.append(coordFinder.maxUniqueStart)

        if doSplices:
            candidates.append(coordFinder.maxSpliceStart)

        if doMulti:
            candidates.append(coordFinder.maxMultiStart)

        return max(candidates)


    #TODO: Secondary to getMaxCordinate() not needed in bam conversion
    def getMaxStartCoordinateInTable(self, chrom, table, startField="start"):
        maxCoord = 0
        sqlStatement = "select max(%s) from %s where chrom = '%s'" % (startField, table, chrom)
        sql = self.getSqlCursor()
        try:
            sql.execute(sqlStatement)
            maxCoord = int(sql.fetchall()[0][0])
        except:
            print "couldn't retrieve coordMax for chromosome %s" % chrom

        return maxCoord


    #TODO: deprecated
    #def getReadsDict(self, bothEnds=False, noSense=False, fullChrom=False, chrom="",
    #                 flag="", withWeight=False, withFlag=False, withMismatch=False, withID=False,
    #                 withChrom=False, withPairID=False, doUniqs=True, doMulti=False, findallOptimize=False,
    #                 readIDDict=False, readLike="", start=-1, stop=-1, limit=-1, hasMismatch=False,
    #                 flagLike=False, strand='', combine5p=False):
    #    """ returns a dictionary of reads in a variety of formats
    #    and which can be restricted by chromosome or custom-flag.
    #    Returns unique reads by default, but can return multireads
    #    with doMulti set to True.

    #    """
    #    #TODO: Need to rethink original design 1: Cannot have pairID without exporting as a readIDDict

    #    whereQuery = self.getReadWhereQuery(chrom, flag, flagLike, start, stop, hasMismatch, strand, readLike)
    #    if findallOptimize:
    #        selectQuery = "select start, sense, sum(weight)"
    #    else:
    #        selectQuery = self.getReadSelectQuery("select ID, chrom, start, readID", noSense, withWeight, withFlag, withMismatch, bothEnds)

    #    groupQuery = self.getReadGroupQuery(findallOptimize, limit, combine5p)
    #    if doUniqs:
    #        stmt = [selectQuery, "from uniqs", whereQuery, groupQuery]
    #        if doMulti:
    #            stmt.append("UNION ALL")
    #            stmt.append(selectQuery)
    #            stmt.append("from multi")
    #            stmt.append(whereQuery)
    #            stmt.append(groupQuery)
    #    else:
    #        stmt = [selectQuery, "from multi", whereQuery]

    #    if combine5p:
    #        if findallOptimize:
    #            selectQuery = "select start, sense, weight, chrom"

    #        if doUniqs:
    #            subSelect = [selectQuery, "from uniqs", whereQuery]
    #            if doMulti:
    #                subSelect.append("union all")
    #                subSelect.append(selectQuery)
    #                subSelect.append("from multi")
    #                subSelect.append(whereQuery)
    #        else:
    #            subSelect = [selectQuery, "from multi", whereQuery]

    #        sqlStmt = string.join(subSelect)
    #        if findallOptimize:
    #            selectQuery = "select start, sense, sum(weight)"

    #        stmt = [selectQuery, "from (", sqlStmt, ") group by chrom,start having ( count(start) > 1 and count(chrom) > 1) union",
    #                selectQuery, "from(", sqlStmt, ") group by chrom, start having ( count(start) = 1 and count(chrom) = 1)"]

    #    if findallOptimize:
    #        if self.memBacked:
    #            self.memcon.row_factory = None
    #        else:
    #            self.dbcon.row_factory = None

    #        stmt.append("order by start")
    #    elif readIDDict:
    #        stmt.append("order by readID, start")
    #    else:
    #        stmt.append("order by chrom, start")

    #    sql = self.getSqlCursor()
    #    sqlQuery = string.join(stmt)
    #    sql.execute(sqlQuery)

    #    resultsDict = {}
    #    if findallOptimize:
    #        resultsDict[chrom] = [{"start": int(row[0]), "sense": row[1], "weight": float(row[2])} for row in sql]
    #        if self.memBacked:
    #            self.memcon.row_factory = sqlite.Row
    #        else:
    #            self.dbcon.row_factory = sqlite.Row
    #    else:
    #        currentChrom = ""
    #        currentReadID = ""
    #        pairID = 0
    #        for row in sql:
    #            readID = row["readID"]
    #            if fullChrom:
    #                chrom = row["chrom"]
    #            else:
    #                chrom = row["chrom"][3:]

    #            if not readIDDict and chrom != currentChrom:
    #                resultsDict[chrom] = []
    #                currentChrom = chrom
    #                dictKey = chrom
    #            elif readIDDict:
    #                theReadID = readID
    #                if "::" in readID:
    #                    theReadID = readID.split("::")[0]

    #                if "/" in theReadID and withPairID:
    #                    (theReadID, pairID) = readID.split("/")

    #                if theReadID != currentReadID:
    #                    resultsDict[theReadID] = []
    #                    currentReadID = theReadID
    #                    dictKey = theReadID

    #            newrow = {"start": int(row["start"])}
    #            if bothEnds:
    #                newrow["stop"] = int(row["stop"])

    #            if not noSense:
    #                newrow["sense"] = row["sense"]

    #            if withWeight:
    #                newrow["weight"] = float(row["weight"])

    #            if withFlag:
    #                newrow["flag"] = row["flag"]

    #            if withMismatch:
    #                newrow["mismatch"] = row["mismatch"]

    #            if withID:
    #                newrow["readID"] = readID

    #            if withChrom:
    #                newrow["chrom"] = chrom

    #            if withPairID:
    #                newrow["pairID"] = pairID

    #            resultsDict[dictKey].append(newrow)

    #    return resultsDict


    def getReadsDict(self, bothEnds=False, noSense=False, fullChrom=False, chrom="",
                     flag="", withWeight=False, withFlag=False, withMismatch=False, withID=False,
                     withChrom=False, withPairID=False, doUniqs=True, doMulti=False, findallOptimize=False,
                     readIDDict=False, readLike="", start=None, stop=None, limit=-1, hasMismatch=False,
                     flagLike=False, strand='', combine5p=False):
        """ return a dictionary of non-spliced reads fetched from the bam
        file, keyed by chromosome name (default) or by readID when
        readIDDict is True.

        Each entry is a dict containing at least "start"; "stop", "sense",
        "weight", "mismatch", "readID", "chrom" and "pairID" are added
        according to the corresponding with* flags.  Multireads are kept
        only when doMulti is True; spliced reads are always skipped here.

        NOTE(review): flag, readLike, hasMismatch, strand, limit and
        combine5p are accepted for interface compatibility with the old
        SQL implementation but are not (fully) honored yet -- only doMulti
        is forwarded to readDoesNotMeetCriteria.
        """
        bamFileIterator = self.bamfile.fetch(reference=chrom, start=start, end=stop)
        resultsDict = {}
        if findallOptimize and chrom != "":
            # fast path: flat list of start/sense/weight rows for one chromosome
            resultsDict[chrom] = [{"start": int(read.pos), "sense": getReadSense(read.is_reverse), "weight": 1.0} for read in bamFileIterator if not isSpliceEntry(read.cigar)]
        else:
            pairID = 0
            for read in bamFileIterator:
                pairReadSuffix = getPairedReadNumberSuffix(read)
                readID = "%s%s" % (read.qname, pairReadSuffix)
                if isSpliceEntry(read.cigar) or self.readDoesNotMeetCriteria(read, readID, doMulti=doMulti):
                    continue

                sense = getReadSense(read.is_reverse)
                if fullChrom:
                    chrom = self.bamfile.getrname(read.rname)
                else:
                    # drop the 3-character ("chr") prefix for short names
                    chrom = self.bamfile.getrname(read.rname)[3:]

                if readIDDict:
                    theReadID = readID
                    if "::" in readID:
                        theReadID = readID.split("::")[0]

                    if "/" in theReadID and withPairID:
                        (theReadID, pairID) = readID.split("/")

                    dictKey = theReadID
                else:
                    dictKey = chrom

                newrow = {"start": int(read.pos)}
                if bothEnds:
                    # lazily derive the common read size from the first read seen
                    if self.readsize is None:
                        self.calculateReadsizeFromCigar(read.cigar)

                    newrow["stop"] = int(read.pos + self.readsize)

                if not noSense:
                    newrow["sense"] = sense

                if withWeight:
                    try:
                        # bug fix: 1/count was integer division in Python 2
                        # and always produced weight 0 for multireads
                        newrow["weight"] = 1.0 / self.multiReadIDCounts[readID]
                    except KeyError:
                        newrow["weight"] = 1.0

                if withFlag:
                    #TODO: flags are not stored in the bam file yet
                    pass

                if withMismatch:
                    try:
                        mismatchTag = read.opt("MD")
                        mismatches = getMismatches(mismatchTag, read.seq, sense)
                    except KeyError:
                        # read has no MD tag
                        mismatches = ""

                    newrow["mismatch"] = mismatches

                if withID:
                    newrow["readID"] = readID

                if withChrom:
                    newrow["chrom"] = chrom

                if withPairID:
                    newrow["pairID"] = pairID

                try:
                    resultsDict[dictKey].append(newrow)
                except KeyError:
                    resultsDict[dictKey] = [newrow]

        return resultsDict


    def readDoesNotMeetCriteria(self, read, readID, doMulti=False, flag="", readLike="", hasMismatch=False, strand=""):
        readDoesNotMeetCriteria = self.rejectMultiReadEntry(readID, doMulti)

        if flag != "":
            #TODO: need to pick a tag code for the erange flag - using ZF for now
            # although it looks like the flag is really just a label on a region.
            # since BAM can retrieve based on location if we store the regions then
            # we can just retrieve it directly from the BAM and we do not need to
            # flag the individual reads.  The location will be sufficient.
            try:
                if flag != read.opt("ZF"):
                    readDoesNotMeetCriteria = True
            except KeyError:
                readDoesNotMeetCriteria = True

        if len(readLike) > 0:
            #TODO: verify that readLike is just not used anywhere and remove
            pass

        if hasMismatch:
            try:
                mismatchTag = read.opt("MD")
            except KeyError:
                readDoesNotMeetCriteria = True

        if strand in ["+", "-"] and getReadSense(read.is_reverse) != strand:
            readDoesNotMeetCriteria = True

        return readDoesNotMeetCriteria


    def rejectMultiReadEntry(self, readID, doMulti=False, threshold=10):
        rejectRead = False
        if readID in self.multiReadIDCounts.keys():
            if self.readIDCounts[readID] > threshold or not doMulti:
                rejectRead = True

        return rejectRead


    #TODO: Secondary to getReadsDict()
    def getReadWhereQuery(self, chrom, flag, flagLike, start, stop, hasMismatch, strand, readLike="", splice=False):
        if splice:
            startText = "startL"
            stopText = "stopR"
        else:
            startText = "start"
            stopText = "stop"

        whereClause = []
        if chrom != "" and chrom != self.memChrom:
            whereClause.append("chrom = '%s'" % chrom)

        if flag != "":
            if flagLike:
                flagLikeClause = string.join(['flag LIKE "%', flag, '%"'], "")
                whereClause.append(flagLikeClause)
            else:
                whereClause.append("flag = '%s'" % flag)

        if start > -1:
            whereClause.append("%s > %d" % (startText, start))

        if stop > -1:
            whereClause.append("%s < %d" % (stopText, stop))

        if len(readLike) > 0:
            readIDClause = string.join(["readID LIKE  '", readLike, "%'"], "")
            whereClause.append(readIDClause)

        if hasMismatch:
            whereClause.append("mismatch != ''")

        if strand in ["+", "-"]:
            whereClause.append("sense = '%s'" % strand)

        if len(whereClause) > 0:
            whereStatement = string.join(whereClause, " and ")
            whereQuery = "where %s" % whereStatement
        else:
            whereQuery = ""

        return whereQuery


    #TODO: Secondary to getReadsDict()
    def getReadSelectQuery(self, baseSelect, noSense, withWeight, withFlag, withMismatch, bothEnds=False):

        selectClause = [baseSelect]
        if bothEnds:
            selectClause.append("stop")

        if not noSense:
            selectClause.append("sense")

        if withWeight:
            selectClause.append("weight")

        if withFlag:
            selectClause.append("flag")

        if withMismatch:
            selectClause.append("mismatch")

        selectQuery = string.join(selectClause, ",")

        return selectQuery


    #TODO: Secondary to getReadsDict()
    def getReadGroupQuery(self, findallOptimize, limit, combine5p):
        groupBy = []
        if findallOptimize:
            groupBy = ["GROUP BY start, sense"]

        if limit > 0 and not combine5p:
            groupBy.append("LIMIT %d" % limit)

        groupQuery = string.join(groupBy)

        return groupQuery


    #TODO: deprecated
    #def getSplicesDict(self, noSense=False, fullChrom=False, chrom="",
    #                   flag="", withWeight=False, withFlag=False, withMismatch=False,
    #                   withID=False, withChrom=False, withPairID=False, readIDDict=False,
    #                   splitRead=False, hasMismatch=False, flagLike=False, start=-1,
    #                   stop=-1, strand=""):
    #    """ returns a dictionary of spliced reads in a variety of
    #    formats and which can be restricted by chromosome or custom-flag.
    #    Returns unique spliced reads for now.
    #    """
    #    whereQuery = self.getReadWhereQuery(chrom, flag, flagLike, start, stop, hasMismatch, strand, splice=True)
    #    selectClause = "select ID, chrom, startL, stopL, startR, stopR, readID"
    #    selectQuery = self.getReadSelectQuery(selectClause, noSense, withWeight, withFlag, withMismatch)
    #    sql = self.getSqlCursor()

    #    stmt = "%s from splices %s order by chrom, startL" % (selectQuery, whereQuery)
    #    sql.execute(stmt)
    #    currentReadID = ""
    #    currentChrom = ""
    #    resultsDict = {}
    #    for row in sql:
    #        pairID = 0
    #        readID = row["readID"]
    #        if fullChrom:
    #            chrom = row["chrom"]
    #        else:
    #            chrom = row["chrom"][3:]

    #        if not readIDDict and chrom != currentChrom:
    #            resultsDict[chrom] = []
    #            currentChrom = chrom
    #            dictKey = chrom
    #        elif readIDDict:
    #            if "/" in readID:
    #                (theReadID, pairID) = readID.split("/")
    #            else:
    #                theReadID = readID

    #            if theReadID != currentReadID:
    #                resultsDict[theReadID] = []
    #                currentReadID = theReadID
    #                dictKey = theReadID

    #        newrow = {"startL": int(row["startL"])}
    #        newrow["stopL"] = int(row["stopL"])
    #        newrow["startR"] = int(row["startR"])
    #        newrow["stopR"] = int(row["stopR"])
    #        if not noSense:
    #            newrow["sense"] = row["sense"]

    #        if withWeight:
    #            newrow["weight"] = float(row["weight"])

    #        if withFlag:
    #            newrow["flag"] = row["flag"]

    #        if withMismatch:
    #            newrow["mismatch"] = row["mismatch"]

    #        if withID:
    #            newrow["readID"] = readID

    #        if withChrom:
    #            newrow["chrom"] = chrom

    #        if withPairID:
    #            newrow["pairID"] = pairID

    #        if splitRead:
    #            leftDict = newrow.copy()
    #            del leftDict["startR"]
    #            del leftDict["stopR"]
    #            rightDict = newrow
    #            del rightDict["startL"]
    #            del rightDict["stopL"]
    #            resultsDict[dictKey].append(leftDict)
    #            resultsDict[dictKey].append(rightDict)
    #        else:
    #            resultsDict[dictKey].append(newrow)

    #    return resultsDict


    def getSplicesDict(self, noSense=False, fullChrom=False, chrom="",
                       flag="", withWeight=False, withFlag=False, withMismatch=False,
                       withID=False, withChrom=False, withPairID=False, readIDDict=False,
                       splitRead=False, hasMismatch=False, flagLike=False, start=None,
                       stop=None, strand=""):
        """ First pass of rewrite
            1) Leave as much in place as possible
            2) For now treat multis just like regular splices

            Returns a dict of spliced-read entries pulled straight from the
            BAM file, keyed by chromosome (default) or by readID when
            readIDDict=True.  Each entry has startL/stopL/startR/stopR splice
            coordinates plus the requested optional fields; splitRead=True
            emits the left and right halves as two separate entries.
            NOTE(review): flag/flagLike/hasMismatch/strand are accepted for
            signature compatibility but not honored by this BAM pass yet,
            and weight is fixed at 1.0.
        """
        """
        whereQuery = self.getReadWhereQuery(chrom, flag, flagLike, start, stop, hasMismatch, strand, splice=True)
        selectClause = "select ID, chrom, startL, stopL, startR, stopR, readID"
        selectQuery = self.getReadSelectQuery(selectClause, noSense, withWeight, withFlag, withMismatch)
        sql = self.getSqlCursor()

        stmt = "%s from splices %s order by chrom, startL" % (selectQuery, whereQuery)
        sql.execute(stmt)
        """

        # Edits are starting here - trying to ignore the entire sql construct
        bamFileIterator = self.bamfile.fetch(reference=chrom, start=start, end=stop)
        resultsDict = {}
        pairID = 0
        for read in bamFileIterator:
            # only reads whose cigar contains an N (ref-skip) are splices
            if not isSpliceEntry(read.cigar):
                continue

            sense = getReadSense(read.is_reverse)
            readID = read.qname
            if fullChrom:
                chrom = self.bamfile.getrname(read.rname)
            else:
                # strip the "chr" prefix when short chromosome names are wanted
                chrom = self.bamfile.getrname(read.rname)[3:]

            if readIDDict:
                theReadID = readID
                # NOTE(review): pairID is initialised once outside the loop, so
                # a read without "/" (or with withPairID=False) keeps the pairID
                # of an earlier read -- confirm this is intended
                if "/" in theReadID and withPairID:
                    (theReadID, pairID) = readID.split("/")

                dictKey = theReadID
            else:
                dictKey = chrom

            # lazily derive the read size from the first cigar seen
            if self.readsize is None:
                self.calculateReadsizeFromCigar(read.cigar)

            startL, startR, stopL, stopR = getSpliceBounds(read.pos, self.readsize, read.cigar)
            newrow = {"startL": int(startL)}
            newrow["stopL"] = int(stopL)
            newrow["startR"] = int(startR)
            newrow["stopR"] = int(stopR)
            if not noSense:
                newrow["sense"] = sense

            if withWeight:
                # multis are treated like uniques for now (see docstring)
                newrow["weight"] = 1.0

            if withFlag:
                #newrow["flag"] = row["flag"]
                pass

            if withMismatch:
                # decode mismatches from the MD tag; reads without one
                # get an empty mismatch string
                try:
                    mismatchTag = read.opt("MD")
                    mismatches = getMismatches(mismatchTag, read.seq, sense)
                except KeyError:
                    mismatches = ""

                newrow["mismatch"] = mismatches

            if withID:
                newrow["readID"] = readID

            if withChrom:
                newrow["chrom"] = chrom

            if withPairID:
                newrow["pairID"] = pairID

            if splitRead:
                # emit left and right halves as separate entries; note the
                # right half reuses (mutates) newrow itself
                leftDict = newrow.copy()
                del leftDict["startR"]
                del leftDict["stopR"]
                rightDict = newrow
                del rightDict["startL"]
                del rightDict["stopL"]
                try:
                    resultsDict[dictKey].append(leftDict)
                except KeyError:
                    resultsDict[dictKey] = [leftDict]

                resultsDict[dictKey].append(rightDict)
            else:
                try:
                    resultsDict[dictKey].append(newrow)
                except KeyError:
                    resultsDict[dictKey] = [newrow]

        return resultsDict


    #TODO: deprecated
    #def getCounts(self, chrom="", rmin="", rmax="", uniqs=True, multi=False,
    #              splices=False, reportCombined=True, sense="both"):
    #    """ return read counts for a given region.
    #    """
    #    ucount = 0
    #    mcount = 0
    #    scount = 0
    #    restrict = ""
    #    if sense in ["+", "-"]:
    #        restrict = " sense ='%s' " % sense

    #    if uniqs:
    #        try:
    #            ucount = float(self.getUniqsCount(chrom, rmin, rmax, restrict))
    #        except:
    #            ucount = 0

    #    if multi:
    #        try:
    #            mcount = float(self.getMultiCount(chrom, rmin, rmax, restrict))
    #        except:
    #            mcount = 0

    #    if splices:
    #        try:
    #            scount = float(self.getSplicesCount(chrom, rmin, rmax, restrict))
    #        except:
    #            scount = 0

    #    if reportCombined:
    #        total = ucount + mcount + scount
    #        return total
    #    else:
    #        return (ucount, mcount, scount)


    def getFullReadCount(self):
        """ Tally uniq, multi and splice read counts over every chromosome
            and return them as a {"uniq", "multi", "splice"} dictionary.
        """
        fullReadCount = {"uniq": 0,
                         "multi": 0,
                         "splice": 0
        }

        for chromosome in self.getChromosomes():
            counts = self.getCounts(chrom=chromosome, reportCombined=False, multi=True, splices=True)
            for key, count in zip(["uniq", "multi", "splice"], counts):
                fullReadCount[key] += count

        return fullReadCount


    def getCounts(self, chrom=None, rmin=None, rmax=None, uniqs=True, multi=False,
                  splices=False, reportCombined=True, sense="both"):
        """ return read counts for a given region.

            Counts come from a single BAM pass; each category is reported
            only when its flag is set (otherwise it contributes 0).
        """
        counter = self.getBamReadCounter(chrom=chrom, rmin=rmin, rmax=rmax, sense=sense)
        ucount = counter.uniqReadCount if uniqs else 0
        mcount = counter.multiReadCount if multi else 0
        scount = counter.spliceReadCount if splices else 0

        if reportCombined:
            return ucount + mcount + scount

        return (ucount, mcount, scount)


    def getBamReadCounter(self, chrom="", rmin=None, rmax=None, sense="both"):
        """ Run a ReadCounter callback over the BAM reads in the given
            region and return it with its per-category tallies filled in.
        """
        counter = ReadCounter(self.multiReadIDCounts, sense)
        self.bamfile.fetch(reference=chrom, start=rmin, end=rmax, callback=counter)

        return counter

    # Not called
    def getTotalCounts(self, chrom="", rmin=None, rmax=None):
        """ return the combined uniq + multi + splice read count for a region.

            rmin/rmax now default to None (whole chromosome) to match the
            BAM-based getCounts(); the previous "" defaults were forwarded
            to bamfile.fetch(), which does not accept empty-string coordinates.
        """
        return self.getCounts(chrom, rmin, rmax, uniqs=True, multi=True, splices=True, reportCombined=True, sense="both")


    #TODO: Secondary to getCounts() - not needed on bam conversion
    def getTableEntryCount(self, table, chrom="", rmin="", rmax="", restrict="", distinct=False, startField="start"):
        """ returns the number of rows in the specified table, optionally
            restricted by chromosome, a [rmin, rmax] range on startField,
            and an extra raw SQL restriction.  distinct=True counts distinct
            (chrom, start, sense) positions instead of summing weights.
        """
        whereClause = []
        if chrom !=""  and chrom != self.memChrom:
            whereClause = ["chrom='%s'" % chrom]

        if rmin != "":
            whereClause.append("%s >= %s" % (startField, str(rmin)))

        if rmax != "":
            whereClause.append("%s <= %s" % (startField, str(rmax)))

        if restrict != "":
            whereClause.append(restrict)

        if len(whereClause) > 0:
            whereQuery = "where %s" % " and ".join(whereClause)
        else:
            whereQuery = ""

        sql = self.getSqlCursor()

        if distinct:
            sql.execute("select count(distinct chrom+%s+sense) from %s %s" % (startField, table, whereQuery))
        else:
            sql.execute("select sum(weight) from %s %s" % (table, whereQuery))

        result = sql.fetchone()

        try:
            count = int(result[0])
        except (TypeError, ValueError):
            # narrowed from a bare except: sum(weight) yields NULL (-> None)
            # on an empty selection, which is the expected failure here
            count = 0

        return count


    #TODO: deprecated
    #def getSplicesCount(self, chrom="", rmin="", rmax="", restrict="", distinct=False):
    #    """ returns the number of row in the splices table.
    #    """
    #    spliceCount = 0
    #    if self.dataType == "RNA":
    #        spliceCount = self.getTableEntryCount("splices", chrom, rmin, rmax, restrict, distinct, startField="startL")

    #    return spliceCount


    def getSplicesCount(self, chrom=None, rmin=None, rmax=None, restrict="both", distinct=False):
        """ Return the number of spliced reads in the given region,
            counted in a single BAM pass.
        """
        return self.getBamReadCounter(chrom=chrom, rmin=rmin, rmax=rmax, sense=restrict).spliceReadCount

    #TODO: deprecated
    #def getUniqsCount(self, chrom="", rmin="", rmax="", restrict="", distinct=False):
    #    """ returns the number of distinct readIDs in the uniqs table.
    #    """
    #    return self.getTableEntryCount("uniqs", chrom, rmin, rmax, restrict, distinct)


    def getUniqsCount(self, chrom=None, rmin=None, rmax=None, restrict="both", distinct=False):
        """ Return the number of uniquely-mapped reads in the given region,
            counted in a single BAM pass.
        """
        return self.getBamReadCounter(chrom=chrom, rmin=rmin, rmax=rmax, sense=restrict).uniqReadCount


    #TODO: deprecated
    #def getMultiCount(self, chrom="", rmin="", rmax="", restrict="", distinct=False):
    #    """ returns the total weight of readIDs in the multi table.
    #    """
    #    return self.getTableEntryCount("multi", chrom, rmin, rmax, restrict, distinct)


    def getMultiCount(self, chrom=None, rmin=None, rmax=None, restrict="both", distinct=False):
        """ returns the total weight of multireads in the given region.

            Defaults brought in line with getUniqsCount()/getSplicesCount():
            the old ""/""/""/"" defaults were forwarded to bamfile.fetch(),
            which expects coordinates or None, and "" is not a valid sense.
        """
        readCounter = self.getBamReadCounter(chrom=chrom, rmin=rmin, rmax=rmax, sense=restrict)

        return readCounter.multiReadCount


    #TODO: Primary
    def getReadIDs(self, uniqs=True, multi=False, splices=False, paired=False, limit=-1):
        """ get readID's.

            Unions the requested tables, groups by readID, and optionally
            strips the /1 or /2 pair suffix (paired=True).
        """
        subQueries = []
        if uniqs:
            subQueries.append("select readID from uniqs")

        if multi:
            subQueries.append("select readID from multi")

        if splices:
            subQueries.append("select readID from splices")

        selectPart = " union ".join(subQueries)

        limitPart = ""
        if limit > 0:
            limitPart = "LIMIT %d" % limit

        sql = self.getSqlCursor()
        sql.execute("%s group by readID %s" % (selectPart, limitPart))
        rows = sql.fetchall()

        if paired:
            return [row[0].split("/")[0] for row in rows]

        return [row[0] for row in rows]


    #TODO: Primary
    def getMismatches(self, mischrom=None, verbose=False, useSplices=True):
        """ returns the uniq and spliced mismatches in a dictionary.

            Result maps chromosome -> list of [readStart, changePosition,
            changedBase, referenceBase] entries, decoded from the
            comma-separated mismatch strings (e.g. "A13G": reference base,
            1-based offset within the read, observed base).
        """
        readlen = self.getReadSize()
        if mischrom:
            hitChromList = [mischrom]
        else:
            hitChromList = self.getChromosomes()
            hitChromList.sort()

        snpDict = {}
        for chromosome in hitChromList:
            if verbose:
                print "getting mismatches from chromosome %s" % (chromosome)

            snpDict[chromosome] = []
            # splice-aware pass: offsets that fall past the left exon have
            # to be projected onto the right exon's coordinates
            if useSplices and self.dataType == "RNA":
                spliceDict = self.getSplicesDict(fullChrom=True, chrom=chromosome, withMismatch=True, readIDDict=True, hasMismatch=True)
                spliceIDList = spliceDict.keys()
                for spliceID in spliceIDList:
                    # only the first entry per readID is inspected
                    spliceEntry = spliceDict[spliceID][0]
                    startpos = spliceEntry["startL"]
                    lefthalf = spliceEntry["stopL"]
                    rightstart = spliceEntry["startR"]
                    sense = spliceEntry["sense"]
                    mismatches = spliceEntry["mismatch"]
                    spMismatchList = mismatches.split(",")
                    for mismatch in spMismatchList:
                        # skip ambiguous-base (N) mismatches
                        if "N" in mismatch:
                            continue

                        change_len = len(mismatch)
                        if sense == "+":
                            change_from = mismatch[0]
                            change_base = mismatch[change_len-1]
                            change_pos = int(mismatch[1:change_len-1])
                        elif sense == "-":
                            # minus strand: complement both bases and flip
                            # the offset to the other end of the read
                            change_from = getReverseComplement(mismatch[0])
                            change_base = getReverseComplement(mismatch[change_len-1])
                            change_pos = readlen - int(mismatch[1:change_len-1]) + 1
                        # NOTE(review): if sense were neither "+" nor "-",
                        # the change_* names below would be stale or unbound

                        firsthalf = int(lefthalf)-int(startpos)+1
                        secondhalf = 0
                        if int(change_pos) <= int(firsthalf):
                            # mismatch lies in the left exon
                            change_at = startpos + change_pos - 1
                        else:
                            # mismatch lies in the right exon
                            secondhalf = change_pos - firsthalf
                            change_at = rightstart + secondhalf

                        snpDict[chromosome].append([startpos, change_at, change_base, change_from])

            # unspliced (uniq) reads: straight offset from the read start
            hitDict = self.getReadsDict(fullChrom=True, chrom=chromosome, withMismatch=True, hasMismatch=True)
            if chromosome not in hitDict.keys():
                continue

            for readEntry in hitDict[chromosome]:
                start = readEntry["start"]
                sense = readEntry["sense"]
                mismatches = readEntry["mismatch"]
                mismatchList = mismatches.split(",")
                for mismatch in mismatchList:
                    if "N" in mismatch:
                        continue

                    change_len = len(mismatch)
                    if sense == "+":
                        change_from = mismatch[0]
                        change_base = mismatch[change_len-1]
                        change_pos = int(mismatch[1:change_len-1])
                    elif sense == "-":
                        change_from = getReverseComplement(mismatch[0])
                        change_base = getReverseComplement(mismatch[change_len-1])
                        change_pos = readlen - int(mismatch[1:change_len-1]) + 1

                    change_at = start + change_pos - 1
                    snpDict[chromosome].append([start, change_at, change_base, change_from])

        return snpDict


    #TODO: Primary
    def getChromProfile(self, chromosome, cstart=-1, cstop=-1, useMulti=True,
                        useSplices=False, normalizationFactor=1.0, trackStrand=False,
                        keepStrand="both", shiftValue=0):
        """return a profile of the chromosome as an array of per-base read coverage....
            keepStrand = 'both', 'plusOnly', or 'minusOnly'.
            Will also shift position of unique and multireads (but not splices) if shift is a natural number
        """
        metadata = self.getMetadata()
        try:
            readlen = int(metadata["readsize"])
        except KeyError:
            readlen = 0

        dataType = metadata["dataType"]
        scale = 1. / normalizationFactor
        # per-strand shift: minus-strand reads are shifted upstream
        shift = {}
        shift["+"] = int(shiftValue)
        shift["-"] = -1 * int(shiftValue)

        # bug fix: these two branches were swapped.  A bounded region
        # (cstop > 0) needs an array spanning just that region, while an
        # unbounded call must size the array out to the last read on the
        # chromosome.  Previously the default call (cstart = cstop = -1)
        # allocated only ~readlen entries and the bare except below then
        # silently discarded every read.
        if cstop > 0:
            lastNT = cstop - cstart + readlen + shift["+"]
        else:
            lastNT = self.getMaxCoordinate(chromosome, doMulti=useMulti, doSplices=useSplices) + readlen

        chromModel = array("f",[0.] * lastNT)
        hitDict = self.getReadsDict(fullChrom=True, chrom=chromosome, withWeight=True, doMulti=useMulti, start=cstart, stop=cstop, findallOptimize=True)
        if cstart < 0:
            cstart = 0

        for readEntry in hitDict[chromosome]:
            hstart = readEntry["start"]
            sense = readEntry["sense"]
            weight = readEntry["weight"]
            hstart = hstart - cstart + shift[sense]
            # accumulate the scaled weight over the read footprint; when
            # strand-tracking, minus-strand coverage is subtracted instead.
            # Out-of-range positions are skipped.
            for currentpos in range(hstart,hstart+readlen):
                try:
                    if not trackStrand or (sense == "+" and keepStrand != "minusOnly"):
                        chromModel[currentpos] += scale * weight
                    elif sense == "-" and keepStrand != "plusOnly":
                        chromModel[currentpos] -= scale * weight
                except:
                    continue

        del hitDict
        if useSplices and dataType == "RNA":
            if cstop > 0:
                spliceDict = self.getSplicesDict(fullChrom=True, chrom=chromosome, withID=True, start=cstart, stop=cstop)
            else:
                spliceDict = self.getSplicesDict(fullChrom=True, chrom=chromosome, withID=True)

            if chromosome in spliceDict:
                for spliceEntry in spliceDict[chromosome]:
                    Lstart = spliceEntry["startL"]
                    Lstop = spliceEntry["stopL"]
                    Rstart = spliceEntry["startR"]
                    Rstop = spliceEntry["stopR"]
                    rsense = spliceEntry["sense"]
                    if (Rstop - cstart) < lastNT:
                        for index in range(abs(Lstop - Lstart)):
                            currentpos = Lstart - cstart + index
                            # we only track unique splices
                            if not trackStrand or (rsense == "+" and keepStrand != "minusOnly"):
                                chromModel[currentpos] += scale
                            elif rsense == "-" and keepStrand != "plusOnly":
                                chromModel[currentpos] -= scale

                        for index in range(abs(Rstop - Rstart)):
                            currentpos = Rstart - cstart + index
                            # we only track unique splices
                            if not trackStrand or (rsense == "+" and keepStrand != "minusOnly"):
                                chromModel[currentpos] += scale
                            elif rsense == "-" and keepStrand != "plusOnly":
                                chromModel[currentpos] -= scale

            del spliceDict

        return chromModel


    #TODO: deprecated
    #def insertMetadata(self, valuesList):
    #    """ inserts a list of (pname, pvalue) into the metadata
    #    table.
    #    """
    #    self.dbcon.executemany("insert into metadata(name, value) values (?,?)", valuesList)
    #    self.dbcon.commit()


    def insertMetadata(self, valuesList):
        """ inserts a list of (pname, pvalue) into the metadata
        table.
        """
        for (pname, pvalue) in valuesList:
            self.metadata[pname] = pvalue



    def updateMetadata(self, pname, newValue, originalValue=""):
        """ update a metadata field given the original value and the new value.
        """
        stmt = "update metadata set value='%s' where name='%s'" % (str(newValue), pname)
        if originalValue != "":
            stmt += " and value='%s' " % str(originalValue)

        self.dbcon.execute(stmt)
        self.dbcon.commit()


    #TODO: Primary
    def insertUniqs(self, valuesList):
        """ Bulk-insert (readID, chrom, start, stop, sense, weight, flag,
        mismatch) rows into the uniqs table and commit.
        """
        insertStatement = "insert into uniqs(ID, readID, chrom, start, stop, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?)"
        self.dbcon.executemany(insertStatement, valuesList)
        self.dbcon.commit()


    #TODO: Primary
    def insertMulti(self, valuesList):
        """ Bulk-insert (readID, chrom, start, stop, sense, weight, flag,
        mismatch) rows into the multi table and commit.
        """
        insertStatement = "insert into multi(ID, readID, chrom, start, stop, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?)"
        self.dbcon.executemany(insertStatement, valuesList)
        self.dbcon.commit()


    #TODO: Primary
    def insertSplices(self, valuesList):
        """ Bulk-insert (readID, chrom, startL, stopL, startR, stopR, sense,
        weight, flag, mismatch) rows into the splices table and commit.
        """
        insertStatement = "insert into splices(ID, readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?,?,?)"
        self.dbcon.executemany(insertStatement, valuesList)
        self.dbcon.commit()


    #TODO: Primary
    def insertMultisplices(self, valuesList):
        """ Bulk-insert (readID, chrom, startL, stopL, startR, stopR, sense,
        weight, flag, mismatch) rows into the multisplices table and commit.
        """
        insertStatement = "insert into multisplices(ID, readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?,?,?)"
        self.dbcon.executemany(insertStatement, valuesList)
        self.dbcon.commit()


    #TODO: Primary
    def flagReads(self, regionsList, uniqs=True, multi=False, splices=False, sense="both"):
        """ update reads on file database in a list region of regions for a chromosome to have a new flag.
            regionsList must have 4 fields per region of the form (flag, chrom, start, stop) or, with
            sense set to '+' or '-', 5 fields per region of the form (flag, chrom, start, stop, sense).
        """
        restrict = " and sense = ? " if sense != "both" else ""

        # uniqs and multi share the same update shape, differing only in table
        for table, wanted in [("uniqs", uniqs), ("multi", multi)]:
            if wanted:
                self.dbcon.executemany("UPDATE " + table + " SET flag = ? where chrom = ? and start >= ? and start < ? " + restrict, regionsList)

        if self.dataType == "RNA" and splices:
            # splices get the flag appended for each half separately
            self.dbcon.executemany("UPDATE splices SET flag = flag || ' L:' || ? where chrom = ? and startL >= ? and startL < ? " + restrict, regionsList)
            self.dbcon.executemany("UPDATE splices SET flag = flag || ' R:' || ? where chrom = ? and startR >= ? and startR < ? " + restrict, regionsList)

        self.dbcon.commit()


    #TODO: Primary
    def setFlags(self, flag, uniqs=True, multi=True, splices=True):
        """ Stamp the flag field across the whole dataset; splices are only
        touched for RNA data.
        """
        if uniqs:
            self.setFlagsInDB("uniqs", flag)

        if multi:
            self.setFlagsInDB("multi", flag)

        if splices and self.dataType == "RNA":
            self.setFlagsInDB("splices", flag)

        self.dbcon.commit()


    #TODO: Secondary to setFlags()
    def setFlagsInDB(self, table, flag):
        """ set the flag field for every entry in a table.
            NOTE: table and flag are interpolated directly into the SQL,
            so they must come from trusted internal callers only.
        """
        self.dbcon.execute("UPDATE {0} SET flag = '{1}'".format(table, flag))


    #TODO: Primary
    def resetFlags(self, uniqs=True, multi=True, splices=True):
        """ Clear the flag fields across the dataset by setting them to the
        empty string.  Useful for rerunning an analysis from scratch.
        """
        self.setFlags("", uniqs=uniqs, multi=multi, splices=splices)


    #TODO: Primary
    def reweighMultireads(self, readList):
        """ batch-update multiread weights; readList rows are
            (weight, chrom, start, readID) tuples.  Note: unlike the insert
            methods this does not commit -- callers are expected to commit.
        """
        self.dbcon.executemany("UPDATE multi SET weight = ? where chrom = ? and start = ? and readID = ? ", readList)


    def setSynchronousPragma(self, value="ON"):
        try:
            self.dbcon.execute("PRAGMA SYNCHRONOUS = %s" % value)
        except:
            print "warning: couldn't set PRAGMA SYNCHRONOUS = %s" % value


    #TODO: deprecated
    #def setDBcache(self, cache, default=False):
    #    self.dbcon.execute("PRAGMA CACHE_SIZE = %d" % cache)
    #    if default:
    #        self.dbcon.execute("PRAGMA DEFAULT_CACHE_SIZE = %d" % cache)


    def setDBcache(self, cache, default=False):
        # Intentional no-op: the sqlite CACHE_SIZE pragmas (see the
        # commented-out version above) became meaningless after the BAM
        # conversion; the stub is kept so existing callers keep working.
        pass


    def execute(self, statement, returnResults=False):
        """ Run an arbitrary SQL statement on the active cursor; optionally
        fetch and return all resulting rows.
        """
        cursor = self.getSqlCursor()
        cursor.execute(statement)
        if returnResults:
            return cursor.fetchall()


    def executeCommit(self, statement):
        """ Run a statement, then commit on whichever connection is active
        (the in-memory copy when memBacked, the file database otherwise).
        """
        self.execute(statement)
        connection = self.memcon if self.memBacked else self.dbcon
        connection.commit()


    def buildIndex(self, cache=100000):
        """ Builds the file indeces for the main tables.
            Cache is the number of 1.5 kb pages to keep in memory.
            100000 pages translates into 150MB of RAM, which is our default.
        """
        if cache > self.getDefaultCacheSize():
            self.setDBcache(cache)
        self.setSynchronousPragma("OFF")
        self.dbcon.execute("CREATE INDEX uPosIndex on uniqs(chrom, start)")
        print "built uPosIndex"
        self.dbcon.execute("CREATE INDEX uChromIndex on uniqs(chrom)")
        print "built uChromIndex"
        self.dbcon.execute("CREATE INDEX mPosIndex on multi(chrom, start)")
        print "built mPosIndex"
        self.dbcon.execute("CREATE INDEX mChromIndex on multi(chrom)")
        print "built mChromIndex"

        if self.dataType == "RNA":
            self.dbcon.execute("CREATE INDEX sPosIndex on splices(chrom, startL)")
            print "built sPosIndex"
            self.dbcon.execute("CREATE INDEX sPosIndex2 on splices(chrom, startR)")
            print "built sPosIndex2"
            self.dbcon.execute("CREATE INDEX sChromIndex on splices(chrom)")
            print "built sChromIndex"

        self.dbcon.commit()
        self.setSynchronousPragma("ON")


    def dropIndex(self):
        """ drops the file indices for the main tables.

            Best-effort: a missing index aborts the remaining drops (except
            sPosIndex2, which has its own guard for older datasets) but only
            prints a warning; SYNCHRONOUS is always restored afterwards.
        """
        try:
            self.setSynchronousPragma("OFF")
            self.dbcon.execute("DROP INDEX uPosIndex")
            self.dbcon.execute("DROP INDEX uChromIndex")
            self.dbcon.execute("DROP INDEX mPosIndex")
            self.dbcon.execute("DROP INDEX mChromIndex")

            if self.dataType == "RNA":
                self.dbcon.execute("DROP INDEX sPosIndex")
                try:
                    # sPosIndex2 may not exist on datasets built before it
                    # was added to buildIndex(); ignore its absence
                    self.dbcon.execute("DROP INDEX sPosIndex2")
                except:
                    pass

                self.dbcon.execute("DROP INDEX sChromIndex")

            self.dbcon.commit()
        except:
            # deliberate broad catch: index drops are non-critical cleanup
            print "problem dropping index"

        self.setSynchronousPragma("ON")


    def memSync(self, chrom="", index=False):
        """ makes a copy of the dataset into memory for faster access.
        Can be restricted to a "full" chromosome. Can also build the
        memory indices.

        Sets self.memBacked/self.memChrom so later queries are routed to
        the in-memory connection (self.memcon).
        """
        self.memcon = ""
        self.memcon = sqlite.connect(":memory:")
        self.initializeTables(self.memcon)
        cursor = self.dbcon.cursor()
        whereclause = ""
        if chrom != "":
            print "memSync %s" % chrom
            whereclause = " where chrom = '%s' " % chrom
            self.memChrom = chrom
        else:
            self.memChrom = ""

        self.memcon.execute("PRAGMA temp_store = MEMORY")
        self.memcon.execute("PRAGMA CACHE_SIZE = 1000000")
        # copy metadata to memory
        self.memcon.execute("delete from metadata")
        results = cursor.execute("select name, value from metadata")
        results2 = []
        for row in results:
            results2.append((row["name"], row["value"]))

        self.memcon.executemany("insert into metadata(name, value) values (?,?)", results2)

        self.copyDBEntriesToMemory("uniqs", whereclause)
        self.copyDBEntriesToMemory("multi", whereclause)
        if self.dataType == "RNA":
            self.copySpliceDBEntriesToMemory(whereclause)

        if index:
            if chrom != "":
                # single-chromosome copy: chrom column is constant, so plain
                # positional indices suffice
                self.memcon.execute("CREATE INDEX uPosIndex on uniqs(start)")
                self.memcon.execute("CREATE INDEX mPosIndex on multi(start)")
                if self.dataType == "RNA":
                    self.memcon.execute("CREATE INDEX sPosLIndex on splices(startL)")
                    self.memcon.execute("CREATE INDEX sPosRIndex on splices(startR)")
            else:
                # full copy: mirror the compound (chrom, position) file indices
                self.memcon.execute("CREATE INDEX uPosIndex on uniqs(chrom, start)")
                self.memcon.execute("CREATE INDEX mPosIndex on multi(chrom, start)")
                if self.dataType == "RNA":
                    self.memcon.execute("CREATE INDEX sPosLIndex on splices(chrom, startL)")
                    self.memcon.execute("CREATE INDEX sPosRIndex on splices(chrom, startR)")

        self.memBacked = True
        self.memcon.row_factory = sqlite.Row
        self.memcon.commit()


    def copyDBEntriesToMemory(self, dbName, whereClause=""):
        """ Copy uniqs or multi rows from the file database into the
        in-memory database, optionally restricted by whereClause.
        """
        cursor = self.dbcon.cursor()
        sourceRows = cursor.execute("select chrom, start, stop, sense, weight, flag, mismatch, readID from %s %s" % (dbName, whereClause))
        copiedRows = [(row["readID"], row["chrom"], int(row["start"]), int(row["stop"]), row["sense"], row["weight"], row["flag"], row["mismatch"])
                      for row in sourceRows]

        self.memcon.executemany("insert into %s(ID, readID, chrom, start, stop, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?)" % dbName, copiedRows)


    def copySpliceDBEntriesToMemory(self, whereClause=""):
        """ Copy splices rows from the file database into the in-memory
        database, optionally restricted by whereClause.
        """
        cursor = self.dbcon.cursor()
        sourceRows = cursor.execute("select chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch, readID from splices %s" % whereClause)
        copiedRows = [(row["readID"], row["chrom"], int(row["startL"]), int(row["stopL"]), int(row["startR"]), int(row["stopR"]), row["sense"],
                       row["weight"], row["flag"], row["mismatch"])
                      for row in sourceRows]

        self.memcon.executemany("insert into splices(ID, readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?,?,?)", copiedRows)


def getReadSense(reverse):
    """Map an alignment's reverse flag to a strand character: "-" when
    reverse is truthy, "+" otherwise."""
    return "-" if reverse else "+"


def isSpliceEntry(cigarTupleList):
    """Return True if any CIGAR operation is 3 (N, skipped region/splice)."""
    return any(operation == 3 for operation, length in cigarTupleList)


def getSpliceRightStart(start, cigarTupleList):
    """Return the start position of the right-hand segment of a spliced
    alignment: the left segment's end plus the length of the first N
    (splice) operation.  Returns None implicitly if there is no splice
    operation in the CIGAR.
    """
    consumed = 0
    for operation, length in cigarTupleList:
        if operation == 3:
            # left segment ends at start+consumed; the right one begins
            # immediately after the skipped intron of this length
            return int(start + consumed + length)

        consumed += length


def getSpliceBounds(start, readsize, cigarTupleList):
    """Return (start, startR, stopL, stopR) for a spliced alignment.

    The left segment spans [start, stopL) and the right segment spans
    [startR, stopR); when the CIGAR holds several N operations, the bounds
    of the last splice win.  readsize is accepted but unused.

    NOTE(review): a CIGAR with no N (splice) operation leaves stopL/startR
    unbound and raises NameError — callers are expected to pass only
    spliced reads.
    """
    #TODO: check operations here - we want to make sure we are adding the correct ones
    segmentLength = 0
    for operation, length in cigarTupleList:
        if operation != 3:
            segmentLength += length
            continue

        # splice gap: close the left segment and begin the right one
        stopL = int(start + segmentLength)
        startR = int(stopL + length)
        segmentLength = 0

    stopR = startR + segmentLength

    return start, startR, stopL, stopR


def getPairedReadNumberSuffix(read):
    """Return "/1" or "/2" for a properly paired read flagged as read 1 or
    read 2 respectively, or "" for anything else."""
    # paired-end check: must be a proper pair AND flagged as one of the mates
    properlyPaired = read.is_proper_pair and (read.is_read1 or read.is_read2)
    if not properlyPaired:
        return ""

    if read.is_read1:
        return "/1"

    if read.is_read2:
        return "/2"

    return ""


def isPairedRead(read):
    """True when the read is a proper pair and is flagged as read 1 or read 2."""
    properPair = read.is_proper_pair
    if not properPair:
        # short-circuit: mate flags are not consulted for unpaired reads
        return properPair

    return read.is_read1 or read.is_read2


def getMismatches(mismatchTag, querySequence="", sense="+"):
    """Convert a SAM MD-style mismatch tag into erange's comma-separated
    Eland-compatible mismatch string.

    mismatchTag - MD tag text, e.g. "10A5^AC6": runs of matching bases
        interleaved with mismatched reference bases and "^"-prefixed
        deletions.
    querySequence - read sequence used to report the base actually called
        at each mismatch position; "N" is reported when empty.
    sense - "-" reverse-complements both bases before reporting.

    Returns entries of the form <refBase><1-based position><calledBase>
    (e.g. "A12T") joined by commas.  Deletions are skipped; "" is returned
    if a mismatch position falls outside querySequence.
    """
    output = []
    deletionMarker = "^"
    position = 0

    # raw strings so the regex escapes survive intact
    lengths = re.findall(r"\d+", mismatchTag)
    mismatchSequences = re.findall(r"\d+([ACGTN]|\^[ACGTN]+)", mismatchTag)

    for mismatchEntry, mismatch in enumerate(mismatchSequences):
        # advance past the run of matching bases preceding this entry
        position += int(lengths[mismatchEntry])
        if mismatch.startswith(deletionMarker):
            # deletion from the reference: no query base to report
            continue

        try:
            if querySequence:
                genomicNucleotide = querySequence[position]
            else:
                genomicNucleotide = "N"

            if sense == "-":
                mismatch = getReverseComplement(mismatch)
                genomicNucleotide = getReverseComplement(genomicNucleotide)

            # Eland positions are 1-based
            erange1BasedElandCompatiblePosition = int(position + 1)
            output.append("%s%d%s" % (mismatch, erange1BasedElandCompatiblePosition, genomicNucleotide))
            position += 1
        except IndexError:
            return ""

    return ",".join(output)
