###########################################################################
#                                                                         #
# C O P Y R I G H T   N O T I C E                                         #
#  Copyright (c) 2003-10 by:                                              #
#    * California Institute of Technology                                 #
#                                                                         #
#    All Rights Reserved.                                                 #
#                                                                         #
# Permission is hereby granted, free of charge, to any person             #
# obtaining a copy of this software and associated documentation files    #
# (the "Software"), to deal in the Software without restriction,          #
# including without limitation the rights to use, copy, modify, merge,    #
# publish, distribute, sublicense, and/or sell copies of the Software,    #
# and to permit persons to whom the Software is furnished to do so,       #
# subject to the following conditions:                                    #
#                                                                         #
# The above copyright notice and this permission notice shall be          #
# included in all copies or substantial portions of the Software.         #
#                                                                         #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
# SOFTWARE.                                                               #
###########################################################################
#
# basic analysis code
from cistematic.core.motif import matrixRow
import cistematic.core
import string

try:
    from pysqlite2 import dbapi2 as sqlite
except:
    from sqlite3 import dbapi2 as sqlite
from os import environ

if environ.get("CISTEMATIC_ROOT"):
    cisRoot = environ.get("CISTEMATIC_ROOT") 
else:
    cisRoot = "/proj/genome"

knownMotifsDB = "%s/db/known_motifs.db" % cisRoot


class AnalyzeMotifs:
    motifSize = {}
    coappear  = {}
    motifToAnnotations = {}
    annotationEntries = []
    genomeMatches = {}
    loadedGenomes = []
    dbName = ""
    analysisID = "default"


    def initializeKnownMotifs(self, source=""):
        """  loads known motifs from motif database, optionally limiting by source.
        """
        self.annotationEntries = []
        dbkm = sqlite.connect(knownMotifsDB, timeout=60)
        sqlkm = dbkm.cursor()
        if len(source) > 0:
            sqlkm.execute("select * from motifs where source = '%s' " % source)
        else:
            sqlkm.execute("select * from motifs")

        dbEntries = sqlkm.fetchall()
        dbkm.close()

        for entry in dbEntries:
            (index, source, name, theseq, reference, other_info) = entry
            self.annotationEntries.append((index, source, name, theseq, reference, other_info))


    def findAnnotations(self, motID, thresh=-1, numberN=0):
        mot = self.findMotif(motID)
        if thresh < 0:
            thresh = self.threshold
    
        for entry in self.annotationEntries:
            (id, source, name, theseq, reference, other) = entry
            loc = mot.locateMotif(theseq, thresh, numberN)
            if len(loc) > 0 :
                self.motifToAnnotations[motID].append((source, name, theseq, reference, other, loc))

            if len(self.motifToAnnotations[motID]) >= 500:
                break


    def findAnnotationsRE(self, motID, numMismatches=0):
        """ Calls the motif's consensus locator on each entry in annotationEntries.
        """
        mot = self.findMotif(motID)
        if numMismatches > 0:
            mot.initializeMismatchRE(numMismatches)
        else:
            mot.initializeRE()

        for entry in self.annotationEntries:
            (id, source, name, annotSeq, reference, other) = entry
            # Must use non-RE optimized version for short annotations
            if len(annotSeq) < len(mot):
                pad = "N" * (len(mot) - len(annotSeq))
                annotSeq = pad + annotSeq + pad
                loc = mot.locateConsensus(annotSeq)
            else:
                loc = mot.locateConsensusRE(annotSeq)

            if len(loc) > 0 :
                self.motifToAnnotations[motID].append((source, name, annotSeq, reference, other, loc))

            if len(self.motifToAnnotations[motID]) >= 500:
                break


    def printAnnotations(self):
        """ prints the motif annotations for every motif result.
        """
        for mot in self.getResults():
            annotations =   self.motifToAnnotations[mot.tagID]
            print "motif %s\t%s" % (mot.tagID, mot.buildConsensus())
            for annotation in annotations:
                (source, name, annotSeq, reference, other, loc) = annotation
                print "%s:%s\t%s\t%s\t%s" % (source, name, reference, annotSeq, str(loc))
                print other
            print


    def printAnnotationsShort(self):
        """ prints a compressed version of the annotations.
        """
        for motID in self.motifToAnnotations.keys():
            for annotation in self.motifToAnnotations[motID]:
                print "%s: %s (%s)\t%s - %s" % (annotation[0], annotation[1], annotation[4], annotation[2], annotation[5])


    def returnAnnotations(self, motID):
        """ returns the [annotations] for a particular motID
        """
        try:
            return self.motifToAnnotations[motID]
        except KeyError:
            return []


    def annotateMotifs(self, thresh=0.0, numberN=0):
        self.mlog( "Annotating Motifs with threshold %d and %d extra Ns" % (1.0 + thresh, numberN))
        if len(self.annotationEntries) == 0:
            self.initializeKnownMotifs()

        for mot in self.getResults():
            mot.setThreshold(thresh)
            self.motifToAnnotations[mot.tagID] = []
            self.findAnnotations(mot.tagID, thresh, numberN)


    def annotateConsensus(self, numMismatches=0, source=""):
        self.mlog( "Annotating Consensus")
        if len(self.annotationEntries) == 0:
            self.initializeKnownMotifs(source)

        for mot in self.getResults():
            self.motifToAnnotations[mot.tagID] = []
            self.findAnnotationsRE(mot.tagID, numMismatches)


    def printConsensus(self):
        """ print the consensus for every motif result.
        """
        for mot in self.getResults():
            print "motif %s\t%s" % (mot.tagID, mot.buildConsensus())


    def formatPWM(self, aPWM):
        """ format a PWM into a printable string.
        """
        aRow = ""
        cRow = ""
        gRow = ""
        tRow = ""
        for col in aPWM:
            aRow = string.join([aRow, str(round(col[matrixRow["A"]],4))], "\t")
            cRow = string.join([cRow, str(round(col[matrixRow["C"]],4))], "\t")
            gRow = string.join([gRow, str(round(col[matrixRow["G"]],4))], "\t")
            tRow = string.join([tRow, str(round(col[matrixRow["T"]],4))], "\t")

        formattedPWM =  "A:\t%s\nC:\t%s\nG:\t%s\nT:\t%s\n" % (aRow, cRow, gRow, tRow)

        return formattedPWM


    def appendGeneToMotif(self,mTagID, geneID, pos):
        """ make an entry in the geneToMotif table.
        """
        (genome, locus) = geneID
        (loc, sense) = pos
        stmt = "INSERT into geneToMotif(ID, expID, analysisID, mTagID, genome, locus, location, sense) values (NULL, '%s', '%s',  '%s', '%s', '%s', '%s', '%s')" % (self.experimentID, self.analysisID, mTagID, genome, locus, loc, sense)
        res = self.sql(stmt, "commit")


    def appendGeneToMotifBatch(self, batch):
        """ make a batch of entries in the geneToMotif table.
        """
        batchInserts = []
        stmt = "INSERT into geneToMotif(ID, expID, analysisID, mTagID, genome, locus, location, sense) values (NULL, ?, ?,  ?, ?, ?, ?, ?)" 
        for entry in batch:
            (mTagID, geneID, pos) = entry
            (genome, locus) = geneID
            (loc, sense) = pos
            batchInserts.append((self.experimentID, self.analysisID, mTagID, genome, locus, loc, sense))

        res = self.batchsql(stmt, batchInserts)

    
    def geneToMotifKeys(self, thekey="geneID"):
        """ return the keys to the geneToMotif table. The default key is geneID, otherwise returns mTagID.
        """
        results = []
        if thekey == "geneID":
            stmt = "SELECT distinct genome, locus from geneToMotif where expID = '%s' and analysisID = '%s' order by locus" % (self.experimentID, self.analysisID)
        else:
            stmt = "SELECT distinct mTagID from geneToMotif where expID = '%s' and analysisID = '%s' order by mTagID" % (self.experimentID, self.analysisID)

        res = self.sql(stmt)
        for entry in res:
            if thekey == "geneID":
                (genome, locus) = entry
                results.append((str(genome), str(locus)))
            else:
                mTagID = entry[0]
                results.append(str(mTagID))

        return results


    def appendMotifNeighbors(self,mTagID, match, condition="", geneEntry=""):
        """ make an entry in the motifNeighbors table.
        """
        (chromo, pos) = match
        (genome, chromNum) = chromo
        (loc, mSense) = pos
        if geneEntry != "":
            (start, stop, sense, geneID) = geneEntry
            (genome2, locus) = geneID
        else:
            start = "-"
            stop = "-"
            sense = "-"
            locus = "NO GENE IN RADIUS"

        stmt = "INSERT into motifNeighbors(ID, expID, analysisID, mTagID, genome, chromNum, location, motifSense, start, stop, sense, locus, condition) values (NULL, '%s','%s',  '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')" % (self.experimentID, self.analysisID, mTagID, genome, chromNum, loc, mSense, start, stop, sense, locus, condition)
        res = self.sql(stmt, "commit")


    def appendMotifNeighborsBatch(self, batch):
        """ make a batch of entries in the motifNeighbors table.
        """
        batchInserts = []
        stmt = "INSERT into motifNeighbors(ID, expID, analysisID, mTagID, genome, chromNum, location, motifSense, start, stop, sense, locus, condition) values (NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
        for entry in batch:
            (mTagID, match, condition, geneEntry) = entry
            (chromo, pos) = match
            (genome, chromNum) = chromo
            (loc, mSense) = pos
            if geneEntry != "":
                (start, stop, sense, geneID) = geneEntry
                (genome2, locus) = geneID
            else:
                start = "-"
                stop = "-"
                sense = "-"
                locus = "NO GENE IN RADIUS"

            batchInserts.append((self.experimentID, self.analysisID, mTagID, genome, chromNum, loc, mSense, start, stop, sense, locus, condition))

        res = self.batchsql(stmt, batchInserts)


    def motifNeighborsKeys(self, condition=""):
        """ returns a [list of motif ID's] in the motifNeighbor table. Can restrict to a particular condition.
        """
        results = []
        if condition != "":
            stmt = "SELECT distinct mTagID from motifNeighbors where expID = '%s' and analysisID = '%s' and condition ='%s' order by mTagID" % (self.experimentID, self.analysisID, condition)
        else:
            stmt = "SELECT distinct mTagID from motifNeighbors where expID = '%s' and analysisID = '%s' order by mTagID" % (self.experimentID, self.analysisID)

        res = self.sql(stmt)
        for entry in res:
            mTagID = entry[0]
            results.append(mTagID)

        return results


    def motifNeighbors(self,mTagID, condition="", discardNullMatches=False):
        """ get entries in the geneToMotif table that match a particular Motif & condition.
        """
        results = []
        genome = ""
        chromNum = ""
        loc = ""
        mSense = ""
        start = ""
        stop = ""
        sense = ""
        locus = ""
        stmt = "select distinct genome, chromNum, location, motifSense, start, stop, sense, locus, condition from motifNeighbors where expID='%s' and analysisID = '%s' and mTagID='%s' " % (self.experimentID, self.analysisID, mTagID)
        if discardNullMatches:
            stmt += " and condition <> 'NONE' "

        if condition != '':
            stmt +=  " and condition = '%s' " % (condition)

        stmt += " order by ID desc "
        res = self.sql(stmt)

        for entry in res:
            (genome, chromNum, loc, mSense, start, stop, sense, locus, condition) = entry
            match = ((genome, chromNum),(int(loc), mSense))
            geneEntry = (start, stop, sense, (genome, locus))
            results.append((match, geneEntry, condition))

        return results


    def motifToGene(self, mTagID):
        """ returns a list of matching geneIDs with location and sense found for a given motif. 
        """
        results = []
        stmt = "SELECT distinct genome, locus, location, sense from geneToMotif where expID = '%s' and analysisID = '%s' and mTagID = '%s' order by locus" % (self.experimentID, self.analysisID, mTagID)
        res = self.sql(stmt)
        for entry in res:
            (genome, locus, location, sense) = entry
            results.append(((genome, locus), (int(location), sense)))

        return results


    def geneToMotif(self, geneID):
        """ returns a list of matching motifs with location and sense found for a given geneID
        """
        results = []
        (genome, locus) = geneID
        stmt = "SELECT distinct mTagID, location, sense from geneToMotif where expID = '%s' and analysisID = '%s' and genome = '%s' and locus = '%s' order by location" % (self.experimentID, self.analysisID, genome, locus)
        res = self.sql(stmt)
        for entry in res:
            (mTagID, location, sense) = entry
            results.append((mTagID, (location, sense)))

        return results


    def mapMotifs(self, Threshold=90.0, numberN=0, runList=[], ignoreList=[], enforceSanity=True, mapAllSeqs=True, verbose=True):
        """ find occurences of result motifs in the current genepool using PWMs. Slow.
        """
        currentGenome = ""
        currentDataset = -1
        genepoolKeys = self.genepool.keys()
        posResults = []
        for mot in self.getResults():
            mTagID = mot.tagID
            runID = mTagID.split("-")[0]
            (pName, dataID, setID, tStamp, motArray) = self.getRun(int(runID))
            if enforceSanity and not mot.isSane():
                self.mlog("mapMotifs: not mapping %s - failed sanity check" % (mTagID))
                continue

            if mot.tagID in ignoreList:
                self.mlog("mapMotifs: not mapping %s" % mot.tagID)
                continue

            if len(runList) > 0:
                if int(runID) not in runList:
                    self.mlog("mapMotifs: not mapping run %s" % runID)
                    continue

            if mapAllSeqs:
                dataID = 0

            self.mlog("mapMotifs: mapping %s with threshold %f and at most %d N" % (mTagID, Threshold, numberN))
            if dataID <> currentDataset:
                currentDataset = dataID
                for geneID in self.getSetting(dataID):
                    if geneID[0] <> currentGenome:
                        currentGenome = geneID[0]

                    try:
                        if geneID not in genepoolKeys:
                            self.genepool[geneID] = cistematic.core.retrieveSeq(geneID, self.upstream, self.cds, self.downstreamself.geneDB, False, self.boundToNextGene)
                            genepoolKeys.append[geneID]
                    except:
                        self.mlog("mapMotifs: could not load %s" % (str(geneID)))
                        break

            for geneID in genepoolKeys:
                if verbose:
                    print "mapMotifs: %s\n" % str(geneID)

                motifPos = mot.locateMotif(self.genepool[geneID], Threshold, numberN)

                if len(motifPos) > 0:
                    for pos in motifPos:
                        posResults.append((mTagID, geneID, pos))

        self.appendGeneToMotifBatch(posResults)


    def mapConsensus(self, runList=[], ignoreList=[], enforceSanity=True, mapAllSeqs=True, numMismatches=0):
        """ find occurences of result motifs in the current genepool using regular expressions, allowing 
            for a number of mismatches.
        """
        currentGenome = ""
        currentDataset = -1
        genepoolKeys = self.genepool.keys()
        posResults = []
        for mot in self.getResults():
            mTagID = mot.tagID
            runID = mTagID.split("-")[0]
            (pName, dataID, setID, tStamp, motArray) = self.getRun(int(runID))
            if mot.tagID in ignoreList:
                self.mlog("mapConsensus: not mapping %s" % mot.tagID)
                continue

            if len(runList) > 0:
                if int(runID) not in runList:
                    self.mlog("mapConsensus: not mapping run %s" % runID)
                    continue

            if enforceSanity and not mot.isSane():
                self.mlog("mapConsensus: not mapping %s - failed sanity check" % (mTagID))
                continue

            if mapAllSeqs:
                dataID = 0

            if numMismatches > 0:
                mot.initializeMismatchRE(numMismatches)
            else:
                mot.initializeRE()
            self.mlog("mapConsensus: mapping %s with %s mismatches" % (mTagID, numMismatches))
            if dataID <> currentDataset:
                currentDataset = dataID
                for geneID in self.getSetting(dataID):
                    if geneID[0] <> currentGenome:
                        currentGenome = geneID[0]

                    try:
                        if geneID not in genepoolKeys:
                            self.genepool[geneID] = cistematic.core.retrieveSeq(geneID, self.upstream, self.cds, self.downstream)
                            genepoolKeys.append[geneID]
                    except:
                        self.mlog("mapConsensus: could not load %s" % (str(geneID)))
                        break

            for geneID in genepoolKeys:
                motifPos = mot.locateConsensusRE(self.genepool[geneID])

                if len(motifPos) > 0:
                    for pos in motifPos:
                        posResults.append((mTagID, geneID, pos))

        self.appendGeneToMotifBatch(posResults)


    def mapFeatures(self, radius):
        """ returns features within a certain radius in bp of all matches.
        """
        FeatureList = []
        for mot in self.getResults():
            mTagID = mot.tagID
            self.mlog("mapFeatures: mapping %s using a radius of %d bp" % (mTagID, radius))
            for match in self.motifToGene(mTagID):
                matchFeatures = cistematic.core.retrieveFeatures(match, radius)
                for entry in matchFeatures:
                    FeatureList.append((mTagID, match, entry))

        return FeatureList


    def limitGeneEntries(self, geneList, lowerBound, upperBound):
        results = []
        for entry in geneList:
            if entry[1] < lowerBound or entry[0] > upperBound:
                continue

            results.append(entry)

        return results
    

    def mapNeighbors(self, radius, annotate=True):
        """ returns genes on a chromosome within a certain radius in bp.
        """
        localGeneList = {}
        prevChromo = ""
        neighborResults = []         
        for mot in self.getResults():
            mTagID = mot.tagID
            motLen = len(mot)
            motRadius = motLen / 2
            mtgList = self.motifToGene(mTagID)
            self.mlog("mapNeighbors: mapping %s using a radius of %d bp (%d instances)" % (mTagID, radius, len(mtgList)))
            index = 0
            for match in mtgList:
                (chromo, hit) = match
                if annotate and chromo != prevChromo:
                    prevChromo = chromo
                    localGeneList = cistematic.core.getChromoGeneEntries(chromo)

                index += 1
                if (index % 1000) == 0:
                    print "."

                matchCounter = 0
                matchCDS = []
                if annotate:
                    (chromo, hit) = match
                    matchpos = int(hit[0])
                    lowerBound = matchpos - radius
                    upperBound = matchpos + radius
                    match2 = (chromo, (matchpos + motRadius, hit[1]))
                    matchFeatures = cistematic.core.retrieveFeatures(match2, motRadius, "CDS")
                    for entry in matchFeatures:
                        matchCDS.append((chromo[0], entry[0]))

                    geneEntriesList = self.limitGeneEntries(localGeneList, lowerBound, upperBound)
                    for geneEntry in geneEntriesList:
                        beg = int(geneEntry[0])
                        end = int(geneEntry[1])
                        sense = geneEntry[2]
                        gID = geneEntry[3]
                        if gID in matchCDS: # matching within the coding sequence
                            neighborResults.append((mTagID, match, "CDS", geneEntry))
                            matchCounter += 1
                        elif matchpos >= beg and matchpos <= end: # not in CDS, but in gene
                            neighborResults.append((mTagID, match, "GENE", geneEntry))
                            matchCounter += 1
                        elif matchpos < beg: 
                            if sense == "F":
                                neighborResults.append((mTagID, match, "UPSTREAM", geneEntry))
                            else:
                                neighborResults.append((mTagID, match, "DOWNSTREAM", geneEntry))

                            matchCounter += 1
                        else:
                            if sense == "F":
                                neighborResults.append((mTagID, match, "DOWNSTREAM", geneEntry))
                            else:
                                neighborResults.append((mTagID,match, "UPSTREAM", geneEntry))

                            matchCounter += 1

                if matchCounter < 1:
                    neighborResults.append((mTagID, match, "NONE", ""))

        self.appendMotifNeighborsBatch(neighborResults)


    def printMotifToGene(self, motifList=[], runList=[]):
        motKeys = self.geneToMotifKeys(thekey="mTagID")
        if len(motifList) == 0 and len(runList) == 0:
            for tag in motKeys:
                print "Motif %s is found in: %s" % (tag, str(self.motifToGene(tag)))
        else:
            for tag in motKeys:
                runID = tag.split("-")[0]
                if len(runList) > 0:
                    if runID not in runList:
                        continue

                if tag in motifList:
                    print "Motif %s is found in: %s" % (tag, str(self.motifToGene(tag)))


    def motifToGeneStat(self):
        tags = self.geneToMotifKeys(thekey="mTagID")
        counter = 0
        min = len(self.motifToGene(tags[0]))
        max = 0
        for tag in tags:
            numGenes = len(self.motifToGene(tag))
            print "%s: %d" % (tag, numGenes)
            if numGenes > max:
                max = numGenes
            elif numGenes < min:
                min = numGenes

            counter += numGenes

        print "for a total of %d matches - min: %d max: %d" % (counter, min, max)


    def printGeneToMotif(self, geneList = []):
        if len(geneList) == 0:
            for geneID in self.geneToMotifKeys():
                print "Gene %s has the following motifs: %s" % (str(geneID), str(self.geneToMotif(geneID)))
        else:
            for geneID in self.geneToMotifKeys():
                if geneID in geneList:
                    print "Gene %s has the following motifs: %s" % (str(geneID), str(self.geneToMotif(geneID)))


    def geneToMotifStat(self):
        genes = self.geneToMotifKeys()
        counter = 0
        min = len(self.geneToMotif(genes[0]))
        max = 0
        for gene in genes:
            numMotifs = len(self.geneToMotif(gene))
            print "%s - %s: %d" % (gene[0], gene[1], numMotifs)
            if numMotifs > max:
                max = numMotifs
            elif numMotifs < min:
                min = numMotifs

            counter += numMotifs

        print "for a total of %d matches - min: %d max: %d" % (counter, min, max)

    
    def printGeneProfile(self, geneID):
        print "\nMotif matches for %s " % (str(geneID))
        geneProfile = self.makeGeneProfile(geneID)
        positions = geneProfile.keys()
        if len(positions) > 0:
            positions.sort()
            for pos in positions:
                print "%s %s" % (str(pos), str(geneProfile[pos]))
        else:
            print "Gene had no matching motifs"


    def makeGeneProfile(self, geneID):
        geneBucket = {}
        geneMotifs = self.geneToMotif(geneID)
        if len(geneMotifs) > 0:
            for mot in geneMotifs:
                (motID, pos) = mot
                (loc, sense) = pos
                if int(loc) not in geneBucket.keys():
                    geneBucket[int(loc)] = []

                geneBucket[int(loc)].append(mot)

        return geneBucket


    def getMotifMatches(self, motifIDs=[]):
        results = {}
        if len(motifIDs) < 1:
            motifIDs = self.geneToMotifKeys(thekey="mTagID")

        for motID in motifIDs:
            results[motID] = []
            mot = self.findMotif(motID)
            motLength = len(mot)
            for match in self.motifToGene(motID):
                (chrom, hit) = match
                (pos, sense) = hit
                if sense == "F":
                    results[motID].append(self.genepool[chrom][pos:pos + motLength])
                else:
                    results[motID].append(cistematic.core.complement(self.genepool[chrom][pos:pos + motLength], motLength))

        return results


    def summarizeMotifs(self, fileName):
        """ saves the number of occurences and actual occurence PWMs of motifs to fileName.
        """
        motDict = {}
        motText = []
        try:
            if len(fileName) < 1:
                raise IOError

            outFile = open(fileName, "a")
            self.mlog("Saving motif summary")
            for motID in self.geneToMotifKeys(thekey="mTagID"):
                matchNum = 0
                mot = self.findMotif(motID)
                motLength = len(mot)
                motDict[motID] = []
                for index in range(motLength):
                    motDict[motID].append([0.0, 0.0, 0.0, 0.0])

                for match in self.motifToGene(motID):
                    matchNum += 1
                    (chromo, hit) = match
                    (pos, sense) = hit
                    if sense == "F":
                        matchSeq = self.genepool[chromo][pos:pos + motLength]
                    else:
                        matchSeq = cistematic.core.complement(self.genepool[chromo][pos: pos + motLength], motLength)

                    for index in range(motLength):
                        NT = matchSeq[index]
                        NT = NT.upper()
                        if NT in ["A", "C", "G", "T"]:
                            motDict[motID][index][matrixRow[NT]] += 1

                motLine = "motif %s\t %s matches\t'%s'\n %s\n" % (motID, str(matchNum), mot.buildConsensus(), self.formatPWM(motDict[motID]))
                motText.append(motLine)

            outFile.writelines(motText)
            outFile.close()
        except:
            self.mlog("Could not save motif summary to file %s\n" % fileName)


    def printMotifNeighbors(self):
        for motID in self.motifNeighborsKeys():
            print "Matches for motif %s" % motID
            currentHit = ()
            for entry in self.motifNeighbors(motID):
                if entry[0] != currentHit:
                    print "====================="
                    print "Match %s" % str(entry[0])
                    currentHit = entry[0]

                print "\tGene %s:" % str(entry[1])
                try:
                    goInfo = cistematic.core.getGOInfo(entry[1][3])
                    for entry in goInfo:
                        print "\t\t%s" % str(entry)
                except:
                    pass

                print "----------------"


    def summarizeMotifNeighbors(self, fileName):
        """ saves the number of occurences and PWMs of motifs within the gene
            neighborhood radius as mapped using mapNeighbors() to fileName.
        """
        motDict = {}
        motText = []
        try:
            if len(fileName) < 1:
                raise IOError
            outFile = open(fileName, "a")
            self.mlog("Saving neighbor summary")
            for motID in self.motifNeighborsKeys():
                matchNum = 0
                mot = self.findMotif(motID)
                motLength = len(mot)
                motDict[motID] = []
                for index in range(motLength):
                    motDict[motID].append([0.0, 0.0, 0.0, 0.0])

                currentMatch = ""
                for entry in self.motifNeighbors(motID, discardNullMatches = True):
                    if currentMatch != entry[0]:
                        currentMatch = entry[0]
                        (genechrom, loc) = entry[0]
                        (pos, sense) = loc
                        geneID = entry[1][3]
                        (geno, gene) = geneID
                        if gene != "NO GENE IN RADIUS":
                            matchNum += 1
                            if sense == "F":
                                matchSeq = self.genepool[genechrom][pos:pos + motLength]
                            else:
                                matchSeq = cistematic.core.complement(self.genepool[genechrom][pos: pos + motLength], motLength)

                            for index in range(motLength):
                                NT = matchSeq[index]
                                NT = NT.upper()
                                if NT in ["A", "C", "G", "T"]:
                                    motDict[motID][index][matrixRow[NT]] += 1

                motLine = "motif %s\t %s matches\n %s\n" % (motID, str(matchNum), self.formatPWM(motDict[motID]))
                motText.append(motLine)

            outFile.writelines(motText)
            outFile.close()
        except:
            self.mlog("Could not save motif neighbors to file %s\n" % fileName)


    def saveMotifNeighbors(self, fileName, fullAnnotations=True):
        """ save every occurence of the motifs with any adjoining gene(s) to fileName. 
            Records annotations and GO terms if available when fullAnnotations=True.
        """
        goDict = {}
        annotDict = {}
        currentGenome = ""
        neighborList = []
        self.mlog("Saving motif neighbors to file %s\n" % fileName)
        if True:
            if len(fileName) < 1:
                raise IOError
            outFile = open(fileName, "a")
            for motID in self.motifNeighborsKeys():
                matchNum = 0
                mot = self.findMotif(motID)
                motLength = len(mot)
                currentMatch = ""
                for entry in self.motifNeighbors(motID):
                    neighborList = []
                    if currentMatch != entry[0]:
                        matchNum += 1
                        currentMatch = entry[0]
                        (genechrom, loc) = entry[0]
                        (genome, chromo) = genechrom
                        (pos, sense) = loc
                        if fullAnnotations and genome != currentGenome:
                            currentGenome = genome
                            goDict = cistematic.core.getAllGOInfo(genome)
                            annotDict = cistematic.core.getAllAnnotInfo(genome)

                    start = entry[1][0]
                    stop = entry[1][1]
                    geneSense = entry[1][2]
                    geneID = entry[1][3]
                    (geno, gene) = geneID
                    condition = entry[2]
                    if sense == "F":
                        matchSeq = self.genepool[genechrom][pos:pos + motLength]
                    else:
                        matchSeq = cistematic.core.complement(self.genepool[genechrom][pos: pos + motLength], motLength)

                    currentEntry = "%s\t%i\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (motID, matchNum, genome, chromo, pos, sense, condition, start, stop, geneSense, geno, gene, matchSeq)
                    if fullAnnotations:
                        try:
                            goInfo = goDict.get(geneID, [])
                            annotInfo = annotDict.get(geneID,[])
                            annotDescription = ""
                            prevGoInfo = ""
                            if self.debugSQL:
                                print "%s\t%s\t%s" % (str(geneID), str(goInfo), str(annotInfo))
                            for annot in annotInfo:
                                annotDescription += annot + " "
                            if len(goInfo) > 0:
                                for entry in goInfo:
                                    if entry == prevGoInfo:
                                        continue
                                    neighborList.append("%s\t%s\t%s\n" % (currentEntry, annotDescription, entry))
                                    prevGoInfo = entry
                            else:
                                neighborList.append("%s\t%s\n" % (currentEntry, annotDescription))
                        except:
                            neighborList.append("%s\n" % currentEntry)
                    else:
                        neighborList.append("%s\n" % currentEntry)

                    outFile.writelines(neighborList)

            outFile.close()
        else:
            self.mlog("Could not save motif neighbors to file %s\n" % fileName)

        return neighborList


    def buildMotifSize(self):
        mots = self.getResults()
        index = 0
        for mot in mots:
            motLength = len(mot)
            if motLength not in self.motifSize.keys():
                self.motifSize[motLength] =[]

            self.motifSize[motLength].append((index, mot.buildConsensus()))
            index += 1


    def findIdenticalMotifs(self):
        # FIXME!!!!
        identicalMotifs = []

        for motLength in self.motifSize.keys():
            motList = self.motifSize[motLength]
            motListLen = len(motList)
            print "doing motif length %d" % motLength
            for pos in range(motListLen):
                index = pos + 1
                while index < motListLen:
                    if motList[pos][1] == motList[index][1]:
                        identicalMotifs.append((self.results[pos].tagID, self.results[index].tagID))

                    index += 1

        return identicalMotifs


    def createAnalysis(self, dbName=""):
        """ creates the analysis SQL tables. Should only be run once per database file.
        """
        if len(dbName) == 0:
            dbName = self.expFile

        db = sqlite.connect(dbName, timeout=60)
        try:    
            sql = db.cursor()
            sql.execute("CREATE table geneToMotif(ID INTEGER PRIMARY KEY, expID varchar, analysisID varchar, mTagID varchar, genome varchar, locus varchar, location varchar, sense varchar)")
            sql.execute("CREATE table genomeMatches(ID INTEGER PRIMARY KEY, expID varchar, analysisID varchar, mTagID varchar, genome varchar, locus varchar, location varchar, sense varchar, threshold varchar)")
            sql.execute("CREATE table motifNeighbors(ID INTEGER PRIMARY KEY, expID varchar, analysisID varchar, mTagID varchar, genome varchar, chromNum varchar, location varchar, motifSense varchar, start varchar, stop varchar, sense varchar, locus varchar, condition varchar)")
            sql.execute("CREATE index mot1 on geneToMotif(expID, analysisID, mTagID)")
            sql.execute("CREATE index mot2 on genomeMatches(expID, analysisID, mTagID)")
            sql.execute("CREATE index mot3 on motifNeighbors(expID, analysisID, mTagID)")
            sql.execute("CREATE index locus1 on geneToMotif(locus)")
            sql.execute("CREATE index locus2 on motifNeighbors(locus)")
            sql.execute("CREATE index condition1 on motifNeighbors(condition)")
            db.commit()
            sql.close()
            db.close()
            self.mlog("Created analysis tables in database %s" % dbName)
        except:
            db.close()
            self.mlog("Could not create tables in database %s" % dbName)
            self.mlog("WARNING: perhaps you have called createAnalysis twice on the same database?")


    def loadAnalysis(self, analysisID="default", dbName=""):
        """ changes the analysisID to use the specified one, or use default. Must be used before reading or
            writing any data to analysis tables.
        """
        self.analysisID = analysisID
        if len (dbName) > 0:
            self.dbName = dbName
        else:
            self.dbName = self.expFile


    def resetAnalysis(self):
        """ currently zeros out some of the analysis structures. obsolete ?
        """
        self.mlog("resetting analysis")
        self.motifSize = {}
        self.motifToAnnotations = {}
        self.coappear  = {}


    def saveAnalysis(self):
        """ currently obsolete - kept for backward compability.
        """
        try:
            self.mlog("saved analysis %s" % self.analysisID)
        except:
            self.mlog("could not save %s" % self.analysisID)


    def deleteAnalysis(self, analysisID="default"):
        """ delete all of the analysis either named aName, or matching
            an experiment (the current one by default.) Currently a NO-OP. Obsolete.
        """
        pass


    def buildCoappear(self, motifList=[], distance="100000", strict = False):
        """ Builds coappear dictionary of geneIDs where motifs occur with others. Can limit to
            motifs in motifList (default is all motifs), and by distance (default is 100,000 base pairs.) 
            Results are accessed through printCoappear() and saveCoappear().
        """
        occurenceList = {}
        self.coappear = {}
        processedMotifs = []
        motifListLen = len(motifList)
        if motifListLen == 0:
            #use all motifs
            motifList = self.geneToMotifKeys(thekey="mTagID")
            motifListLen = len(motifList)

        for motif in motifList:
            if motif not in processedMotifs:
                matchList = self.motifToGene(motif)
                for match in matchList:
                    (geneID, loc) = match
                    if geneID not in occurenceList:
                        occurenceList[geneID] = []

                    (location, sense) = loc
                    occurenceList[geneID].append((location, sense, motif))

            processedMotifs.append(motif)

        for geneID in occurenceList:
            occurenceList[geneID].sort()
            if geneID not in self.coappear:
                self.coappear[geneID] = []    

            coappearing = False
            differentMotifs = False
            coappearList = []
            prevOccurence = occurenceList[geneID][0]
            del occurenceList[geneID][0]
            for occurence in occurenceList[geneID]:
                if occurence[0] < prevOccurence[0] + distance:
                    coappearing = True
                    if occurence[2] != prevOccurence[2]:
                        differentMotifs = True

                    coappearList.append(prevOccurence)
                elif coappearing:
                    if strict:
                        if differentMotifs:
                            coappearList.append(prevOccurence)
                            self.coappear[geneID].append(coappearList)
                    else:
                        coappearList.append(prevOccurence)
                        self.coappear[geneID].append(coappearList)

                    coappearing = False
                    differentMotifs = False
                    coappearList = []

                prevOccurence = occurence

            if coappearing:
                if strict:
                    if differentMotifs:
                        coappearList.append(prevOccurence)
                        self.coappear[geneID].append(coappearList)
                else:
                    coappearList.append(prevOccurence)
                    self.coappear[geneID].append(coappearList)


    def printCoappear(self):
        """ prints a formatted version of the coappear dictionary built with buildCoappear()
        """
        for geneID in self.coappear:
            print " ===== %s =====" % str(geneID)
            for occurence in self.coappear[geneID]:
                print str(occurence)
            print " ============="


    def saveCoappear(self, fileName):
        """ save coappear dictionary in tabular format. Returns:
            index, genome, locus, pos, sense, tag in tab-separated format.
        """
        index = 0
        outLines = []
        if 1:
            if len(fileName) < 1:
                raise IOError

            outFile = open(fileName, "a")
            for geneID in self.coappear:
                (genome, locus) = geneID
                for occurence in self.coappear[geneID]:
                    index += 1
                    coappearLine = "%d\t%s\t%s" % (index, genome, locus)
                    for match in occurence:
                        (pos, sense, tag) = match
                        coappearLine += "\t%s\t%s\t%s" % (pos, sense, tag)

                    outLines.append(coappearLine + "\n")

            outFile.writelines(outLines)
            outFile.close()
        else:
            self.mlog("Could not save coappear to file %s\n" % fileName)


    def sql(self, stmt, commit=""):
        """ executes a SQL statement and does a commit if commit is not-null. returns any results as a list.
        """
        db = sqlite.connect(self.dbName, timeout=60)
        sqlc = db.cursor()
        if self.debugSQL:
            print "sql: %s" % stmt

        sqlc.execute(stmt)
        res = sqlc.fetchall()
        if commit != "":
            db.commit()

        sqlc.close()
        db.close()

        return res


    def batchsql(self, stmt, batch):
        """ executes a list of sql statements (usually inserts) stored in the list batch with a single commit.
        """
        res = []
        db = sqlite.connect(self.dbName, timeout=60)
        sqlc = db.cursor()
        if self.debugSQL:
            print "batchsql: %s" % stmt
            print "batchsql: %s" % str(batch)

        sqlc.executemany(stmt, batch)
        db.commit()
        sqlc.close()
        db.close()

        return res