###########################################################################
#                                                                         #
# C O P Y R I G H T   N O T I C E                                         #
#  Copyright (c) 2003-10 by:                                              #
#    * California Institute of Technology                                 #
#                                                                         #
#    All Rights Reserved.                                                 #
#                                                                         #
# Permission is hereby granted, free of charge, to any person             #
# obtaining a copy of this software and associated documentation files    #
# (the "Software"), to deal in the Software without restriction,          #
# including without limitation the rights to use, copy, modify, merge,    #
# publish, distribute, sublicense, and/or sell copies of the Software,    #
# and to permit persons to whom the Software is furnished to do so,       #
# subject to the following conditions:                                    #
#                                                                         #
# The above copyright notice and this permission notice shall be          #
# included in all copies or substantial portions of the Software.         #
#                                                                         #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
# SOFTWARE.                                                               #
###########################################################################
#
try:
    from pysqlite2 import dbapi2 as sqlite
except:
    from sqlite3 import dbapi2 as sqlite

import tempfile, shutil, os
from os import environ

if environ.get("CISTEMATIC_ROOT"):
    cisRoot = environ.get("CISTEMATIC_ROOT") 
else:
    cisRoot = "/proj/genome"

from cistematic.core.geneinfo import speciesMap

dbPath = "%s/db/homologene.db" % cisRoot
homologeneGenomes = ["hsapiens", "mmusculus", "rnorvegicus", "celegans",
                     "cbriggsae", "cremanei", "dmelanogaster", "athaliana",
                     "ggallus", "cfamiliaris", "drerio", "scerevisiae"]

if environ.get("CISTEMATIC_TEMP"):
    cisTemp = environ.get("CISTEMATIC_TEMP")
else:
    cisTemp = "/tmp"
tempfile.tempdir = cisTemp


class homologyDB:
    """ The homologyDB class allows for the mapping and return of predefined homology relationships.
    """
    startingGenome = ""
    targetGenomes  = []
    homologousGenes = {}    
    cachedDB = ""


    def __init__(self, tGenomes=[], cache=False):
        """ initialize the homologyDB object with a target genome and cache database, if desired. 
        """
        self.targetGenomes = tGenomes
        if cache:
            self.cacheDB()


    def __del__(self):
        """ cleanup copy in local cache, if present. 
        """
        if self.cachedDB != "":
            self.uncacheDB()


    def cacheDB(self):
        """ copy homologyDB to a local cache. 
        """
        self.cachedDB = "%s.db" % tempfile.mktemp()
        shutil.copyfile(dbPath, self.cachedDB)


    def uncacheDB(self):
        """ delete homologyDB from local cache. 
        """
        global cachedDB
        if self.cachedDB != "":
            try:
                os.remove(self.cachedDB)
            except:
                print "could not delete %s" % self.cachedDB

            self.cachedDB = ""


    def connectDB(self):
        """ return a handle to the database. 
        """
        path = dbPath
        if self.cachedDB != "":
            path = self.cachedDB

        return sqlite.connect(path, timeout=60)


    def loadDB(self):
        """ deprecated. 
        """
        pass


    def getHomologousGenes(self, geneID):
        """ return list of geneIDs homologous to given geneID. Limit to target genomes if specified at initialization. 
        """
        db = self.connectDB()
        cursor = db.cursor()
        results = []
        (gen, gid) = geneID
        cursor.execute("select homoloID from homolog where genome = :gen and gID = :gid", locals())
        groups = cursor.fetchall()
        for hIDentry in groups:
            homoloID = str(hIDentry[0])
            cursor.execute("select genome, gID from homolog where homoloID = :homoloID ", locals())
            genes = cursor.fetchall()
            for gene in genes:
                if gene != geneID:
                    (genome, gID) = gene
                    if len(self.targetGenomes) > 0:
                        if genome not in self.targetGenomes:
                            pass
                        else:
                            results.append((str(genome), str(gID)))
                    else:
                        results.append((str(genome), str(gID)))

            cursor.close()
            db.close()

        return results


def buildHomologeneDB(hFile="homologene.data", hdb=dbPath):
    """ Populate a new homologyDB database with homology relationships from homologene.
    """
    inFile = open(hFile)
    db = sqlite.connect(hdb)
    cursor = db.cursor()
    cursor.execute("create table homolog(ID INTEGER PRIMARY KEY, homoloID varchar, genome varchar, gID varchar)")
    for line in inFile:
        doInsert = False
        sqlstmt = "INSERT into homolog(ID, homoloID, genome, gID) values (NULL, ?, ?, ?)"
        field = line.split("\t")
        if field[1] in speciesMap:
            gid = field[2]
            if speciesMap[field[1]] == "arabidopsis":
                gid = field[3].upper()

            values =  ("homologene-%s" % field[0], speciesMap[field[1]], gid.strip())
            doInsert = True

        if doInsert:
            cursor.execute(sqlstmt, values)

    inFile.close()
    sqlstmt = "CREATE INDEX idx1 on homolog(genome, gID)"
    cursor.execute(sqlstmt)
    sqlstmt = "CREATE INDEX idx2 on homolog(homoloID)"
    cursor.execute(sqlstmt)
    db.commit()
    cursor.close()
    db.close()


def addHomologs(genomeA, genomeB, entries, hdb=dbPath):
    """ Specify homology relationships between geneIDs to be inserted into homology database.
        The entries list contains doubles of the form (gIDa, gIDb) from genome A and genome B, respectively.
    """
    mapping = {}
    for (geneID1, geneID2) in entries:
        mapping[geneID1] = geneID2

    if len(mapping) < 1:
        return

    db = sqlite.connect(hdb)
    sql = db.cursor()
    sql.execute('select * from homolog where genome = "%s" ' % genomeA)
    results = sql.fetchall()

    stmt = "insert into homolog(ID, homoloID, genome, gID) values (NULL, ?, ?, ?) "
    stmtArray = []

    for entry in results:
        (rowID, hID, genome, gID) = entry
        if gID in mapping:
            stmtArray.append((hID, genomeB, mapping[gID]))
            del mapping[gID]

    topHID = 0
    if len(stmtArray) > 0:
        print "Updating %d entries in homolog table" % len(stmtArray)
        sql.executemany(stmt, stmtArray)

    stmtArray = []
    for gID in mapping:
        topHID += 1
        homologID = "%s-%s-%s" % (genomeA, genomeB, str(topHID))
        stmtArray.append((homologID, genomeA, gID))
        stmtArray.append((homologID, genomeB, mapping[gID]))

    if len(mapping) > 0:
        print "Adding %d new homology entries" % len(mapping)
        sql.executemany(stmt, stmtArray)

    db.commit()
    sql.close()
    db.close()