###########################################################################
#                                                                         #
# C O P Y R I G H T   N O T I C E                                         #
#  Copyright (c) 2003-10 by:                                              #
#    * California Institute of Technology                                 #
#                                                                         #
#    All Rights Reserved.                                                 #
#                                                                         #
# Permission is hereby granted, free of charge, to any person             #
# obtaining a copy of this software and associated documentation files    #
# (the "Software"), to deal in the Software without restriction,          #
# including without limitation the rights to use, copy, modify, merge,    #
# publish, distribute, sublicense, and/or sell copies of the Software,    #
# and to permit persons to whom the Software is furnished to do so,       #
# subject to the following conditions:                                    #
#                                                                         #
# The above copyright notice and this permission notice shall be          #
# included in all copies or substantial portions of the Software.         #
#                                                                         #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
# SOFTWARE.                                                               #
###########################################################################
#
import string
from cistematic.core.homology import homologyDB
from os import environ

if environ.get("CISTEMATIC_TEMP"):
    cisTemp = environ.get("CISTEMATIC_TEMP")
else:
    cisTemp = "/tmp"


def crossMatchGenes(matchFiles={}, prefix="", matchdir=cisTemp, filterCDS=False, filterLowerCase=False):
    """ save to one file per genome the genes that are considred homologs in match files from different species.
    """
    theGenomes = matchFiles.keys()
    outFileNames = {}
    genes = {}
    entryDict = {}
    ignoreList = {}
    for aGenome in theGenomes:
        outFileNames[aGenome] = "%s/%s-%s.hom" % (matchdir, prefix, aGenome)
        genes[aGenome] = []
        entryDict[aGenome] = {}
        ignoreList[aGenome] = []

    for aGenome in theGenomes:
        goodLines = []
        ignoreLines = []
        theFile = open(matchFiles[aGenome], "r")
        for entry in theFile:
            fields = entry.split("\t")
            gid = fields[11]
            if filterCDS and fields[6] == "CDS":
                ignoreLines.append(string.join(fields[:4], "-"))
                continue

            if fields[6] == "NONE":
                continue

            if filterLowerCase and fields[12] != fields[12].upper():
                continue

            goodLines.append(entry)

        theFile.close()
        for entry in goodLines:
            fields = entry.split()
            gid = fields[11]
            geneID = (aGenome, gid)
            matchID = string.join(fields[:4], "-")
            if matchID in ignoreLines:
                continue

            if geneID not in genes[aGenome]:
                genes[aGenome].append(geneID)

            if geneID not in entryDict[aGenome]:
                entryDict[aGenome][geneID] = []
                entryDict[aGenome][geneID].append(entry)

        print "Loaded %d %s entries" % (len(genes[aGenome]), aGenome)
        doCrossMatch(outFileNames, genes, entryDict, ignoreList)


def orthoMatcher(matchFiles={}, prefix="", matchdir=".", gidField=1, fileList=False):
    """ save to one file per genome the genes that are considred homologs in match files from different species.
    """
    theGenomes = matchFiles.keys()
    outFileNames = {}
    genes = {}
    entryDict = {}
    ignoreList = {}
    for aGenome in theGenomes:
        outFileNames[aGenome] = "%s/%s-%s.hom" % (matchdir, prefix, aGenome)
        genes[aGenome] = []
        entryDict[aGenome] = {}
        ignoreList[aGenome] = []

    for aGenome in theGenomes:
        theList = []
        if not fileList:
            theList = [matchFiles[aGenome]]
        else:
            theList = matchFiles[aGenome]

        for aFile in theList:
            theFile = open(aFile, "r")
            for entry in theFile:
                fields = entry.split()
                gid = fields[gidField]
                geneID = (aGenome, gid)
                if geneID not in genes[aGenome]:
                    genes[aGenome].append(geneID)

                if geneID not in entryDict[aGenome]:
                    entryDict[aGenome][geneID] = []
                    entryDict[aGenome][geneID].append(entry)

            theFile.close()

        print "Loaded %d %s entries" % (len(genes[aGenome]), aGenome)
        doCrossMatch(outFileNames, genes, entryDict, ignoreList)


def doCrossMatch(outFileNames, genes, entryDict, ignoreList):
    theGenomes = []
    outFile = {}
    for genome in outFileNames:
        theGenomes.append(genome)

    hdb = homologyDB(cache=True)
    for aGenome in theGenomes:
        outFile = open(outFileNames[aGenome], "w")
        counter = 0
        for geneID in genes[aGenome]:
            try:
                if geneID in ignoreList[aGenome]:
                    continue

                res = hdb.getHomologousGenes(geneID)
                newcounter = 0
                for entry in res:
                    if entry[0] in theGenomes:
                        secondGenome = entry[0]

                    if entry in genes[secondGenome]:
                        print "%d: %s %s" % (counter, str(geneID), str(entry))
                        if newcounter < 1:
                            for rec in entryDict[aGenome][geneID]:
                                outFile.write("%d\t%s\t%s" % (counter, str(geneID), rec))

                        for rec in entryDict[secondGenome][entry]:
                            outFile.write("%d\t%s\t%s" % (counter, str(entry), rec))

                        newcounter  += 1

                    if secondGenome == aGenome:
                        ignoreList[aGenome].append(entry)

                if newcounter > 0:
                    counter += 1
                    outFile.write("\n")
            except:
                print "Problem around %s" % (str(geneID))

        outFile.close()