# Optional speed-up: turn on psyco when it can be loaded, and carry on
# without it otherwise.
try:
    import psyco
    psyco.full()
except Exception:
    # Catch Exception rather than everything so that Ctrl-C and sys.exit()
    # raised while loading still propagate.
    print("psyco not running")

import sys
import optparse
from commoncode import getGeneInfoDict, getConfigParser, getConfigOption
from cistematic.cisstat.analyzego import calculateGOStats

# Version banner; runs at module load time, so it is also printed on import.
print "analyzego: version 2.2"

def main(argv=None):
    """Command-line entry point: parse the arguments and run the GO analysis.

    argv is the full argument vector with the program name at index 0; when
    it is None or empty, sys.argv is used instead.

    Three positional arguments are required (genome, infilename, prefix).
    If fewer are given, the usage message is printed and the process exits
    with status 1.

    The gene column defaults to 1, or to 0 when gene-name translation is
    enabled; an explicit fieldID option overrides either default.
    """
    if not argv:
        argv = sys.argv

    usage = "usage: python %prog genome infilename prefix [--geneName] [--field fieldID]"

    parser = makeParser(usage)
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 3:
        # Let optparse print the usage so that the %prog placeholder is
        # replaced by the program name instead of being shown literally.
        parser.print_usage()
        sys.exit(1)

    fieldID = 1
    if options.translateGene:
        fieldID = 0

    if options.fieldID is not None:
        fieldID = options.fieldID

    genome, infilename, prefix = args[:3]

    analyzeGOFromFile(genome, infilename, prefix, options.translateGene, fieldID)


def makeParser(usage=""):
    """Build the optparse parser for the analyzego command line.

    The parser knows two options: --geneName (a flag stored as
    translateGene) and --field (an integer stored as fieldID).  Their
    defaults are looked up in the "analyzego" section of the configuration
    returned by getConfigParser(), falling back to False and None.
    """
    config = getConfigParser()
    configSection = "analyzego"
    defaultTranslate = getConfigOption(config, configSection, "translateGene", False)
    defaultField = getConfigOption(config, configSection, "fieldID", None)

    optionParser = optparse.OptionParser(usage=usage)
    optionParser.add_option(
        "--geneName",
        action="store_true",
        dest="translateGene",
        help="translate gene",
    )
    optionParser.add_option(
        "--field",
        type="int",
        dest="fieldID",
        help="column containing gene ID/Name",
    )
    optionParser.set_defaults(translateGene=defaultTranslate, fieldID=defaultField)

    return optionParser


def analyzeGOFromFile(genome, infilename, prefix, translateGene=False, fieldID=1):
    """Open infilename and run analyzeGO() over its lines.

    genome, prefix, translateGene and fieldID are forwarded unchanged to
    analyzeGO().  The file is closed even if the analysis raises.
    """
    infile = open(infilename)
    try:
        # Forward the caller's settings; hard-coding the defaults here would
        # make the translateGene and fieldID arguments have no effect.
        analyzeGO(genome, infile, prefix, translateGene=translateGene, fieldID=fieldID)
    finally:
        infile.close()


def analyzeGO(genome, geneInfoList, prefix, translateGene=False, fieldID=1):
    """Collect unique (genome, geneID) pairs and pass them to calculateGOStats.

    geneInfoList is any iterable of text lines.  Each line is split on
    whitespace and the entry at index fieldID is taken as the gene.  Blank
    lines are skipped.

    When translateGene is true the entry is treated as a gene symbol:
      * a symbol starting with "LOC" yields the text after that prefix as
        the gene ID;
      * a symbol containing "FAR" is reported and ignored;
      * anything else is looked up in the table from getSymbolDict(genome),
        and is reported and ignored when it is not found.
    Otherwise the entry is used as the gene ID as-is.

    calculateGOStats(locusList, prefix) is called only when at least one
    locus was collected; loci keep the order of their first appearance.
    """
    if translateGene:
        symbolToGidDict = getSymbolDict(genome)

    locusList = []
    # Companion set gives constant-time duplicate checks while locusList
    # keeps the first-seen order.
    seenLoci = set()
    for line in geneInfoList:
        fields = line.split()
        if not fields:
            # Blank line: nothing to index into.
            continue

        gene = fields[fieldID]
        if translateGene:
            # Prefix test, not substring test: stripping three characters is
            # only meaningful when "LOC" is at the start (a symbol such as
            # CLOCK must go through the normal lookup).
            if gene.startswith("LOC"):
                gID = gene[3:]
            elif "FAR" in gene:
                print("ignoring %s" % gene)
                continue
            else:
                try:
                    gID = symbolToGidDict[gene]
                except KeyError:
                    print("ignoring %s" % gene)
                    continue
        else:
            gID = gene

        locus = (genome, gID)
        if locus not in seenLoci:
            seenLoci.add(locus)
            locusList.append(locus)

    if locusList:
        calculateGOStats(locusList, prefix)


def getSymbolDict(genome):
    """Return a dict mapping gene symbol to gene ID for genome.

    The table comes from getGeneInfoDict(genome, cache=True).  For every
    key, the whitespace-stripped value at [0][0] of its entry is used as the
    symbol (presumably the primary gene symbol -- confirm against
    getGeneInfoDict).  When several keys share a symbol, the one iterated
    last wins.
    """
    geneInfo = getGeneInfoDict(genome, cache=True)
    return dict((geneInfo[geneID][0][0].strip(), geneID) for geneID in geneInfo)


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main(sys.argv)