#
#  geneNeighbors.py
#  ENRAGE
#

# Enable the optional psyco JIT when it is installed; the script runs
# unchanged (just without the speed-up) when it is not.
try:
    import psyco
    psyco.full()
except Exception:
    # Best-effort only: a missing or failing psyco must not stop the script,
    # but Ctrl-C / SystemExit should no longer be swallowed here.
    pass

import sys
import optparse
from commoncode import getMergedRegions, getLocusByChromDict, getConfigParser, getConfigIntOption, getConfigBoolOption, getConfigOption
from cistematic.genomes import Genome
from commoncode import getGeneInfoDict

print "geneNeighbors: version 2.5" % sys.argv[0]


def main(argv=None):
    if not argv:
        argv = sys.argv

    usage = "usage: python %prog genome outfilename [--regions acceptfile] [--downstream bp] [--upstream bp] [--mindist bp] [--minlocus bp] [--maxlocus bp] [--samesense]"

    parser = getParser(usage)
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 2:
        print usage
        sys.exit(1)

    genome = args[0]
    outfilename = args[1]

    index = geneNeighbors(genome, outfilename, options.acceptFile, options.checkSense,
                          options.downMax, options.upMax, options.minDist, options.minLocus,
                          options.maxLocus)

    print "\n%d genes matched" % index


def getParser(usage):
    """Build the optparse parser for the geneNeighbors command line.

    Registers the --regions, --downstream, --upstream, --mindist,
    --minlocus, --maxlocus and --samesense options, then installs their
    defaults from the "geneNeighbors" section of the configuration returned
    by getConfigParser().  The last argument of each getConfig*Option call
    is presumably the fallback used when the setting is absent -- confirm
    against commoncode.

    usage: usage string handed to optparse.OptionParser.

    Returns the configured optparse.OptionParser.
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--regions", dest="acceptFile")
    parser.add_option("--downstream", type="int", dest="downMax")
    parser.add_option("--upstream", type="int", dest="upMax")
    parser.add_option("--mindist", type="int", dest="minDist")
    parser.add_option("--minlocus", type="int", dest="minLocus")
    parser.add_option("--maxlocus", type="int", dest="maxLocus")
    parser.add_option("--samesense", action="store_true", dest="checkSense")

    configParser = getConfigParser()
    section = "geneNeighbors"
    acceptfile = getConfigOption(configParser, section, "acceptfile", "")
    checkSense = getConfigBoolOption(configParser, section, "checkSense", False)
    downMax = getConfigIntOption(configParser, section, "downMax", 10000000)
    upMax = getConfigIntOption(configParser, section, "upMax", 10000000)
    minDist = getConfigIntOption(configParser, section, "minDist", 0)
    minLocus = getConfigIntOption(configParser, section, "minLocus", -1)
    maxLocus = getConfigIntOption(configParser, section, "maxLocus", 10000000)

    # set_defaults() keys must be the option *dest* names.  --regions stores
    # into "acceptFile"; the previous "acceptfile" key created an unrelated
    # attribute, so the configured accept file was never applied.
    parser.set_defaults(acceptFile=acceptfile, checkSense=checkSense, downMax=downMax,
                        upMax=upMax, minDist=minDist, minLocus=minLocus, maxLocus=maxLocus)

    return parser


def geneNeighbors(genome, outfilename, acceptfile="", checkSense=False,
                  downMax=10000000, upMax=10000000, minDist=0, minLocus=-1,
                  maxLocus=10000000):

    acceptDict = {}
    if acceptfile:
        acceptDict = getMergedRegions(acceptfile, maxDist=0, keepLabel=True, verbose=True)

    hg = Genome(genome)
    geneinfoDict = getGeneInfoDict(genome, cache=True)
    locusByChromDict = getLocusByChromDict(hg, additionalRegionsDict=acceptDict, keepSense=True)

    gidList = hg.allGIDs()
    gidList.sort()
    for chrom in acceptDict:
        for region in acceptDict[chrom]:
            if region.label not in gidList:
                gidList.append(region.label)

    index = 0
    outfile = open(outfilename,"w")
    chromList = locusByChromDict.keys()
    chromList.sort()
    for chrom in chromList:
        if len(locusByChromDict[chrom]) < 3 or "NT" in chrom or "MT" in chrom:
            continue

        print chrom + " ",
    
        prevStop = locusByChromDict[chrom][0][1]
        prevGID = locusByChromDict[chrom][0][2]
        if "FAR" not in prevGID:
            symbol = "LOC" + prevGID
            geneinfo = ""
            try:
                geneinfo = geneinfoDict[prevGID]
                symbol = geneinfo[0][0]
            except:
                pass
        else:
            symbol = prevGID

        prevGID = symbol
        prevSense = locusByChromDict[chrom][0][4]

        currentStart = locusByChromDict[chrom][1][0]
        currentStop = locusByChromDict[chrom][1][1]
        currentGID = locusByChromDict[chrom][1][2]
        if "FAR" not in currentGID:
            symbol = "LOC" + currentGID
            geneinfo = ""
            try:
                geneinfo = geneinfoDict[currentGID]
                symbol = geneinfo[0][0]
            except:
                pass
        else:
            symbol = currentGID

        currentGID = symbol
        currentGlen = locusByChromDict[chrom][1][3]
        currentSense = locusByChromDict[chrom][1][4] 

        for (nextStart, nextStop, nextGID, nextGlen, nextSense) in locusByChromDict[chrom][2:]:
            if "FAR" not in nextGID:
                symbol = "LOC" + nextGID
                geneinfo = ""
                try:
                    geneinfo = geneinfoDict[nextGID]
                    symbol = geneinfo[0][0]
                except:
                    pass
            else:
                symbol = nextGID

            nextGID = symbol
            leftDist = currentStart - prevStop
            rightDist = nextStart - currentStop
            if (currentSense == "F" and minDist < leftDist < upMax and minDist < rightDist < downMax) or (currentSense == "R" and minDist < rightDist < upMax and minDist < leftDist < downMax):
                if not checkSense or currentSense == nextSense:
                    if minLocus <= currentGlen <= maxLocus:
                        outfile.write("%s\t%s\t%s\t%s\t%d\t%s\t%s\t%d\n" % (currentGID, currentSense, prevGID, prevSense, leftDist, nextGID, nextSense, rightDist))
                        index += 1

            prevStop = currentStop
            prevGID = currentGID
            prevSense = currentSense
            currentStart = nextStart
            currentStop = nextStop
            currentGID = nextGID
            currentGlen = nextGlen
            currentSense = nextSense

    outfile.close()
    return index


# Script entry point: main() falls back to sys.argv when called without
# arguments.
if __name__ == "__main__":
    main()