"""
    usage: python $ERANGEPATH/findall.py label samplerdsfile regionoutfile
           [--control controlrdsfile] [--minimum minHits] [--ratio minRatio]
           [--spacing maxSpacing] [--listPeak] [--shift #bp | learn] [--learnFold num]
           [--noshift] [--autoshift] [--reportshift] [--nomulti] [--minPlus fraction]
           [--maxPlus fraction] [--leftPlus fraction] [--minPeak RPM] [--raw]
           [--revbackground] [--pvalue self|back|none] [--nodirectionality]
           [--strandfilter plus/minus] [--trimvalue percent] [--notrim]
           [--cache pages] [--log altlogfile] [--flag aflag] [--append] [--RNA]

           where values in brackets are optional and label is an arbitrary string.

           Use -ratio (default 4 fold) to set the minimum fold enrichment
           over the control, -minimum (default 4) is the minimum number of reads
           (RPM) within the region, and -spacing (default readlen) to set the maximum
           distance between reads in the region. -listPeak lists the peak of the
           region. Peaks mut be higher than -minPeak (default 0.5 RPM).
           Pvalues are calculated from the sample (change with -pvalue),
           unless the -revbackground flag and a control RDS file are provided.

           By default, all numbers and parameters are on a reads per
           million (RPM) basis. -raw will treat all settings, ratios and reported
           numbers as raw counts rather than RPM. Use -notrim to turn off region
           trimming and -trimvalue to control trimming (default 10% of peak signal)

           The peak finder uses minimal directionality information that can
           be turned off with -nodirectionality ; the fraction of + strand reads
           required to be to the left of the peak (default 0.3) can be set with
           -leftPlus ; -minPlus and -maxPlus change the minimum/maximum fraction
           of plus reads in a region, which (defaults 0.25 and 0.75, respectively).

           Use -shift to shift reads either by half the expected
           fragment length (default 0 bp) or '-shift learn ' to learn the shift
           based on the first chromosome. If you prefer to learn the shift
           manually, use -autoshift to calculate a per-region shift value, which
           can be reported using -reportshift. -strandfilter should only be used
           when explicitely calling unshifted stranded peaks from non-ChIP-seq
           data such as directional RNA-seq. regionoutfile is written over by
           default unless given the -append flag.
"""

try:
    import psyco
    psyco.full()
except:
    pass

import sys
import math
import string
import optparse
from commoncode import writeLog, findPeak, getBestShiftForRegion, getConfigParser, getConfigOption, getConfigIntOption, getConfigFloatOption, getConfigBoolOption
import ReadDataset
import Region


versionString = "findall: version 3.2"
print versionString

def usage():
    print __doc__


def main(argv=None):
    if not argv:
        argv = sys.argv

    parser = makeParser()
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 3:
        usage()
        sys.exit(2)

    factor = args[0]
    hitfile = args[1]
    outfilename = args[2]

    findall(factor, hitfile, outfilename, options.minHits, options.minRatio, options.maxSpacing, options.listPeak, options.shift,
            options.stringency, options.noshift, options.autoshift, options.reportshift,
            options.minPlusRatio, options.maxPlusRatio, options.leftPlusRatio, options.minPeak,
            options.normalize, options.logfilename, options.withFlag, options.doDirectionality,
            options.trimValue, options.doTrim, options.doAppend, options.rnaSettings,
            options.cachePages, options.ptype, options.mockfile, options.doRevBackground, options.noMulti,
            options.strandfilter, options.combine5p)


def makeParser():
    usage = __doc__

    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--control", dest="mockfile")
    parser.add_option("--minimum", type="float", dest="minHits")
    parser.add_option("--ratio", type="float", dest="minRatio")
    parser.add_option("--spacing", type="int", dest="maxSpacing")
    parser.add_option("--listPeak", action="store_true", dest="listPeak")
    parser.add_option("--shift", dest="shift")
    parser.add_option("--learnFold", type="float", dest="stringency")
    parser.add_option("--noshift", action="store_true", dest="noShift")
    parser.add_option("--autoshift", action="store_true", dest="autoshift")
    parser.add_option("--reportshift", action="store_true", dest="reportshift")
    parser.add_option("--nomulti", action="store_true", dest="noMulti")
    parser.add_option("--minPlus", type="float", dest="minPlusRatio")
    parser.add_option("--maxPlus", type="float", dest="maxPlusRatio")
    parser.add_option("--leftPlus", type="float", dest="leftPlusRatio")
    parser.add_option("--minPeak", type="float", dest="minPeak")
    parser.add_option("--raw", action="store_false", dest="normalize")
    parser.add_option("--revbackground", action="store_true", dest="doRevBackground")
    parser.add_option("--pvalue", dest="ptype")
    parser.add_option("--nodirectionality", action="store_false", dest="doDirectionality")
    parser.add_option("--strandfilter", dest="strandfilter")
    parser.add_option("--trimvalue", type="float", dest="trimValue")
    parser.add_option("--notrim", action="store_false", dest="doTrim")
    parser.add_option("--cache", type="int", dest="cachePages")
    parser.add_option("--log", dest="logfilename")
    parser.add_option("--flag", dest="withFlag")
    parser.add_option("--append", action="store_true", dest="doAppend")
    parser.add_option("--RNA", action="store_true", dest="rnaSettings")
    parser.add_option("--combine5p", action="store_true", dest="combine5p")

    configParser = getConfigParser()
    section = "findall"
    minHits = getConfigFloatOption(configParser, section, "minHits", 4.0)
    minRatio = getConfigFloatOption(configParser, section, "minRatio", 4.0)
    maxSpacing = getConfigIntOption(configParser, section, "maxSpacing", 50)
    listPeak = getConfigBoolOption(configParser, section, "listPeak", False)
    shift = getConfigOption(configParser, section, "shift", None)
    stringency = getConfigFloatOption(configParser, section, "stringency", 4.0)
    noshift = getConfigBoolOption(configParser, section, "noshift", False)
    autoshift = getConfigBoolOption(configParser, section, "autoshift", False)
    reportshift = getConfigBoolOption(configParser, section, "reportshift", False)
    minPlusRatio = getConfigFloatOption(configParser, section, "minPlusRatio", 0.25)
    maxPlusRatio = getConfigFloatOption(configParser, section, "maxPlusRatio", 0.75)
    leftPlusRatio = getConfigFloatOption(configParser, section, "leftPlusRatio", 0.3)
    minPeak = getConfigFloatOption(configParser, section, "minPeak", 0.5)
    normalize = getConfigBoolOption(configParser, section, "normalize", True)
    logfilename = getConfigOption(configParser, section, "logfilename", "findall.log")
    withFlag = getConfigOption(configParser, section, "withFlag", "")
    doDirectionality = getConfigBoolOption(configParser, section, "doDirectionality", True)
    trimValue = getConfigOption(configParser, section, "trimValue", None)
    doTrim = getConfigBoolOption(configParser, section, "doTrim", True)
    doAppend = getConfigBoolOption(configParser, section, "doAppend", False)
    rnaSettings = getConfigBoolOption(configParser, section, "rnaSettings", False)
    cachePages = getConfigOption(configParser, section, "cachePages", None)
    ptype = getConfigOption(configParser, section, "ptype", None)
    mockfile = getConfigOption(configParser, section, "mockfile", None)
    doRevBackground = getConfigBoolOption(configParser, section, "doRevBackground", False)
    noMulti = getConfigBoolOption(configParser, section, "noMulti", False)
    strandfilter = getConfigOption(configParser, section, "strandfilter", None)
    combine5p = getConfigBoolOption(configParser, section, "combine5p", False)

    parser.set_defaults(minHits=minHits, minRatio=minRatio, maxSpacing=maxSpacing, listPeak=listPeak, shift=shift,
                        stringency=stringency, noshift=noshift, autoshift=autoshift, reportshift=reportshift,
                        minPlusRatio=minPlusRatio, maxPlusRatio=maxPlusRatio, leftPlusRatio=leftPlusRatio, minPeak=minPeak,
                        normalize=normalize, logfilename=logfilename, withFlag=withFlag, doDirectionality=doDirectionality,
                        trimValue=trimValue, doTrim=doTrim, doAppend=doAppend, rnaSettings=rnaSettings,
                        cachePages=cachePages, ptype=ptype, mockfile=mockfile, doRevBackground=doRevBackground, noMulti=noMulti,
                        strandfilter=strandfilter, combine5p=combine5p)

    return parser


def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing=50, listPeak=False, shift=None,
            stringency=4.0, noshift=False, autoshift=False, reportshift=False,
            minPlusRatio=0.25, maxPlusRatio=0.75, leftPlusRatio=0.3, minPeak=0.5,
            normalize=True, logfilename="findall.log", withFlag="", doDirectionality=True,
            trimValue=None, doTrim=True, doAppend=False, rnaSettings=False,
            cachePages=None, ptype=None, mockfile=None, doRevBackground=False, noMulti=False,
            strandfilter=None, combine5p=False):

    shiftValue = determineShiftValue(autoshift, shift, noshift, rnaSettings)

    if trimValue is not None:
        trimValue = float(trimValue) / 100.
        trimString = "%2.1f%s" % ((100. * trimValue), "%")
    else:
        trimValue = 0.1
        trimString = "10%"

    if not doTrim:
        trimString = "none"

    if doRevBackground:
        print "Swapping IP and background to calculate FDR"
        pValueType = "back"

    doControl = False
    if mockfile is not None:
        doControl = True

    doPvalue = True
    if ptype is not None:
        ptype = ptype.upper()
        if ptype == "NONE":
            doPvalue = False
            pValueType = "none"
            p = 1
            poissonmean = 0
        elif ptype == "SELF":
            pValueType = "self"
        elif ptype == "BACK":
            if doControl and doRevBackground:
                pValueType = "back"
            else:
                print "must have a control dataset and -revbackground for pValue type 'back'"
        else:
            print "could not use pValue type : %s" % ptype
    else:
        pValueType = "self"

    if cachePages is not None:
        doCache = True
    else:
        doCache = False
        cachePages = -1

    if withFlag != "":
        print "restrict to flag = %s" % withFlag

    useMulti = True
    if noMulti:
        print "using unique reads only"
        useMulti = False

    if rnaSettings:
        print "using settings appropriate for RNA: -nodirectionality -notrim -noshift"
        doTrim = False
        doDirectionality = False

    stranded = ""
    if strandfilter is not None:
        if strandfilter == "plus":
            stranded = "+"
            minPlusRatio = 0.9
            maxPlusRatio = 1.0
            print "only analyzing reads on the plus strand"
        elif strandfilter == "minus":
            stranded = "-"
            minPlusRatio = 0.0
            maxPlusRatio = 0.1
            print "only analyzing reads on the minus strand"

    stringency = max(stringency, 1.0)
    writeLog(logfilename, versionString, string.join(sys.argv[1:]))
    if doControl:
        print "\ncontrol:" 
        mockRDS = ReadDataset.ReadDataset(mockfile, verbose=True, cache=doCache)

        if cachePages > mockRDS.getDefaultCacheSize():
            mockRDS.setDBcache(cachePages)

    print "\nsample:" 
    hitRDS = ReadDataset.ReadDataset(hitfile, verbose=True, cache=doCache)
    readlen = hitRDS.getReadSize()
    if rnaSettings:
        maxSpacing = readlen

    if cachePages > hitRDS.getDefaultCacheSize():
        hitRDS.setDBcache(cachePages)

    if doAppend:
        fileMode = "a"
    else:
        fileMode = "w"

    outfile = open(outfilename, fileMode)

    outfile.write("#ERANGE %s\n" % versionString)
    if doControl:
        mockRDSsize = len(mockRDS) / 1000000.
        controlSampleString = "\t%s (%.1f M reads)" % (mockfile, mockRDSsize)
    else:
        controlSampleString = " none"

    hitRDSsize = len(hitRDS) / 1000000.
    outfile.write("#enriched sample:\t%s (%.1f M reads)\n#control sample:%s\n" % (hitfile, hitRDSsize, controlSampleString))

    if withFlag != "":
        outfile.write("#restrict to Flag = %s\n" % withFlag)

    print "\nenforceDirectionality=%s listPeak=%s nomulti=%s cache=%s " % (doDirectionality, listPeak, noMulti, doCache)
    print "spacing<%d minimum>%.1f ratio>%.1f minPeak=%.1f\ttrimmed=%s\tstrand=%s" % (maxSpacing, minHits, minRatio, minPeak, trimString, stranded)
    print "minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%s pvalue=%s" % (minPlusRatio, maxPlusRatio, leftPlusRatio, str(shiftValue), pValueType)

    outfile.write("#enforceDirectionality=%s listPeak=%s nomulti=%s cache=%s\n" % (doDirectionality, listPeak, noMulti, doCache))
    outfile.write("#spacing<%d minimum>%.1f ratio>%.1f minPeak=%.1f trimmed=%s strand=%s\n" % (maxSpacing, minHits, minRatio, minPeak, trimString, stranded))
    outfile.write("#minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%s pvalue=%s\n" % (minPlusRatio, maxPlusRatio, leftPlusRatio, str(shiftValue), pValueType))
    if normalize:
        print "Normalizing to RPM"
        countLabel = "RPM"
    else:
        countLabel = "COUNT"

    headerList = ["#regionID\tchrom\tstart\tstop", countLabel, "fold\tmulti%"]
    if doDirectionality:
        headerList.append("plus%\tleftPlus%")

    if listPeak:
        headerList.append("peakPos\tpeakHeight")

    if reportshift:
        headerList.append("readShift")

    if doPvalue:
        headerList.append("pValue")

    headline = string.join(headerList, "\t")
    print >> outfile, headline

    statistics = {"index": 0,
                  "total": 0,
                  "mIndex": 0,
                  "mTotal": 0,
                  "failed": 0
    }

    if minRatio < minPeak:
        minPeak = minRatio

    hitChromList = hitRDS.getChromosomes()
    if doControl:
        mockChromList = mockRDS.getChromosomes()

    if normalize:
        if doControl:
            mockSampleSize = mockRDSsize

        hitSampleSize = hitRDSsize

    hitChromList.sort()

    for chromosome in hitChromList:
        if doNotProcessChromosome(chromosome, doControl, mockChromList):
            continue

        print "chromosome %s" % (chromosome)
        hitDict = hitRDS.getReadsDict(fullChrom=True, chrom=chromosome, flag=withFlag, withWeight=True,
                                      doMulti=useMulti, findallOptimize=True, strand=stranded,
                                      combine5p=combine5p)
        maxCoord = hitRDS.getMaxCoordinate(chromosome, doMulti=useMulti)
        if shiftValue == "learn":
            shiftValue = learnShift(hitDict, hitSampleSize, mockRDS, chromosome, doControl, useMulti, normalize,
                                    mockSampleSize, minRatio, maxSpacing, maxCoord, stringency, readlen, minHits,
                                    logfilename, outfile, outfilename)

        regionStats, allRegionWeights, outregions = locateRegions(hitRDS, hitSampleSize, mockRDS, mockSampleSize,
                                                                  chromosome, useMulti, normalize, maxSpacing,
                                                                  doDirectionality, doTrim, minHits, minRatio,
                                                                  readlen, shiftValue, minPeak, minPlusRatio,
                                                                  maxPlusRatio, leftPlusRatio, listPeak, noMulti,
                                                                  doControl, factor, trimValue, outputRegionList=True)

        statistics["index"] += regionStats["index"]
        statistics["total"] += regionStats["total"]
        statistics["failed"] += regionStats["failed"]
        if not doRevBackground:
            if doPvalue:
                p, poissonmean = calculatePValue(allRegionWeights)

            print headline
            shiftModeValue = writeRegionsToFile(outfile, outregions, doPvalue, p, poissonmean, reportshift, shiftValue)
            continue

        #now do background swapping the two samples around
        print "calculating background..."
        backgroundTrimValue = 1/20.
        backgroundRegionStats, backgroundRegionWeights = locateRegions(mockRDS, mockSampleSize, hitRDS, hitSampleSize,
                                                                       chromosome, useMulti, normalize, maxSpacing,
                                                                       doDirectionality, doTrim, minHits, minRatio,
                                                                       readlen, shiftValue, minPeak, minPlusRatio,
                                                                       maxPlusRatio, leftPlusRatio, listPeak, noMulti,
                                                                       doControl, factor, backgroundTrimValue)

        statistics["mIndex"] += backgroundRegionStats["index"]
        statistics["mTotal"] += backgroundRegionStats["total"]
        statistics["failed"] += backgroundRegionStats["failed"]
        print statistics["mIndex"], statistics["mTotal"]
        if doPvalue:
            if pValueType == "self":
                p, poissonmean = calculatePValue(allRegionWeights)
            else:
                p, poissonmean = calculatePValue(backgroundRegionWeights)

        print headline
        shiftModeValue = writeRegionsToFile(outfile, outregions, doPvalue, p, poissonmean, reportshift, shiftValue)

    footer = getFooter(statistics, doDirectionality, doRevBackground, shiftValue, reportshift, shiftModeValue)
    print footer
    outfile.write(footer)
    outfile.close()

    writeLog(logfilename, versionString, "%s%s" % (outfilename, footer.replace("\n#", " | ")))


def determineShiftValue(autoshift, shift, noshift, rnaSettings):
    shiftValue = 0
    if autoshift:
        shiftValue = "auto"

    if shift is not None:
        try:
            shiftValue = int(shift)
        except ValueError:
            if shift == "learn":
                shiftValue = "learn"
                print "Will try to learn shift"

    if noshift or rnaSettings:
        shiftValue = 0

    return shiftValue


def doNotProcessChromosome(chromosome, doControl, mockChromList):
    skipChromosome = False
    if chromosome == "chrM":
        skipChromosome = True

    if doControl and (chromosome not in mockChromList):
        skipChromosome = True

    return skipChromosome


def calculatePValue(dataList):
    dataList.sort()
    listSize = float(len(dataList))
    try:
        poissonmean = sum(dataList) / listSize
    except ZeroDivisionError:
        poissonmean = 0

    print "Poisson n=%d, p=%f" % (listSize, poissonmean)
    p = math.exp(-poissonmean)

    return p, poissonmean


def learnShift(hitDict, hitSampleSize, mockRDS, chrom, doControl, useMulti, normalize, mockSampleSize, minRatio, maxSpacing, maxCoord,
               stringency, readlen, minHits, logfilename, outfile, outfilename, minSites=30):

    print "learning shift.... will need at least %d training sites" % minSites
    previousHit = -1 * maxSpacing
    hitList = [-1]
    totalWeight = 0
    readList = []
    shiftDict = {}
    count = 0
    numStarts = 0
    for read in hitDict[chrom]:
        pos = read["start"]
        sense = read["sense"]
        weight = read["weight"]
        if abs(pos - previousHit) > maxSpacing or pos == maxCoord:
            sumAll = totalWeight
            if normalize:
                sumAll /= hitSampleSize

            regionStart = hitList[0]
            regionStop = hitList[-1]
            regionLength = regionStop - regionStart
            # we're going to require stringent settings
            if sumAll >= stringency * minHits and numStarts > stringency * minRatio and regionLength > stringency * readlen:
                foldRatio = getFoldRatio(mockRDS, chrom, regionStart, regionStop, doControl, useMulti, normalize, mockSampleSize, sumAll, minRatio)

                if foldRatio >= minRatio:
                    localshift = getBestShiftForRegion(readList, regionStart, regionLength, doWeight=True)
                    try:
                        shiftDict[localshift] += 1
                    except KeyError:
                        shiftDict[localshift] = 1

                    count += 1

            hitList = []
            totalWeight = 0
            readList = []
            numStarts = 0

        if pos not in hitList:
            numStarts += 1

        hitList.append(pos)
        totalWeight += weight
        readList.append({"start": pos, "sense": sense, "weight": weight})
        previousHit = pos

    bestShift = 0
    bestCount = 0
    learningSettings = ["#learn: stringency=%.2f min_signal=%2.f min_ratio=%.2f min_region_size=%d" % (stringency, stringency * minHits,
                                                                                                       stringency * minRatio, stringency * readlen),
                        "#number of training examples: %d" % count]
    outline = string.join(learningSettings, "\n")
    print outline
    writeLog(logfilename, versionString, "%s%s" % (outfilename, outline))
    if count < minSites:
        outline = "#too few training examples to pick a shiftValue - defaulting to 0\n#consider picking a lower minimum or threshold"
        print >> outfile, outline
        writeLog(logfilename, versionString, "%s%s" % (outfilename, outline))
        shiftValue = 0
    else:
        for shift in sorted(shiftDict):
            if shiftDict[shift] > bestCount:
                bestShift = shift
                bestCount = shiftDict[shift]

        shiftValue = bestShift
        print shiftDict

    outline = "#picked shiftValue to be %d" % shiftValue
    print outline
    print >> outfile, outline
    writeLog(logfilename, versionString, "%s%s" % (outfilename, outline))

    return shiftValue


def getFoldRatio(rds, chrom, start, stop, doControl, useMulti, normalize, sampleSize, sumAll, minRatio):
    if doControl:
        foldRatio = getFoldRatioFromRDS(rds, chrom, start, stop, useMulti, normalize, sampleSize, sumAll)
    else:
        foldRatio = minRatio

    return foldRatio


def getFoldRatioFromRDS(rds, chrom, start, stop, useMulti, normalize, sampleSize, sumAll):
    numMock = 1. + rds.getCounts(chrom, start, stop, uniqs=True, multi=useMulti, splices=False, reportCombined=True)
    if normalize:
        numMock /= sampleSize

    foldRatio = sumAll / numMock

    return foldRatio


def locateRegions(rds, rdsSampleSize, referenceRDS, referenceSampleSize, chrom, useMulti,
                normalize, maxSpacing, doDirectionality, doTrim, minHits, minRatio, readlen,
                shiftValue, minPeak, minPlusRatio, maxPlusRatio, leftPlusRatio, listPeak,
                noMulti, doControl, factor, trimValue, outputRegionList=False):

    index = 0
    totalRegionWeight = 0
    failedCounter = 0
    previousHit = - 1 * maxSpacing
    currentHitList = [-1]
    currentTotalWeight = 0
    currentUniqReadCount = 0
    currentReadList = []
    regionWeights = []
    outregions = []
    numStarts = 0
    hitDict = rds.getReadsDict(fullChrom=True, chrom=chrom, withWeight=True, doMulti=useMulti, findallOptimize=True)
    maxCoord = rds.getMaxCoordinate(chrom, doMulti=useMulti)
    for read in hitDict[chrom]:
        pos = read["start"]
        sense = read["sense"]
        weight = read["weight"]
        if abs(pos - previousHit) > maxSpacing or pos == maxCoord:
            sumAll = currentTotalWeight
            if normalize:
                sumAll /= rdsSampleSize

            regionStart = currentHitList[0]
            regionStop = currentHitList[-1]
            regionWeights.append(int(sumAll))
            if sumAll >= minHits and numStarts > minRatio and (regionStop - regionStart) > readlen:
                sumMulti = 0.
                #first pass uses getFoldRatio on mockRDS as there may not be control
                foldRatio = getFoldRatioFromRDS(referenceRDS, chrom, regionStart, regionStop, useMulti, normalize, referenceSampleSize, sumAll)
                if foldRatio >= minRatio:
                    # first pass, with absolute numbers
                    peak = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=shiftValue)

                    bestPos = peak.topPos[0]
                    numHits = peak.numHits
                    peakScore = peak.smoothArray[bestPos]
                    numPlus = peak.numPlus
                    shift = peak.shift
                    numLeft = peak.numLeft
                    if normalize:
                        peakScore /= rdsSampleSize

                    if doTrim:
                        minSignalThresh = trimValue * peakScore
                        start = 0
                        stop = regionStop - regionStart - 1
                        startFound = False
                        while not startFound:
                            if peak.smoothArray[start] >= minSignalThresh or start == bestPos:
                                startFound = True
                            else:
                                start += 1

                        stopFound = False
                        while not stopFound:
                            if peak.smoothArray[stop] >= minSignalThresh or stop == bestPos:
                                stopFound = True
                            else:
                                stop -= 1

                        regionStop = regionStart + stop
                        regionStart += start
                        trimPeak = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=shift)

                        sumAll = trimPeak.numHits
                        numPlus = trimPeak.numPlus
                        numLeft = trimPeak.numLeft
                        if normalize:
                            sumAll /= rdsSampleSize

                        foldRatio = getFoldRatio(referenceRDS, chrom, regionStart, regionStop, doControl, useMulti, normalize, referenceSampleSize, sumAll, minRatio)
                        if outputRegionList:
                            sumMulti = rds.getCounts(chrom, regionStart, regionStop, uniqs=False, multi=useMulti, splices=False, reportCombined=True)
                        # just in case it changed, use latest data
                        try:
                            bestPos = trimPeak.topPos[0]
                            peakScore = trimPeak.smoothArray[bestPos]
                        except:
                            continue

                        # normalize to RPM
                        if normalize:
                            peakScore /= rdsSampleSize

                    elif outputRegionList:
                        sumMulti = currentTotalWeight - currentUniqReadCount

                    if outputRegionList:
                        # normalize to RPM
                        if normalize:
                            sumMulti /= rdsSampleSize

                        try:
                            multiP = 100. * (sumMulti / sumAll)
                        except:
                            break

                        if noMulti:
                            multiP = 0.

                    # check that we still pass threshold
                    if sumAll >= minHits and  foldRatio >= minRatio and (regionStop - regionStart) > readlen:
                        plusRatio = float(numPlus)/numHits
                        if peakScore >= minPeak and minPlusRatio <= plusRatio <= maxPlusRatio:
                            if outputRegionList:
                                peakDescription = ""
                                if listPeak:
                                    peakDescription = "\t%d\t%.1f" % (regionStart + bestPos, peakScore)

                            if doDirectionality:
                                if leftPlusRatio < numLeft / numPlus:
                                    index += 1
                                    if outputRegionList:
                                        plusP = plusRatio * 100.
                                        leftP = 100. * numLeft / numPlus
                                        # we have a region that passes all criteria
                                        region = Region.DirectionalRegion(regionStart, regionStop + readlen - 1,
                                                                          factor, index, chrom, sumAll,
                                                                          foldRatio, multiP, plusP, leftP,
                                                                          peakDescription, shift)
                                        outregions.append(region)

                                    totalRegionWeight += sumAll
                                else:
                                    failedCounter += 1
                            else:
                                # we have a region, but didn't check for directionality
                                index += 1
                                totalRegionWeight += sumAll
                                if outputRegionList:
                                    region = Region.Region(regionStart, regionStop + readlen - 1, factor, index, chrom,
                                                           sumAll, foldRatio, multiP, peakDescription, shift)
                                    outregions.append(region)

            currentHitList = []
            currentTotalWeight = 0
            currentUniqReadCount = 0
            currentReadList = []
            numStarts = 0

        if pos not in currentHitList:
            numStarts += 1

        currentHitList.append(pos)
        currentTotalWeight += weight
        if weight == 1.0:
            currentUniqReadCount += 1

        currentReadList.append({"start": pos, "sense": sense, "weight": weight})
        previousHit = pos

    statistics = {"index": index,
                  "total": totalRegionWeight,
                  "failed": failedCounter
    }

    if outputRegionList:
        return statistics, regionWeights, outregions
    else:
        return statistics, regionWeights


def writeRegionsToFile(outfile, outregions, doPvalue, pValue, poissonmean, reportshift, shiftValue):
    bestShift = 0
    shiftDict = {}
    for region in outregions:
        # iterative poisson from http://stackoverflow.com/questions/280797?sort=newest
        if reportshift:
            outputList = [region.printRegionWithShift()]
            if shiftValue == "auto":
                try:
                    shiftDict[region.shift] += 1
                except KeyError:
                    shiftDict[region.shift] = 1
        else:
            outputList = [region.printRegion()]

        # iterative poisson from http://stackoverflow.com/questions/280797?sort=newest
        if doPvalue:
            sumAll = int(region.numReads)
            for i in xrange(sumAll):
                pValue *= poissonmean
                pValue /= i+1

            outputList.append("%1.2f" % pValue)

        outline = string.join(outputList, "\t")
        print outline
        print >> outfile, outline

    bestCount = 0
    for shift in sorted(shiftDict):
        if shiftDict[shift] > bestCount:
            bestShift = shift
            bestCount = shiftDict[shift]

    return bestShift


def getFooter(stats, doDirectionality, doRevBackground, shiftValue, reportshift, shiftModeValue):
    footerList = ["#stats:\t%.1f RPM in %d regions" % (stats["total"], stats["index"])]
    if doDirectionality:
        footerList.append("#\t\t%d additional regions failed directionality filter" % stats["failed"])

    if doRevBackground:
        try:
            percent = min(100. * (float(stats["mIndex"])/stats["index"]), 100)
        except (ValueError, ZeroDivisionError):
            percent = 0.

        footerList.append("#%d regions (%.1f RPM) found in background (FDR = %.2f percent)" % (stats["mIndex"], stats["mTotal"], percent))

    if shiftValue == "auto" and reportshift:
        footerList.append("#mode of shift values: %d" % shiftModeValue)

    footer = string.join(footerList, "\n")

    return footer

if __name__ == "__main__":
    main(sys.argv)