"""
    usage: python $ERANGEPATH/findall.py label samplerdsfile regionoutfile
           [--control controlrdsfile] [--minimum minHits] [--ratio minRatio]
           [--spacing maxSpacing] [--listPeak] [--shift #bp | learn] [--learnFold num]
           [--noshift] [--autoshift] [--reportshift] [--nomulti] [--minPlus fraction]
           [--maxPlus fraction] [--leftPlus fraction] [--minPeak RPM] [--raw]
           [--revbackground] [--pvalue self|back|none] [--nodirectionality]
           [--strandfilter plus/minus] [--trimvalue percent] [--notrim]
           [--cache pages] [--log altlogfile] [--flag aflag] [--append] [--RNA]

           where values in brackets are optional and label is an arbitrary string.

           Use -ratio (default 4 fold) to set the minimum fold enrichment
           over the control, -minimum (default 4) is the minimum number of reads
           (RPM) within the region, and -spacing (default readlen) to set the maximum
           distance between reads in the region. -listPeak lists the peak of the
           region. Peaks mut be higher than -minPeak (default 0.5 RPM).
           Pvalues are calculated from the sample (change with -pvalue),
           unless the -revbackground flag and a control RDS file are provided.

           By default, all numbers and parameters are on a reads per
           million (RPM) basis. -raw will treat all settings, ratios and reported
           numbers as raw counts rather than RPM. Use -notrim to turn off region
           trimming and -trimvalue to control trimming (default 10% of peak signal)

           The peak finder uses minimal directionality information that can
           be turned off with -nodirectionality ; the fraction of + strand reads
           required to be to the left of the peak (default 0.3) can be set with
           -leftPlus ; -minPlus and -maxPlus change the minimum/maximum fraction
           of plus reads in a region, which (defaults 0.25 and 0.75, respectively).

           Use -shift to shift reads either by half the expected
           fragment length (default 0 bp) or '-shift learn ' to learn the shift
           based on the first chromosome. If you prefer to learn the shift
           manually, use -autoshift to calculate a per-region shift value, which
           scan be reported using -reportshift. -strandfilter should only be used
           when explicitely calling unshifted stranded peaks from non-ChIP-seq
           data such as directional RNA-seq. regionoutfile is written over by
           default unless given the -append flag.
"""

try:
    import psyco
    psyco.full()
except:
    pass

from commoncode import readDataset, writeLog, findPeak
import sys, math, string, optparse

versionString = "%s: version 3.141592653589" % sys.argv[0]
print versionString

def usage():
    print __doc__


def main(argv=None):
    if not argv:
        argv = sys.argv

    usage = __doc__

    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--control", dest="mockfile")
    parser.add_option("--minimum", type="float", dest="minHits")
    parser.add_option("--ratio", type="float", dest="minRatio")
    parser.add_option("--spacing", type="int", dest="maxSpacing")
    parser.add_option("--listPeak", action="store_true", dest="listPeak")
    parser.add_option("--shift", dest="shift")
    parser.add_option("--learnFold", type="float", dest="stringency")
    parser.add_option("--noshift", action="store_true", dest="noShift")
    parser.add_option("--autoshift", action="store_true", dest="autoshift")
    parser.add_option("--reportshift", action="store_true", dest="reportshift")
    parser.add_option("--nomulti", action="store_true", dest="noMulti")
    parser.add_option("--minPlus", type="float", dest="minPlusRatio")
    parser.add_option("--maxPlus", type="float", dest="maxPlusRatio")
    parser.add_option("--leftPlus", type="float", dest="leftPlusRatio")
    parser.add_option("--minPeak", type="float", dest="minPeak")
    parser.add_option("--raw", action="store_false", dest="normalize")
    parser.add_option("--revbackground", action="store_true", dest="doRevBackground")
    parser.add_option("--pvalue", dest="ptype")
    parser.add_option("--nodirectionality", action="store_false", dest="doDirectionality")
    parser.add_option("--strandfilter", dest="strandfilter")
    parser.add_option("--trimvalue", type="float", dest="trimValue")
    parser.add_option("--notrim", action="store_false", dest="doTrim")
    parser.add_option("--cache", type="int", dest="cachePages")
    parser.add_option("--log", dest="logfilename")
    parser.add_option("--flag", dest="withFlag")
    parser.add_option("--append", action="store_true", dest="doAppend")
    parser.add_option("--RNA", action="store_true", dest="rnaSettings")
    parser.add_option("--combine5p", action="store_true", dest="combine5p")
    parser.set_defaults(minHits=4.0, minRatio=4.0, maxSpacing=50, listPeak=False, shift=None,
                        stringency=4.0, noshift=False, autoshift=False, reportshift=False,
                        minPlusRatio=0.25, maxPlusRatio=0.75, leftPlusRatio=0.3, minPeak=0.5,
                        normalize=True, logfilename="findall.log", withFlag="", doDirectionality=True,
                        trimValue=None, doTrim=True, doAppend=False, rnaSettings=False,
                        cachePages=None, ptype=None, mockfile=None, doRevBackground=False, noMulti=False,
                        strandfilter=None, combine5p=False)

    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 3:
        usage()
        sys.exit(2)

    factor = args[0]
    hitfile = args[1]
    outfilename = args[2]

    findall(factor, hitfile, outfilename, options.minHits, options.minRatio, options.maxSpacing, options.listPeak, options.shift,
            options.stringency, options.noshift, options.autoshift, options.reportshift,
            options.minPlusRatio, options.maxPlusRatio, options.leftPlusRatio, options.minPeak,
            options.normalize, options.logfilename, options.withFlag, options.doDirectionality,
            options.trimValue, options.doTrim, options.doAppend, options.rnaSettings,
            options.cachePages, options.ptype, options.mockfile, options.doRevBackground, options.noMulti,
            options.strandfilter, options.combine5p)


def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing=50, listPeak=False, shift=None,
            stringency=4.0, noshift=False, autoshift=False, reportshift=False,
            minPlusRatio=0.25, maxPlusRatio=0.75, leftPlusRatio=0.3, minPeak=0.5,
            normalize=True, logfilename="findall.log", withFlag="", doDirectionality=True,
            trimValue=None, doTrim=True, doAppend=False, rnaSettings=False,
            cachePages=None, ptype=None, mockfile=None, doRevBackground=False, noMulti=False,
            strandfilter=None, combine5p=False):

    shiftValue = 0
    if autoshift:
        shiftValue = "auto"
        shiftDict = {}

    if shift is not None:
        try:
            shiftValue = int(shift)
        except ValueError:
            if shift == "learn":
                print "Will try to learn shift"

    if noshift:
        shiftValue = 0

    if normalize:
        print "Normalizing to RPM"

    if trimValue is not None:
        trimValue = float(trimValue) / 100.
        trimString = "%2.1f" % (100. * trimValue)
        trimString += "\%"
    else:
        trimValue = 0.1
        trimString = "10%"

    if not doTrim:
        trimString = "none"

    if doRevBackground:
        print "Swapping IP and background to calculate FDR"
        pValueType = "back"

    doControl = False
    if mockfile is not None:
        doControl = True

    if ptype is not None:
        ptype = ptype.upper()
        if ptype == "NONE":
            doPvalue = False
            pValueType = "none"
        elif ptype == "SELF":
            pValueType = "self"
        elif ptype == "BACK":
            if doControl and doRevBackground:
                pValueType = "back"
            else:
                print "must have a control dataset and -revbackground for pValue type 'back'"
        else:
            print "could not use pValue type : %s" % ptype
    else:
        doPvalue = True
        pValueType = "self"

    if cachePages is not None:
        doCache = True
    else:
        doCache = False
        cachePages = -1

    if withFlag != "":
        print "restrict to flag = %s" % withFlag

    useMulti = True
    if noMulti:
        print "using unique reads only"
        useMulti = False

    if rnaSettings:
        print "using settings appropriate for RNA: -nodirectionality -notrim -noshift"
        shiftValue = 0
        doTrim = False
        doDirectionality = False

    stranded = ""
    if strandfilter is not None:
        if strandfilter == "plus":
            stranded = "+"
            minPlusRatio = 0.9
            maxPlusRatio = 1.0
            print "only analyzing reads on the plus strand"
        elif strandfilter == "minus":
            stranded = "-"
            minPlusRatio = 0.0
            maxPlusRatio = 0.1
            print "only analyzing reads on the minus strand"

    stringency = max(stringency, 1.0)
    writeLog(logfilename, versionString, string.join(sys.argv[1:]))
    if doControl:
        print "\ncontrol:" 
        mockRDS = readDataset(mockfile, verbose=True, cache=doCache)

        if cachePages > mockRDS.getDefaultCacheSize():
            mockRDS.setDBcache(cachePages)

    print "\nsample:" 
    hitRDS = readDataset(hitfile, verbose=True, cache=doCache)
    readlen = hitRDS.getReadSize()
    if rnaSettings:
        maxSpacing = readlen

    print "\nenforceDirectionality=%s listPeak=%s nomulti=%s cache=%s " % (doDirectionality, listPeak, noMulti, doCache)
    print "spacing<%d minimum>%.1f ratio>%.1f minPeak=%.1f\ttrimmed=%s\tstrand=%s" % (maxSpacing, minHits, minRatio, minPeak, trimString, stranded)
    print "minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%s pvalue=%s" % (minPlusRatio, maxPlusRatio, leftPlusRatio, str(shiftValue), pValueType)

    if cachePages > hitRDS.getDefaultCacheSize():
        hitRDS.setDBcache(cachePages)
    print 

    hitRDSsize = len(hitRDS) / 1000000.
    if doControl:
        mockRDSsize = len(mockRDS) / 1000000.

    if normalize:
        if doControl:
            mockSampleSize = mockRDSsize
        hitSampleSize = hitRDSsize

    if doAppend:
        outfile = open(outfilename, "a")
    else:
        outfile = open(outfilename, "w")

    outfile.write("#ERANGE %s\n" % versionString)
    if doControl:
        outfile.write("#enriched sample:\t%s (%.1f M reads)\n#control sample:\t%s (%.1f M reads)\n" % (hitfile, hitRDSsize, mockfile, mockRDSsize))
    else:
        outfile.write("#enriched sample:\t%s (%.1f M reads)\n#control sample: none\n" % (hitfile, hitRDSsize))

    if withFlag != "":
        outfile.write("#restrict to Flag = %s\n" % withFlag)

    outfile.write("#enforceDirectionality=%s listPeak=%s nomulti=%s cache=%s\n" % (doDirectionality, listPeak, noMulti, doCache))
    outfile.write("#spacing<%d minimum>%.1f ratio>%.1f minPeak=%.1f trimmed=%s strand=%s\n" % (maxSpacing, minHits, minRatio, minPeak, trimString, stranded))
    outfile.write("#minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%s pvalue=%s\n" % (minPlusRatio, maxPlusRatio, leftPlusRatio, str(shiftValue), pValueType))
    if normalize:
        isNormalized = "RPM"
    else:
        isNormalized = "COUNT"

    headline = "#regionID\tchrom\tstart\tstop\t%s\tfold\tmulti" % isNormalized
    headline += "%"
    if doDirectionality:
        headline += "\tplus%\tleftPlus%"

    if listPeak:
        headline += "\tpeakPos\tpeakHeight"

    if reportshift:
        headline += "\treadShift"

    if doPvalue:
        headline += "\tpValue"

    outfile.write("%s\n" %  headline)

    index = 0
    total = 0

    mIndex = 0
    mTotal = 0

    if minRatio < minPeak:
        minPeak = minRatio

    hitChromList = hitRDS.getChromosomes()
    if doControl:
        mockChromList = mockRDS.getChromosomes()

    hitChromList.sort()

    failedCounter = 0
    for achrom in hitChromList:
        outregions = []
        allregions = []
        if achrom == "chrM":
            continue

        if doControl and (achrom not in mockChromList):
            continue

        print "chromosome %s" % (achrom)
        previousHit = - 1 * maxSpacing
        currentHitList = [-1]
        currentWeightList = [0]
        currentReadList = []
        numStarts = 0
        if withFlag == "":
            hitDict = hitRDS.getReadsDict(fullChrom=True, chrom=achrom, withWeight=True, doMulti=useMulti, findallOptimize=True, strand=stranded, combine5p=combine5p)
        else:
            hitDict = hitRDS.getReadsDict(fullChrom=True, chrom=achrom, flag=withFlag, withWeight=True, doMulti=useMulti, findallOptimize=True, strand=stranded, combine5p=combine5p)

        maxCoord = hitRDS.getMaxCoordinate(achrom, doMulti=useMulti)
        chromIndex = 1
        if shiftValue == "learn":
            print "learning shift.... will need at least 30 training sites"
            learnPreviousHit = -1 * maxSpacing
            learnHitList = [-1]
            learnWeightList = [0]
            learnReadList = []
            learnDict = {}
            learnCount = 0
            for (pos, sense, weight) in hitDict[achrom]:
                if abs(pos - learnPreviousHit) > maxSpacing or pos == maxCoord:
                    sumAll = sum(learnWeightList)
                    if normalize:
                        sumAll /= hitSampleSize

                    regionStart = learnHitList[0]
                    regionStop = learnHitList[-1]
                    # we're going to require stringent settings
                    if sumAll >= stringency * minHits and learnStarts > stringency * minRatio and (regionStop - regionStart) > stringency * readlen:
                        if doControl:
                            numMock = 1. + mockRDS.getCounts(achrom, regionStart, regionStop, uniqs=True, multi=useMulti, splices=False, reportCombined=True)
                            if normalize:
                                numMock /= mockSampleSize

                            foldRatio = sumAll / numMock
                        else:
                            foldRatio = minRatio

                        if foldRatio >= minRatio:
                            (topPos, numHits, smoothArray, numPlus, localshift) = findPeak(learnReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, shift="auto", returnShift=True)
                            try:
                                learnDict[localshift] += 1
                            except:
                                learnDict[localshift] = 1

                            learnCount += 1

                    learnHitList = []
                    learnWeightList = []
                    learnReadList = []

                if pos not in currentHitList:
                    learnStarts += 1

                learnHitList.append(pos)
                learnWeightList.append(weight)
                learnReadList.append((pos, sense, weight))
                learnPreviousHit = pos

            bestShift = 0
            bestCount = 0
            outline = "#learn: stringency=%.2f min_signal=%2.f min_ratio=%.2f min_region_size=%d\n#number of training examples: %d" % (stringency,stringency * minHits,stringency * minRatio,stringency * readlen,learnCount)
            print outline
            writeLog(logfilename, versionString, outfilename + outline)
            if learnCount < 30:
                outline = "#too few training examples to pick a shiftValue - defaulting to 0\n"
                outline = "#consider picking a lower minimum or threshold"
                print outline
                writeLog(logfilename, versionString, outfilename + outline)
                shiftValue = 0
            else:
                for shift in sorted(learnDict):
                    if learnDict[shift] > bestCount:
                        bestShift = shift
                        bestCount = learnDict[shift]

                shiftValue = bestShift
                print learnDict

            outline = "#picked shiftValue to be %d" % shiftValue
            print outline
            outfile.write(outline + "\n")
            writeLog(logfilename, versionString, outfilename + outline)

        for (pos, sense, weight) in hitDict[achrom]:
            chromIndex += 1
            if abs(pos - previousHit) > maxSpacing or pos == maxCoord:
                sumAll = sum(currentWeightList)
                if normalize:
                    sumAll /= hitSampleSize

                regionStart = currentHitList[0]
                regionStop = currentHitList[-1]
                allregions.append(int(sumAll))
                if sumAll >= minHits and numStarts > minRatio and (regionStop - regionStart) > readlen:
                    sumMulti = 0.
                    if doControl:
                        numMock = 1. + mockRDS.getCounts(achrom, regionStart, regionStop, uniqs=True, multi=useMulti, splices=False, reportCombined=True)
                        if normalize:
                            numMock /= mockSampleSize

                        foldRatio = sumAll / numMock
                    else:
                        foldRatio = minRatio

                    if foldRatio >= minRatio:
                        # first pass, with absolute numbers
                        if doDirectionality:
                            (topPos, numHits, smoothArray, numPlus, numLeft, localshift) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=shiftValue, returnShift=True)
                        else:
                            (topPos, numHits, smoothArray, numPlus, localshift) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, shift=shiftValue, returnShift=True)

                        bestPos = topPos[0]
                        peakScore = smoothArray[bestPos]
                        if normalize:
                            peakScore /= hitSampleSize

                        if doTrim:
                            minSignalThresh = trimValue * peakScore 
                            start = 0
                            stop = regionStop - regionStart - 1
                            startFound = False
                            while not startFound:
                                if smoothArray[start] >= minSignalThresh or start == bestPos:
                                    startFound = True
                                else:
                                    start += 1

                            stopFound = False
                            while not stopFound:
                                if smoothArray[stop] >= minSignalThresh or stop == bestPos:
                                    stopFound = True
                                else:
                                    stop -= 1

                            regionStop = regionStart + stop
                            regionStart += start
                            try:
                                if doDirectionality:
                                    (topPos, sumAll, smoothArray, numPlus, numLeft) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=localshift)
                                else:
                                    (topPos, sumAll, smoothArray, numPlus) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, shift=localshift)
                            except:
                                continue

                            if normalize:
                                sumAll /= hitSampleSize

                            if doControl:
                                numMock = 1. + mockRDS.getCounts(achrom, regionStart, regionStop, uniqs=True, multi=useMulti, splices=False, reportCombined=True)
                                if normalize:
                                    numMock /= mockSampleSize

                                foldRatio = sumAll / numMock
                            else:
                                foldRatio = minRatio

                            sumMulti = hitRDS.getCounts(achrom, regionStart, regionStop, uniqs=False, multi=useMulti, splices=False, reportCombined=True)
                            # just in case it changed, use latest data
                            try:
                                bestPos = topPos[0]
                                peakScore = smoothArray[bestPos]
                                if normalize:
                                    peakScore /= hitSampleSize
                            except:
                                continue
                        else:
                            sumMulti = sum(currentWeightList) - currentWeightList.count(1.0)

                        # normalize to RPM
                        if normalize:
                            sumMulti /= hitSampleSize

                        try:
                            multiP = 100. * (sumMulti / sumAll)
                        except:
                            break

                        if noMulti:
                            multiP = 0.

                        # check that we still pass threshold
                        if sumAll >= minHits and  foldRatio >= minRatio and (regionStop - regionStart) > readlen:
                            plusRatio = float(numPlus)/numHits
                            if peakScore >= minPeak and minPlusRatio <= plusRatio <= maxPlusRatio:
                                peak = ""
                                if listPeak:
                                    peak= "\t%d\t%.1f" % (regionStart + bestPos, peakScore)

                                if doDirectionality:
                                    if leftPlusRatio < numLeft / numPlus:
                                        index += 1
                                        plusP = plusRatio * 100.
                                        leftP = 100. * numLeft / numPlus
                                        # we have a region that passes all criteria
                                        outregions.append((factor, index, achrom, regionStart, regionStop + readlen - 1, sumAll, foldRatio, multiP, plusP, leftP, peak, localshift))
                                        total += sumAll
                                    else:
                                        failedCounter += 1
                                else:
                                    index += 1
                                    # we have a region, but didn't check for directionality
                                    outregions.append((factor, index, achrom, regionStart, regionStop + readlen - 1, sumAll, foldRatio, multiP, peak, localshift))
                                    total += sumAll

                currentHitList = []
                currentWeightList = []
                currentReadList = []
                numStarts = 0

            if pos not in currentHitList:
                numStarts += 1

            currentHitList.append(pos)
            currentWeightList.append(weight)
            currentReadList.append((pos, sense, weight))
            previousHit = pos

        if not doRevBackground:
            if doPvalue:
                allregions.sort()
                regionallSize = float(len(allregions))
                try:
                    poissonmean = sum(allregions) / regionallSize
                except:
                    poissonmean = 0

                print "Poisson n=%d, p=%f" % (regionallSize, poissonmean)
                p = math.exp(-poissonmean)

            print headline
            for region in outregions:
                # iterative poisson from http://stackoverflow.com/questions/280797?sort=newest
                if doPvalue:
                    pValue = p
                    sumAll = int(region[5])
                    for i in xrange(sumAll):
                        pValue *= poissonmean
                        pValue /= i+1

                try:
                    if reportshift:
                        outline = "%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f%s\t%d" % region
                    else:
                        outline = "%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f%s" % region[:-1]
                except:
                    if reportshift:
                        outline = "%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f%s\t%d" % region
                    else:
                        outline = "%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f%s" % region[:-1]

                if doPvalue:
                    outline += "\t%1.2g" % pValue

                print outline
                outfile.write(outline + "\n")

            continue

        #now do background swapping the two samples around
        print "calculating background..."
        previousHit = - 1 * maxSpacing
        currentHitList = [-1]
        currentWeightList = [0]
        currentReadList = []
        backregions = []
        numStarts = 0
        hitDict = mockRDS.getReadsDict(fullChrom=True, chrom=achrom, withWeight=True, doMulti=useMulti, findallOptimize=True)
        maxCoord = mockRDS.getMaxCoordinate(achrom, doMulti=useMulti)
        for (pos, sense, weight) in hitDict[achrom]:
            if abs(pos - previousHit) > maxSpacing or pos == maxCoord:
                sumAll = sum(currentWeightList)
                if normalize:
                    sumAll /= mockSampleSize

                regionStart = currentHitList[0]
                regionStop = currentHitList[-1]
                backregions.append(int(sumAll))
                if sumAll >= minHits and numStarts > minRatio and (regionStop - regionStart) > readlen:
                    numMock = 1. + hitRDS.getCounts(achrom, regionStart, regionStop, uniqs=True, multi=useMulti, splices=False, reportCombined=True)
                    if normalize:
                        numMock /= hitSampleSize

                    foldRatio = sumAll / numMock
                    if foldRatio >= minRatio:
                        # first pass, with absolute numbers
                        if doDirectionality:
                            (topPos, numHits, smoothArray, numPlus, numLeft, backshift) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=shiftValue, returnShift=True)
                        else:
                            (topPos, numHits, smoothArray, numPlus, backshift) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, shift=shiftValue, returnShift=True)

                        bestPos = topPos[0]
                        peakScore = smoothArray[bestPos]
                        if normalize:
                            peakScore /= mockSampleSize

                        if doTrim:
                            minSignalThresh = peakScore / 20.
                            start = 0
                            stop = regionStop - regionStart - 1
                            startFound = False
                            while not startFound:
                                if smoothArray[start] >= minSignalThresh or start == bestPos:
                                    startFound = True
                                else:
                                    start += 1

                            stopFound = False
                            while not stopFound:
                                if smoothArray[stop] >= minSignalThresh or stop == bestPos:
                                    stopFound = True
                                else:
                                    stop -= 1

                            regionStop = regionStart + stop
                            regionStart += start
                            try:
                                if doDirectionality:
                                    (topPos, sumAll, smoothArray, numPlus, numLeft) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=backshift)
                                else:
                                    (topPos, sumAll, smoothArray, numPlus) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, shift=backshift)
                            except:
                                continue

                            numMock = 1. + hitRDS.getCounts(achrom, regionStart, regionStop, uniqs=True, multi=useMulti, splices=False, reportCombined=True)
                            if normalize:
                                sumAll /= mockSampleSize
                                numMock /= hitSampleSize

                            foldRatio = sumAll / numMock
                            # just in case it changed, use latest data
                            try:
                                bestPos = topPos[0]
                                peakScore = smoothArray[bestPos]
                            except:
                                continue

                            # normalize to RPM
                            if normalize:
                                peakScore /= mockSampleSize

                        # check that we still pass threshold
                        if sumAll >= minHits and  foldRatio >= minRatio and (regionStop - regionStart) > readlen:
                            plusRatio = float(numPlus)/numHits
                            if peakScore >= minPeak and minPlusRatio <= plusRatio <= maxPlusRatio:
                                if doDirectionality:
                                    if leftPlusRatio < numLeft / numPlus:
                                        mIndex += 1
                                        mTotal += sumAll
                                    else:
                                        failedCounter += 1
                                else:
                                    # we have a region, but didn't check for directionality
                                    mIndex += 1
                                    mTotal += sumAll

                currentHitList = []
                currentWeightList = []
                currentReadList = []
                numStarts = 0

            if pos not in currentHitList:
                numStarts += 1

            currentHitList.append(pos)
            currentWeightList.append(weight)
            currentReadList.append((pos, sense, weight))
            previousHit = pos

        print mIndex, mTotal
        if doPvalue:
            if pValueType == "self":
                allregions.sort()
                regionallSize = float(len(allregions))
                try:
                    poissonmean = sum(allregions) / regionallSize
                except:
                    poissonmean = 0

                print "Poisson n=%d, p=%f" % (regionallSize, poissonmean)
            else:
                backregions.sort()
                backregionSize = float(len(backregions))
                try:
                    poissonmean = sum(backregions) / backregionSize
                except:
                    poissonmean = 0

                print "Poisson n=%d, p=%f" % (backregionSize, poissonmean)
      
            p = math.exp(-poissonmean)

        print headline
        for region in outregions:
            # iterative poisson from http://stackoverflow.com/questions/280797?sort=newest
            if doPvalue:
                pValue = p
                sumAll = int(region[5])
                for i in xrange(sumAll):
                    pValue *= poissonmean
                    pValue /= i+1

            if shiftValue == "auto" and reportshift:
                try:
                    shiftDict[region[-1]] += 1
                except:
                    shiftDict[region[-1]] = 1
            try:
                if reportshift:
                    outline = "%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f%s\t%d" % region
                else:
                    outline = "%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f%s" % region[:-1]
            except:
                if reportshift:
                    outline = "%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f%s\t%d" % region
                else:
                    outline = "%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f%s" % region[:-1]

            if doPvalue:
                outline += "\t%1.2g" % pValue

            print outline
            outfile.write(outline + "\n")

    footer = "#stats:\t%.1f RPM in %d regions\n" % (total, index)
    if doDirectionality:
        footer += "#\t\t%d additional regions failed directionality filter\n" % failedCounter

    if doRevBackground:
        try:
            percent = 100. * (float(mIndex)/index)
        except:
            percent = 0.

        if percent > 100.:
            percent = 100.

        footer += "#%d regions (%.1f RPM) found in background (FDR = %.2f percent)\n" % (mIndex, mTotal, percent)

    if shiftValue == "auto" and reportshift:
        bestShift = 0
        bestCount = 0
        for shift in sorted(shiftDict):
            if shiftDict[shift] > bestCount:
                bestShift = shift
                bestCount = shiftDict[shift]

        footer += "#mode of shift values: %d\n" % bestShift

    print footer
    outfile.write(footer)
    outfile.close()

    writeLog(logfilename, versionString, outfilename + footer.replace("\n#", " | ")[:-1])


if __name__ == "__main__":
    main(sys.argv)
