"""
    usage: python $ERANGEPATH/findall.py label samplerdsfile regionoutfile
           [--control controlrdsfile] [--minimum minHits] [--ratio minRatio]
           [--spacing maxSpacing] [--listPeak] [--shift #bp | learn] [--learnFold num]
           [--noshift] [--autoshift] [--reportshift] [--nomulti] [--minPlus fraction]
           [--maxPlus fraction] [--leftPlus fraction] [--minPeak RPM] [--raw]
           [--revbackground] [--pvalue self|back|none] [--nodirectionality]
           [--strandfilter plus/minus] [--trimvalue percent] [--notrim]
           [--cache pages] [--log altlogfile] [--flag aflag] [--append] [--RNA]

           where values in brackets are optional and label is an arbitrary string.

           Use -ratio (default 4 fold) to set the minimum fold enrichment
           over the control, -minimum (default 4) is the minimum number of reads
           (RPM) within the region, and -spacing (default readlen) to set the maximum
           distance between reads in the region. -listPeak lists the peak of the
           region. Peaks mut be higher than -minPeak (default 0.5 RPM).
           Pvalues are calculated from the sample (change with -pvalue),
           unless the -revbackground flag and a control RDS file are provided.

           By default, all numbers and parameters are on a reads per
           million (RPM) basis. -raw will treat all settings, ratios and reported
           numbers as raw counts rather than RPM. Use -notrim to turn off region
           trimming and -trimvalue to control trimming (default 10% of peak signal)

           The peak finder uses minimal directionality information that can
           be turned off with -nodirectionality ; the fraction of + strand reads
           required to be to the left of the peak (default 0.3) can be set with
           -leftPlus ; -minPlus and -maxPlus change the minimum/maximum fraction
           of plus reads in a region, which (defaults 0.25 and 0.75, respectively).

           Use -shift to shift reads either by half the expected
           fragment length (default 0 bp) or '-shift learn ' to learn the shift
           based on the first chromosome. If you prefer to learn the shift
           manually, use -autoshift to calculate a per-region shift value, which
           can be reported using -reportshift. -strandfilter should only be used
           when explicitely calling unshifted stranded peaks from non-ChIP-seq
           data such as directional RNA-seq. regionoutfile is written over by
           default unless given the -append flag.
"""

try:
    import psyco
    psyco.full()
except:
    pass

import sys
import math
import string
import optparse
from commoncode import writeLog, findPeak, getBestShiftForRegion, getConfigParser, getConfigOption, getConfigIntOption, getConfigFloatOption, getConfigBoolOption
import ReadDataset
import Region


versionString = "findall: version 3.2"
print versionString

def usage():
    print __doc__


def main(argv=None):
    if not argv:
        argv = sys.argv

    parser = makeParser()
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 3:
        usage()
        sys.exit(2)

    factor = args[0]
    hitfile = args[1]
    outfilename = args[2]

    findall(factor, hitfile, outfilename, options.minHits, options.minRatio, options.maxSpacing, options.listPeak, options.shift,
            options.stringency, options.noshift, options.autoshift, options.reportshift,
            options.minPlusRatio, options.maxPlusRatio, options.leftPlusRatio, options.minPeak,
            options.normalize, options.logfilename, options.withFlag, options.doDirectionality,
            options.trimValue, options.doTrim, options.doAppend, options.rnaSettings,
            options.cachePages, options.ptype, options.mockfile, options.doRevBackground, options.noMulti,
            options.strandfilter, options.combine5p)


def makeParser():
    usage = __doc__

    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--control", dest="mockfile")
    parser.add_option("--minimum", type="float", dest="minHits")
    parser.add_option("--ratio", type="float", dest="minRatio")
    parser.add_option("--spacing", type="int", dest="maxSpacing")
    parser.add_option("--listPeak", action="store_true", dest="listPeak")
    parser.add_option("--shift", dest="shift")
    parser.add_option("--learnFold", type="float", dest="stringency")
    parser.add_option("--noshift", action="store_true", dest="noShift")
    parser.add_option("--autoshift", action="store_true", dest="autoshift")
    parser.add_option("--reportshift", action="store_true", dest="reportshift")
    parser.add_option("--nomulti", action="store_true", dest="noMulti")
    parser.add_option("--minPlus", type="float", dest="minPlusRatio")
    parser.add_option("--maxPlus", type="float", dest="maxPlusRatio")
    parser.add_option("--leftPlus", type="float", dest="leftPlusRatio")
    parser.add_option("--minPeak", type="float", dest="minPeak")
    parser.add_option("--raw", action="store_false", dest="normalize")
    parser.add_option("--revbackground", action="store_true", dest="doRevBackground")
    parser.add_option("--pvalue", dest="ptype")
    parser.add_option("--nodirectionality", action="store_false", dest="doDirectionality")
    parser.add_option("--strandfilter", dest="strandfilter")
    parser.add_option("--trimvalue", type="float", dest="trimValue")
    parser.add_option("--notrim", action="store_false", dest="doTrim")
    parser.add_option("--cache", type="int", dest="cachePages")
    parser.add_option("--log", dest="logfilename")
    parser.add_option("--flag", dest="withFlag")
    parser.add_option("--append", action="store_true", dest="doAppend")
    parser.add_option("--RNA", action="store_true", dest="rnaSettings")
    parser.add_option("--combine5p", action="store_true", dest="combine5p")

    configParser = getConfigParser()
    section = "findall"
    minHits = getConfigFloatOption(configParser, section, "minHits", 4.0)
    minRatio = getConfigFloatOption(configParser, section, "minRatio", 4.0)
    maxSpacing = getConfigIntOption(configParser, section, "maxSpacing", 50)
    listPeak = getConfigBoolOption(configParser, section, "listPeak", False)
    shift = getConfigOption(configParser, section, "shift", None)
    stringency = getConfigFloatOption(configParser, section, "stringency", 4.0)
    noshift = getConfigBoolOption(configParser, section, "noshift", False)
    autoshift = getConfigBoolOption(configParser, section, "autoshift", False)
    reportshift = getConfigBoolOption(configParser, section, "reportshift", False)
    minPlusRatio = getConfigFloatOption(configParser, section, "minPlusRatio", 0.25)
    maxPlusRatio = getConfigFloatOption(configParser, section, "maxPlusRatio", 0.75)
    leftPlusRatio = getConfigFloatOption(configParser, section, "leftPlusRatio", 0.3)
    minPeak = getConfigFloatOption(configParser, section, "minPeak", 0.5)
    normalize = getConfigBoolOption(configParser, section, "normalize", True)
    logfilename = getConfigOption(configParser, section, "logfilename", "findall.log")
    withFlag = getConfigOption(configParser, section, "withFlag", "")
    doDirectionality = getConfigBoolOption(configParser, section, "doDirectionality", True)
    trimValue = getConfigOption(configParser, section, "trimValue", None)
    doTrim = getConfigBoolOption(configParser, section, "doTrim", True)
    doAppend = getConfigBoolOption(configParser, section, "doAppend", False)
    rnaSettings = getConfigBoolOption(configParser, section, "rnaSettings", False)
    cachePages = getConfigOption(configParser, section, "cachePages", None)
    ptype = getConfigOption(configParser, section, "ptype", None)
    mockfile = getConfigOption(configParser, section, "mockfile", None)
    doRevBackground = getConfigBoolOption(configParser, section, "doRevBackground", False)
    noMulti = getConfigBoolOption(configParser, section, "noMulti", False)
    strandfilter = getConfigOption(configParser, section, "strandfilter", None)
    combine5p = getConfigBoolOption(configParser, section, "combine5p", False)

    parser.set_defaults(minHits=minHits, minRatio=minRatio, maxSpacing=maxSpacing, listPeak=listPeak, shift=shift,
                        stringency=stringency, noshift=noshift, autoshift=autoshift, reportshift=reportshift,
                        minPlusRatio=minPlusRatio, maxPlusRatio=maxPlusRatio, leftPlusRatio=leftPlusRatio, minPeak=minPeak,
                        normalize=normalize, logfilename=logfilename, withFlag=withFlag, doDirectionality=doDirectionality,
                        trimValue=trimValue, doTrim=doTrim, doAppend=doAppend, rnaSettings=rnaSettings,
                        cachePages=cachePages, ptype=ptype, mockfile=mockfile, doRevBackground=doRevBackground, noMulti=noMulti,
                        strandfilter=strandfilter, combine5p=combine5p)

    return parser


def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing=50, listPeak=False, shift=None,
            stringency=4.0, noshift=False, autoshift=False, reportshift=False,
            minPlusRatio=0.25, maxPlusRatio=0.75, leftPlusRatio=0.3, minPeak=0.5,
            normalize=True, logfilename="findall.log", withFlag="", doDirectionality=True,
            trimValue=None, doTrim=True, doAppend=False, rnaSettings=False,
            cachePages=None, ptype=None, mockfile=None, doRevBackground=False, noMulti=False,
            strandfilter=None, combine5p=False):

    cachePages = -1
    doPvalue = True
    pValueType = 'self'
    shiftValue = 0
    stranded = 'both'

    if autoshift:
        shiftValue = 'auto'
        shiftDict = {}

    if shift is not None:
        try:
            shiftValue = int(shift)
        except:
            if shift == 'learn':
                shiftValue = 'learn'
                print "Will try to learn shift"

    if noshift:
        shiftValue = 0

    if normalize:
        print "Normalizing to RPM"

    if trimValue is not None:
        trimValue = 0.1
        trimString = '%2.1f' % (100. * trimValue)
        trimString += '\%'
    else:
        trimValue = 0.1
        trimString = '10%'

    if not doTrim:
        trimString = 'none'

    if doRevBackground:
        print "Swapping IP and background to calculate FDR"
        pValueType = 'back'

    doControl = False
    if mockfile is not None:
        doControl = True

    if ptype is not None:
        if ptype == 'NONE':
            doPvalue = False
            pValueType = 'none'
        elif ptype == 'SELF':
            pValueType = 'self'
        elif ptype == 'BACK':
            if doControl and doRevBackground:
                pValueType = 'back'
            else:
                print "must have a control dataset and -revbackground for pValue type 'back'"
        else:
            print "could not use pValue type : %s" % ptype

    doCache = False
    if cachePages is not None:
        doCache = True

    if withFlag != "":
        print "restrict to flag = %s" % withFlag

    useMulti = True
    if noMulti:
        print "using unique reads only"
        useMulti = False

    if rnaSettings:
        print "using settings appropriate for RNA: -nodirectionality -notrim -noshift"
        shiftValue = 0
        doTrim = False
        doDirectionality = False

    if strandfilter is not None:
        if strandfilter == 'plus':
            stranded = '+'
            minPlusRatio = 0.9
            maxPlusRatio = 1.0
            print "only analyzing reads on the plus strand"
        elif strandfilter == 'minus':
            stranded = '-'
            minPlusRatio = 0.0
            maxPlusRatio = 0.1
            print "only analyzing reads on the minus strand"

    stringency = max(stringency, 1.0)



    hitRDS = ReadDataset.ReadDataset(hitfile, verbose = False)
    readlen = hitRDS.getReadSize()
    if '-RNA' in sys.argv:
        maxSpacing = readlen

    writeLog(logfilename, versionString, string.join(sys.argv[1:]))

    print "\nenforceDirectionality=%s listPeak=%s nomulti=%s cache=%s " % (doDirectionality, listPeak, noMulti, doCache)
    print "spacing<%d minimum>%.1f ratio>%.1f minPeak=%.1f\ttrimmed=%s\tstrand=%s" % (maxSpacing, minHits, minRatio, minPeak, trimString, stranded)
    try:
        print "minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%d pvalue=%s" % (minPlusRatio, maxPlusRatio, leftPlusRatio, shiftValue, pValueType)
    except:
        print "minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%s pvalue=%s" % (minPlusRatio, maxPlusRatio, leftPlusRatio, shiftValue, pValueType)

    if doControl:
        print "\ncontrol:" 
        mockRDS = ReadDataset.ReadDataset(mockfile, verbose = True, cache=doCache)
        #sqlite default_cache_size is 2000 pages
        if cachePages > mockRDS.getDefaultCacheSize():
            mockRDS.setDBcache(cachePages)

    print "\nsample:" 
    hitRDS = ReadDataset.ReadDataset(hitfile, verbose = True, cache=doCache)
    #sqlite default_cache_size is 2000 pages
    if cachePages > hitRDS.getDefaultCacheSize():
        hitRDS.setDBcache(cachePages)
    print

    hitDictSize = 1
    hitRDSsize = len(hitRDS) / 1000000.
    if doControl:
        mockDictSize = 1
        mockRDSsize = len(mockRDS) / 1000000.

    if normalize:
        if doControl:
            mockSampleSize = mockRDSsize
        hitSampleSize = hitRDSsize

    if doAppend:
        outfile = open(outfilename, 'a')
    else:
        outfile = open(outfilename, 'w')

    outfile.write('#ERANGE %s\n' % versionString)
    if doControl:
        outfile.write('#enriched sample:\t%s (%.1f M reads)\n#control sample:\t%s (%.1f M reads)\n' % (hitfile, hitRDSsize, mockfile, mockRDSsize))
    else:
        outfile.write('#enriched sample:\t%s (%.1f M reads)\n#control sample: none\n' % (hitfile, hitRDSsize))
    if withFlag != '':
        outfile.write('#restrict to Flag = %s\n' % withFlag)
    outfile.write('#enforceDirectionality=%s listPeak=%s nomulti=%s cache=%s\n' % (doDirectionality, listPeak, noMulti, doCache))
    outfile.write('#spacing<%d minimum>%.1f ratio>%.1f minPeak=%.1f trimmed=%s strand=%s\n' % (maxSpacing, minHits, minRatio, minPeak, trimString, stranded))
    try:
        outfile.write('#minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%d pvalue=%s\n' % (minPlusRatio, maxPlusRatio, leftPlusRatio, shiftValue, pValueType))
    except:
        outfile.write('#minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%s pvalue=%s\n' % (minPlusRatio, maxPlusRatio, leftPlusRatio, shiftValue, pValueType))
    if normalize:
        isNormalized = 'RPM'
    else:
        isNormalized = 'COUNT'
    headline = '#regionID\tchrom\tstart\tstop\t%s\tfold\tmulti' % isNormalized
    headline += '%'
    if doDirectionality:
        headline += '\tplus%\tleftPlus%'
    if listPeak:
        headline += '\tpeakPos\tpeakHeight'
    if reportshift:
        headline += '\treadShift'
    if doPvalue:
        headline += '\tpValue'
    #print headline
    outfile.write('%s\n' %  headline)

    index = 0
    total = 0

    mIndex = 0
    mTotal = 0

    if minRatio < minPeak:
        minPeak = minRatio

    hitChromList = hitRDS.getChromosomes()
    if doControl:
        mockChromList = mockRDS.getChromosomes()
    hitChromList.sort()

    failedCounter = 0
    for achrom in hitChromList:
        outregions = []
        allregions = []
        if achrom == 'chrM':
            continue
        if doControl and (achrom not in mockChromList):
            continue
        print "chromosome %s" % (achrom)
        previousHit = - 1 * maxSpacing
        currentHitList = [-1]
        currentWeightList = [0]
        currentReadList = []
        mockStart = 0
        numStarts = 0
        if withFlag == '':
            hitDict = hitRDS.getReadsDict(fullChrom=True, chrom=achrom, withWeight=True, doMulti=useMulti, findallOptimize=True, strand=stranded, combine5p=combine5p)
        else:
            hitDict = hitRDS.getReadsDict(fullChrom=True, chrom=achrom, flag=withFlag, withWeight=True, doMulti=useMulti, findallOptimize=True, strand=stranded, combine5p=combine5p)
        #mockRDS.memSync(chrom=achrom, index=True)
        maxCoord = hitRDS.getMaxCoordinate(achrom, doMulti=useMulti)
        chromIndex = 1
        #TODO: QForAli -Really? Use first chr shift value for all of them
        if shiftValue == 'learn':
            print "learning shift.... will need at least 30 training sites"
            learnPreviousHit = -1 * maxSpacing
            learnHitList = [-1]
            learnWeightList = [0]
            learnReadList = []
            learnDict = {}
            learnCount = 0
            for (pos, sense, weight) in hitDict[achrom]:
                if abs(pos - learnPreviousHit) > maxSpacing or pos == maxCoord:
                    sumAll = sum(learnWeightList)
                    if normalize:
                        sumAll /= hitSampleSize
                    regionStart = learnHitList[0]
                    regionStop = learnHitList[-1]
                    # we're going to require stringent settings
                    if sumAll >= stringency * minHits and numStarts > stringency * minRatio and (regionStop - regionStart) > stringency * readlen:
                        if doControl:
                            numMock = 1. + mockRDS.getCounts(achrom, regionStart, regionStop, uniqs=True, multi=useMulti, splices=False, reportCombined=True)
                            if normalize:
                                numMock /= mockSampleSize
                            foldRatio = sumAll / numMock
                        else:
                            foldRatio = minRatio
                        if foldRatio >= minRatio:
                            (topPos, numHits, smoothArray, numPlus, localshift) = findPeak(learnReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, shift='auto', returnShift=True)
                            try:
                                learnDict[localshift] += 1
                            except:
                                learnDict[localshift] = 1
                            learnCount += 1
                    learnHitList = []
                    learnWeightList = []
                    learnReadList = []
                    learnStarts = 0
                if pos not in currentHitList:
                    numStarts += 1
                learnHitList.append(pos)
                learnWeightList.append(weight)
                learnReadList.append((pos, sense, weight))
                learnPreviousHit = pos
            bestShift = 0
            bestCount = 0
            outline = "#learn: stringency=%.2f min_signal=%2.f min_ratio=%.2f min_region_size=%d\n#number of training examples: %d" % (stringency,stringency * minHits,stringency * minRatio,stringency * readlen,learnCount)
            print outline
            writeLog(logfilename, versionString, outfilename + outline)
            if learnCount < 30:
                outline = "#too few training examples to pick a shiftValue - defaulting to 0\n"
                outline = "#consider picking a lower minimum or threshold"
                print outline
                writeLog(logfilename, versionString, outfilename + outline)
                shiftValue = 0
            else:
                for shift in sorted(learnDict):
                    if learnDict[shift] > bestCount:
                        bestShift = shift
                        bestCount = learnDict[shift]
                shiftValue = bestShift
                print learnDict
            outline = "#picked shiftValue to be %d" % shiftValue
            print outline
            outfile.write(outline + '\n')
            writeLog(logfilename, versionString, outfilename + outline)

        for (pos, sense, weight) in hitDict[achrom]:
            chromIndex += 1
            if abs(pos - previousHit) > maxSpacing or pos == maxCoord:
                sumAll = sum(currentWeightList)
                if normalize:
                    sumAll /= hitSampleSize
                regionStart = currentHitList[0]
                regionStop = currentHitList[-1]
                allregions.append(int(sumAll))
                if sumAll >= minHits and numStarts > minRatio and (regionStop - regionStart) > readlen:
                    sumMulti = 0.
                    if doControl:
                        numMock = 1. + mockRDS.getCounts(achrom, regionStart, regionStop, uniqs=True, multi=useMulti, splices=False, reportCombined=True)
                        if normalize:
                            numMock /= mockSampleSize
                        foldRatio = sumAll / numMock
                    else:
                        foldRatio = minRatio
                    if foldRatio >= minRatio:
                        # first pass, with absolute numbers
                        if doDirectionality:
                            (topPos, numHits, smoothArray, numPlus, numLeft, localshift) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=shiftValue, returnShift=True)
                        else:
                            (topPos, numHits, smoothArray, numPlus, localshift) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, shift=shiftValue, returnShift=True)
                        bestPos = topPos[0]
                        peakScore = smoothArray[bestPos]
                        if normalize:
                            peakScore /= hitSampleSize
                        if doTrim:
                            minSignalThresh = trimValue * peakScore 
                            start = 0
                            stop = regionStop - regionStart - 1
                            startFound = False
                            while not startFound:
                                if smoothArray[start] >= minSignalThresh or start == bestPos:
                                    startFound = True
                                else:
                                    start += 1
                            stopFound = False
                            while not stopFound:
                                if smoothArray[stop] >= minSignalThresh or stop == bestPos:
                                    stopFound = True
                                else:
                                    stop -= 1
                            regionStop = regionStart + stop
                            regionStart += start
                            try:
                                if doDirectionality:
                                    (topPos, sumAll, smoothArray, numPlus, numLeft) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=localshift)
                                else:
                                    (topPos, sumAll, smoothArray, numPlus) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, shift=localshift)
                            except:
                                continue
                            if normalize:
                                sumAll /= hitSampleSize
                            if doControl:
                                numMock = 1. + mockRDS.getCounts(achrom, regionStart, regionStop, uniqs=True, multi=useMulti, splices=False, reportCombined=True)
                                if normalize:
                                    numMock /= mockSampleSize
                                foldRatio = sumAll / numMock
                            else:
                                foldRatio = minRatio
                            sumMulti = hitRDS.getCounts(achrom, regionStart, regionStop, uniqs=False, multi=useMulti, splices=False, reportCombined=True)
                            # just in case it changed, use latest data
                            try:
                                bestPos = topPos[0]
                                peakScore = smoothArray[bestPos]
                                if normalize:
                                    peakScore /= hitSampleSize
                            except:
                                continue
                        else:
                            sumMulti = sum(currentWeightList) - currentWeightList.count(1.0)
                        # normalize to RPM
                        if normalize:
                            sumMulti /= hitSampleSize
                        try:
                            multiP = 100. * (sumMulti / sumAll)
                        except:
                            break
                        if noMulti:
                            multiP = 0.
                        # check that we still pass threshold
                        if sumAll >= minHits and  foldRatio >= minRatio and (regionStop - regionStart) > readlen:
                            plusRatio = float(numPlus)/numHits
                            #print ('considering', achrom, regionStart, regionStop, numHits, numPlus, numLeft, plusRatio)
                            if peakScore >= minPeak and minPlusRatio <= plusRatio <= maxPlusRatio:
                                peak = ''
                                if listPeak:
                                    peak= '\t%d\t%.1f' % (regionStart + bestPos, peakScore)
                                if doDirectionality:
                                    if leftPlusRatio < numLeft / numPlus:
                                        index += 1
                                        plusP = plusRatio * 100.
                                        leftP = 100. * numLeft / numPlus
                                        # we have a region that passes all criteria
                                        outregions.append((factor, index, achrom, regionStart, regionStop + readlen - 1, sumAll, foldRatio, multiP, plusP, leftP, peak, localshift))
                                        #outline = '%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f%s' % (factor, index, achrom, regionStart, regionStop + readlen - 1, sumAll, foldRatio, multiP, plusP, leftP, peak)
                                        #print outline
                                        #outfile.write('%s\n' %  outline)
                                        total += sumAll
                                    else:
                                        #print ('failed directionality', achrom, regionStart, regionStop, numHits, numPlus, numLeft)
                                        failedCounter += 1
                                else:
                                    index += 1
                                    # we have a region, but didn't check for directionality
                                    outregions.append((factor, index, achrom, regionStart, regionStop + readlen - 1, sumAll, foldRatio, multiP, peak, localshift))
                                    #outline = '%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f%s' % (factor, index, achrom, regionStart, regionStop + readlen - 1, sumAll, foldRatio, multiP, peak)
                                    #print outline
                                    #outfile.write('%s\n' %  outline)
                                    total += sumAll
                currentHitList = []
                currentWeightList = []
                currentReadList = []
                numStarts = 0
            if pos not in currentHitList:
                numStarts += 1
            currentHitList.append(pos)
            currentWeightList.append(weight)
            currentReadList.append((pos, sense, weight))
            previousHit = pos

        if not doRevBackground:
            if doPvalue:
                allregions.sort()
                regionallSize = float(len(allregions))
                try:
                    poissonmean = sum(allregions) / regionallSize
                except:
                    poissonmean = 0
                print "Poisson n=%d, p=%f" % (regionallSize, poissonmean)
                p = math.exp(-poissonmean)
            print headline
            for region in outregions:
                # iterative poisson from http://stackoverflow.com/questions/280797?sort=newest
                if doPvalue:
                    pValue = p
                    sumAll = int(region[5])
                    for i in xrange(sumAll):
                        pValue *= poissonmean
                        pValue /= i+1
                try:
                    if reportshift:
                        outline = '%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f%s\t%d' % region
                    else:
                        outline = '%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f%s' % region[:-1]
                except:
                    if reportshift:
                        outline = '%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f%s\t%d' % region
                    else:
                        outline = '%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f%s' % region[:-1]
                if doPvalue:
                    outline += '\t%1.2g' % pValue
                print outline
                outfile.write(outline + '\n')
            continue
        #now do background swapping the two samples around
        print "calculating background..."
        previousHit = - 1 * maxSpacing
        currentHitList = [-1]
        currentWeightList = [0]
        currentReadList = []
        backregions = []
        mockStart = 0
        numStarts = 0
        hitDict = mockRDS.getReadsDict(fullChrom=True, chrom=achrom, withWeight=True, doMulti=useMulti, findallOptimize=True)
        #mockRDS.memSync(chrom=achrom, index=True)
        maxCoord = mockRDS.getMaxCoordinate(achrom, doMulti=useMulti)
        for (pos, sense, weight) in hitDict[achrom]:
            if abs(pos - previousHit) > maxSpacing or pos == maxCoord:
                sumAll = sum(currentWeightList)
                if normalize:
                    sumAll /= mockSampleSize
                regionStart = currentHitList[0]
                regionStop = currentHitList[-1]
                backregions.append(int(sumAll))
                if sumAll >= minHits and numStarts > minRatio and (regionStop - regionStart) > readlen:
                    numMock = 1. + hitRDS.getCounts(achrom, regionStart, regionStop, uniqs=True, multi=useMulti, splices=False, reportCombined=True)
                    if normalize:
                        numMock /= hitSampleSize
                    foldRatio = sumAll / numMock
                    if foldRatio >= minRatio:
                        # first pass, with absolute numbers
                        if doDirectionality:
                            (topPos, numHits, smoothArray, numPlus, numLeft, backshift) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=shiftValue, returnShift=True)
                        else:
                            (topPos, numHits, smoothArray, numPlus, backshift) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, shift=shiftValue, returnShift=True)
                        bestPos = topPos[0]
                        peakScore = smoothArray[bestPos]
                        if normalize:
                            peakScore /= mockSampleSize
                        if doTrim:
                            minSignalThresh = peakScore / 20.
                            start = 0
                            stop = regionStop - regionStart - 1
                            startFound = False
                            while not startFound:
                                if smoothArray[start] >= minSignalThresh or start == bestPos:
                                    startFound = True
                                else:
                                    start += 1
                            stopFound = False
                            while not stopFound:
                                if smoothArray[stop] >= minSignalThresh or stop == bestPos:
                                    stopFound = True
                                else:
                                    stop -= 1
                            regionStop = regionStart + stop
                            regionStart += start
                            try:
                                if doDirectionality:
                                    (topPos, sumAll, smoothArray, numPlus, numLeft) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=backshift)
                                else:
                                    (topPos, sumAll, smoothArray, numPlus) = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, shift=backshift)
                            except:
                                continue
                            numMock = 1. + hitRDS.getCounts(achrom, regionStart, regionStop, uniqs=True, multi=useMulti, splices=False, reportCombined=True)
                            if normalize:
                                sumAll /= mockSampleSize
                                numMock /= hitSampleSize
                            foldRatio = sumAll / numMock
                            # just in case it changed, use latest data
                            try:
                                bestPos = topPos[0]
                                peakScore = smoothArray[bestPos]
                            except:
                                continue
                            # normalize to RPM
                            if normalize:
                                peakScore /= mockSampleSize
                        # check that we still pass threshold
                        if sumAll >= minHits and  foldRatio >= minRatio and (regionStop - regionStart) > readlen:
                            plusRatio = float(numPlus)/numHits
                            #print ('considering', achrom, regionStart, regionStop, numHits, numPlus, numLeft, plusRatio)
                            if peakScore >= minPeak and minPlusRatio <= plusRatio <= maxPlusRatio:
                                if doDirectionality:
                                    if leftPlusRatio < numLeft / numPlus:
                                        mIndex += 1
                                        mTotal += sumAll
                                    else:
                                        #print ('failed directionality', achrom, regionStart, regionStop, numHits, numPlus, numLeft)
                                        failedCounter += 1
                                else:
                                    # we have a region, but didn't check for directionality
                                    mIndex += 1
                                    mTotal += sumAll
            currentHitList = []
            currentWeightList = []
            currentReadList = []
            numStarts = 0
            if pos not in currentHitList:
                numStarts += 1
            currentHitList.append(pos)
            currentWeightList.append(weight)
            currentReadList.append((pos, sense, weight))
            previousHit = pos
        print mIndex, mTotal
        if doPvalue:
            if pValueType == 'self':
                allregions.sort()
                regionallSize = float(len(allregions))
                try:
                    poissonmean = sum(allregions) / regionallSize
                except:
                    poissonmean = 0
                print "Poisson n=%d, p=%f" % (regionallSize, poissonmean)
            else:
                backregions.sort()
                backregionSize = float(len(backregions))
                try:
                    poissonmean = sum(backregions) / backregionSize
                except:
                    poissonmean = 0
                print "Poisson n=%d, p=%f" % (backregionSize, poissonmean)        
            p = math.exp(-poissonmean)
        print headline
        for region in outregions:
            # iterative poisson from http://stackoverflow.com/questions/280797?sort=newest
            if doPvalue:
                pValue = p
                sumAll = int(region[5])
                for i in xrange(sumAll):
                    pValue *= poissonmean
                    pValue /= i+1
            if shiftValue == 'auto' and reportshift:
                try:
                    shiftDict[region[-1]] += 1
                except:
                    shiftDict[region[-1]] = 1
            try:
                if reportshift:
                    outline = '%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f%s\t%d' % region
                else:
                    outline = '%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f%s' % region[:-1]
            except:
                if reportshift:
                    outline = '%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f%s\t%d' % region
                else:
                    outline = '%s%d\t%s\t%d\t%d\t%.1f\t%.1f\t%.1f%s' % region[:-1]
            if doPvalue:
                outline += '\t%1.2g' % pValue
            print outline
            outfile.write(outline + '\n')

    footer = '#stats:\t%.1f RPM in %d regions\n' % (total, index)
    if doDirectionality:
        footer += '#\t\t%d additional regions failed directionality filter\n' % failedCounter

    if doRevBackground:
        try:
            percent = 100. * (float(mIndex)/index)
        except:
            percent = 0.
    if percent > 100.:
        percent = 100.
    footer += '#%d regions (%.1f RPM) found in background (FDR = %.2f percent)\n' % (mIndex, mTotal, percent)
    if shiftValue == 'auto' and reportshift:
        bestShift = 0
        bestCount = 0
        for shift in sorted(shiftDict):
            if shiftDict[shift] > bestCount:
                bestShift = shift
                bestCount = shiftDict[shift]
        footer += '#mode of shift values: %d\n' % bestShift

    print footer
    outfile.write(footer)
    outfile.close()

    writeLog(logfilename, versionString, outfilename + footer.replace('\n#',' | ')[:-1])


if __name__ == "__main__":
    main(sys.argv)