#
#  distalPairs.py
#  ENRAGE
#
#  Created by Ali Mortazavi on 10/14/08.
#


try:
    import psyco
    psyco.full()
except:
    pass

import sys
import time
import optparse
import ReadDataset
from commoncode import getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption


def main(argv=None):
    if not argv:
        argv = sys.argv

    print "distalPairs: version 3.4"
    print "looks at all chromosomes simultaneously: is both slow and takes up large amount of RAM"
    usage = "usage: python %prog minDist rdsfile outfile [--sameChrom] [--splices] [--maxDist bp] [--verbose] [--cache cachepages]"

    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--sameChrom", action="store_true", dest="sameChromOnly")
    parser.add_option("--splices", action="store_true", dest="doSplices")
    parser.add_option("--verbose", action="store_true", dest="doVerbose")
    parser.add_option("--maxDist", type="int", dest="maxDist")
    parser.add_option("--cache", type="int", dest="cachePages")
    parser.set_defaults(sameChromOnly=False, doSplices=False, doVerbose=False, maxDist=1000000000, cachePages=None)
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 3:
        print usage
        sys.exit(1)

    minDist = int(args[0])
    rdsfile = args[1]
    outfilename = args[2]

    distalPairs(minDist, rdsfile, outfilename, options.sameChromOnly, options.doSplices, options.doVerbose, options.maxDist, options.cachePages)


def makeParser(usage=""):
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--sameChrom", action="store_true", dest="sameChromOnly")
    parser.add_option("--splices", action="store_true", dest="doSplices")
    parser.add_option("--verbose", action="store_true", dest="doVerbose")
    parser.add_option("--maxDist", type="int", dest="maxDist")
    parser.add_option("--cache", type="int", dest="cachePages")

    configParser = getConfigParser()
    section = "distalPairs"
    sameChromOnly = getConfigBoolOption(configParser, section, "sameChromOnly", False)
    doSplices = getConfigBoolOption(configParser, section, "doSplices", False)
    doVerbose = getConfigBoolOption(configParser, section, "doVerbose", False)
    maxDist = getConfigIntOption(configParser, section, "maxDist", 1000000000)
    cachePages = getConfigOption(configParser, section, "cachePages", None)

    parser.set_defaults(sameChromOnly=sameChromOnly, doSplices=doSplices, doVerbose=doVerbose, maxDist=maxDist, cachePages=cachePages)

    return parser


def distalPairs(minDist, rdsfile, outfilename, sameChromOnly=False, doSplices=False, doVerbose=False, maxDist=1000000000, cachePages=None):
    if cachePages is not None:
        doCache = True
    else:
        doCache = False
        cachePages = -1

    RDS = ReadDataset.ReadDataset(rdsfile, verbose = True, cache=doCache)
    if not RDS.hasIndex():
        print "Will not attempt to run on unIndexed dataset - please index with rdsmetadata.py and rerun"
        sys.exit(1)

    if cachePages > RDS.getDefaultCacheSize():
        RDS.setDBcache(cachePages)

    print time.ctime()

    print "getting uniq reads"    
    uniqDict = RDS.getReadsDict(withChrom=True, withPairID=True, doUniqs=True, readIDDict=True)
    print "got uniqs"

    if doSplices:
        addSplicesToUniqReads(RDS, uniqDict)

    if doVerbose:
        print len(uniqDict), time.ctime()

    outfile = open(outfilename,"w")
    diffChrom = 0
    distal = 0
    total = 0
    for readID in uniqDict:
        readList = uniqDict[readID]
        if len(readList) == 2:
            total += 1
            start1 = readList[0]["start"]
            sense1 = readList[0]["sense"]
            chrom1 = readList[0]["chrom"]
            start2 = readList[1]["start"]
            sense2 = readList[1]["sense"]
            chrom2 = readList[1]["chrom"]

            if chrom1 != chrom2:
                diffChrom += 1
                if sameChromOnly:
                    continue
                else:
                    outline = "%s\t%s\t%d\t%s\t%s\t%d\t%s" % (readID, chrom1, start1, sense1, chrom2, start2, sense2)
                    print >> outfile, outline
                    if doVerbose:
                        print diffChrom, outline
            else:
                dist = abs(start1 - start2)
                if minDist < dist < maxDist:
                    distal += 1
                    outline = "%s\t%s\t%d\t%s\t%d\t%s\t%d" % (readID, chrom1, start1, sense1, start2, sense2, dist)
                    print >> outfile, outline
                    if doVerbose:
                        print distal, outline

    outfile.write("#distal: %d\tdiffChrom: %d\tpossible: %d\n" % (distal, diffChrom, total))
    total = float(total)
    if total < 1:
        total = 1.

    outfile.write("#distal %2.2f pct\tdiffChrom %2.2f pct\n" % ((100. * distal/total), (100. * diffChrom/total)))
    outfile.close()
    print "distal: %d\tdiffChrom: %d\tpossible: %d" % (distal, diffChrom, int(total))
    print "distal: %2.2f pct\tdiffChrom: %2.2f pct\n" % ((100. * distal/total), (100. * diffChrom/total))
    print time.ctime()


def addSplicesToUniqReads(RDS, uniqDict):
    print "getting splices"
    splicesDict = RDS.getSplicesDict(withChrom=True, withPairID=True, readIDDict=True, splitRead=True)
    print "got splices"
    for readID in splicesDict:
        theRead = splicesDict[readID]
        read0 = theRead[0]
        del read0[1]
        try:
            uniqDict[readID].append(read0)
        except:
            if len(theRead) == 4:
                read2 = theRead[2]
                del read2[1]
                uniqDict[readID] = [read0,read2]


if __name__ == "__main__":
    main(sys.argv)