#
#  partition.py
#  ENRAGE
#
""" usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [options]
           where the regionfiles must be comma-separated with no white space
           --minFeature controls the size of the smallest partition
"""

# Optional: enable psyco when it is installed; the script runs the same without it.
try:
    import psyco
    psyco.full()
except Exception:
    # Best-effort only: a missing or failing psyco must never stop the script.
    # Unlike a bare "except:", this lets KeyboardInterrupt and SystemExit
    # propagate (on Python 2.5 and later).
    pass

import sys
import string
import optparse
from commoncode import getMergedRegions, writeLog, getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption

# Version banner; printed when the module is loaded and passed to writeLog() by partition().
versionString = "partition: version 2.1"
# The single-argument parenthesized form prints exactly the same text under
# Python 2 and is also valid Python 3 syntax.
print(versionString)


def main(argv=None):
    """Command-line entry point: parse the arguments and run partition().

    argv is the full argument vector, program name first; when it is None
    or empty, sys.argv is used.  Three positional arguments are required:
    mergeID, a comma-separated list of region files, and the output file
    name.  With fewer than three the usage line is printed and the process
    exits with status 1.
    """
    if not argv:
        argv = sys.argv

    # Interpolate the program name: the template was previously shown
    # verbatim, so both this usage line and the optparse help text contained
    # a literal "%s".
    usage = "usage: python %s mergeID regionfile1[,regionfile2,...] combpartitionfile [options]" % argv[0]

    parser = getParser(usage)
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 3:
        print(usage)
        sys.exit(1)

    mergeID = args[0]
    regionfiles = args[1]
    outfilename = args[2]

    # Echo the settings that change how regions are loaded or labelled.
    if options.padregion:
        print("padding %d bp on each side of a region" % options.padregion)

    if options.mergeregion:
        print("merging regions closer than %d bp" % options.mergeregion)

    if options.locID:
        print("using locations as region ID")

    if options.ignoreRandom:
        print("ignoring 'random' chromosomes")

    partition(mergeID, regionfiles, outfilename, options.minFeature, options.cField,
              options.padregion, options.locID, options.ignoreRandom, options.mergeregion,
              options.merging, options.logfilename)


def getParser(usage):
    """Build the optparse parser for the partition command line.

    Options are registered in a fixed order (which is also the order shown
    in --help).  Each option's default is then read from the "partition"
    section of the parser returned by getConfigParser(), using the fallback
    value listed below when the configuration does not supply one.

    Returns the configured optparse.OptionParser.
    """
    parser = optparse.OptionParser(usage=usage)

    # (flag, add_option keyword arguments), in registration order.
    optionSpecs = [
        ("--minFeature", {"type": "int", "dest": "minFeature",
                          "help": "size of smallest partition"}),
        ("--chromField", {"type": "int", "dest": "cField",
                          "help": "num chromosome fields"}),
        ("--padregion", {"type": "int", "dest": "padregion",
                         "help": "padding on each side of region"}),
        ("--mergeregion", {"type": "int", "dest": "mergeregion",
                           "help": "bp threshold to merge regions"}),
        ("--nomerge", {"action": "store_false", "dest": "merging",
                       "help": "do not merge regions"}),
        ("--log", {"dest": "logfilename",
                   "help": "log file"}),
        ("--locID", {"action": "store_true", "dest": "locID",
                     "help": "use location as region ID"}),
        ("--norandom", {"action": "store_true", "dest": "ignoreRandom",
                        "help": "ignore 'random' chromosomes"}),
    ]
    for flag, settings in optionSpecs:
        parser.add_option(flag, **settings)

    configParser = getConfigParser()
    section = "partition"

    # (reader, option name, fallback), read in this order.
    # NOTE(review): the padregion fallback here is 1 while partition()'s own
    # default is 0 -- confirm which one is intended.
    defaultSpecs = [
        (getConfigIntOption, "minFeature", 25),
        (getConfigIntOption, "cField", 1),
        (getConfigIntOption, "padregion", 1),
        (getConfigBoolOption, "locID", False),
        (getConfigBoolOption, "ignoreRandom", False),
        (getConfigIntOption, "mergeregion", 0),
        (getConfigBoolOption, "merging", True),
        (getConfigOption, "logfilename", "partition.log"),
    ]
    defaults = {}
    for reader, name, fallback in defaultSpecs:
        defaults[name] = reader(configParser, section, name, fallback)

    parser.set_defaults(**defaults)

    return parser


def partition(mergeID, regionfiles, outfilename, minFeature=25, cField=1, padregion=0,
              locID=False, ignoreRandom=False, mergeregion=0, merging=True,
              logfilename="partition.log"):
    """Split each chromosome at the region boundaries found in the region files.

    Parameters:
        mergeID      -- label prefix; partitions are named mergeID + running
                        index unless locID is set.
        regionfiles  -- comma-separated region file names (no white space).
        outfilename  -- tab-delimited output file, overwritten.
        minFeature   -- a boundary is only used when it lies more than this
                        many bp past the start of the current partition.
        cField       -- passed to getMergedRegions as chromField.
        padregion    -- passed to getMergedRegions as pad.
        locID        -- label partitions "chrom:start-stop" instead.
        ignoreRandom -- skip chromosomes whose name contains "random".
        mergeregion  -- passed to getMergedRegions as maxDist.
        merging      -- passed to getMergedRegions as doMerge.
        logfilename  -- log file handed to writeLog.

    The start and stop of every region from every file become candidate
    boundaries on that region's chromosome.  Partitions are written one per
    line as label, chrom, start, stop and length in kb, chromosomes in
    sorted name order, after a single '#' header line.
    """
    writeLog(logfilename, versionString, " ".join(sys.argv[1:]))

    regionsByFile = []
    chromSet = set()
    for regionFilename in regionfiles.split(','):
        regionDict = getMergedRegions(regionFilename, maxDist=mergeregion, minHits=-1,
                                      fullChrom=True, verbose=True, chromField=cField,
                                      doMerge=merging, pad=padregion)
        regionsByFile.append(regionDict)
        # A set de-duplicates chromosome names without a linear scan per name.
        chromSet.update(regionDict)

    outregionDict = {}
    for chrom in sorted(chromSet):
        if ignoreRandom and "random" in chrom:
            continue

        pointList = []
        for regionDict in regionsByFile:
            if chrom in regionDict:
                for region in regionDict[chrom]:
                    pointList.append(region.start)
                    pointList.append(region.stop)

        pointList.sort()
        outregionDict[chrom] = _splitAtPoints(pointList, minFeature)

    numPartitions = _writePartitions(outfilename, outregionDict, mergeID, locID)

    message = "%s was partitioned into %d regions" % (mergeID, numPartitions)
    print(message)
    writeLog(logfilename, versionString, message)


def _splitAtPoints(sortedPoints, minFeature):
    """Return (start, stop, length) partitions cut at the sorted boundary points."""
    partitionList = []
    # The first partition starts at coordinate 0, i.e. before the first boundary.
    start = 0
    for point in sortedPoints:
        # Boundaries within minFeature bp of the current start are skipped, so
        # the stretch they would have delimited joins the next partition.
        if (point - start) > minFeature:
            partitionList.append((start, point - 1, point - 1 - start))
            start = point

    return partitionList


def _writePartitions(outfilename, outregionDict, mergeID, locID):
    """Write the partition table to outfilename and return the number of rows."""
    index = 0
    outfile = open(outfilename, "w")
    # try/finally guarantees the file is flushed and closed even if a write fails.
    try:
        if locID:
            outfile.write("#chrom:start-stop\tchrom\tstart\tstop\tlength_kb\n")
        else:
            outfile.write("#labelID\tchrom\tstart\tstop\tlength_kb\n")

        # Iterate in sorted chromosome order so that row order and the running
        # label index do not depend on dictionary ordering.
        for chrom in sorted(outregionDict):
            for (start, stop, length) in outregionDict[chrom]:
                index += 1
                if locID:
                    label = "%s:%d-%d" % (chrom, start, stop)
                else:
                    label = "%s%d" % (mergeID, index)

                outfile.write("%s\t%s\t%d\t%d\t%.3f\n" % (label, chrom, start, stop, length/1000.))
    finally:
        outfile.close()

    return index

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main(sys.argv)