/* boxClump - put together 2 dimensional boxes that
 * overlap with each other into clumps. */

/* Copyright (C) 2011 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */

#include "common.h"
#include "dlist.h"
#include "localmem.h"
#include "boxClump.h"


/** Some simple utility function on globally declared 
 ** data structures. **/


void boxClumpFree(struct boxClump **pClump)
/* Free boxClump. */
{
struct boxClump *clump = *pClump;
if (clump != NULL)
    {
    slFreeList(&clump->boxList);
    freez(pClump);
    }
}

void boxClumpFreeList(struct boxClump **pList)
/* Free list of boxClumps. */
{
struct boxClump *el, *next;

for (el = *pList; el != NULL; el = next)
    {
    next = el->next;
    boxClumpFree(&el);
    }
*pList = NULL;
}

int boxClumpCmpCount(const void *va, const void *vb)
/* Compare to sort based on count of boxes. */
{
const struct boxClump *a = *((struct boxClump **)va);
const struct boxClump *b = *((struct boxClump **)vb);
return b->boxCount - a->boxCount;
}

/** Local data structures. */

struct cluster
/* A cluster of boxes. */
    {
    struct box *boxList;	/* List of boxes. */
    struct dlNode *node;	/* Position in doubly-linked list. */
    };

struct box
/* A box in an alignment. */
    {
    struct box *next;		 /* Next in list (usually on cluster). */
    struct boxIn *in;		 /* Input box position. */
    struct cluster *cluster;     /* Cluster this is part of. */
    };

struct boxRef
/* Reference to a box. */
    {
    struct boxRef *next;
    struct box *box;
    };


/** Routines that cluster using mostly the local data
 ** structures. **/


static void mergeClusters(struct cluster *a, struct cluster *b)
/* Merge cluster b into cluster a, and free remnants of b. */
{
struct box *box;

/* Merging with yourself is always problematic. */
if (a == b)
    return;

/* Point all boxes in b to a. */
for (box = b->boxList; box != NULL; box = box->next)
    box->cluster = a;

/* Add b's boxList to end of a's. */
a->boxList = slCat(a->boxList, b->boxList);

/* Remove b from master cluster list. */
dlRemove(b->node);
}

static boolean allStartBy(struct boxRef *refList, int qStart, int tStart)
/* Return TRUE if qStart/qEnd of all boxes less than or equal to qStart/tStart 
 * This is an end condition for recursion along with only having two or
 * less boxes in the refList.  It handles the case where you have many
 * boxes stacked on top of each other. */
{
struct boxRef *ref;
for (ref = refList; ref != NULL; ref = ref->next)
    {
    struct boxIn *box = ref->box->in;
    if (box->qStart > qStart || box->tStart > tStart)
        return FALSE;
    }
return TRUE;
}

static boolean allSameCluster(struct boxRef *refList)
/* Return TRUE if qStart/qEnd of all boxes is the same */
{
struct boxRef *ref;
struct cluster *cluster = refList->box->cluster;
for (ref = refList->next; ref != NULL; ref = ref->next)
    {
    if (ref->box->cluster != cluster)
        return FALSE;
    }
return TRUE;
}

static struct lm *lm;
/* Local memory manager used for boxRefs and other stuff. */

static void rBoxJoin(struct boxRef *refList, 
	int qStart, int qEnd, int tStart, int tEnd)
/* Recursively cluster boxes. */
{
int boxCount = slCount(refList);

if (boxCount <= 1)
    {
    /* Easy: no merging required. */
    }
else if (boxCount == 2)
    {
    /* Decide if pair overlaps and if so merge. */
    struct box *a = refList->box;
    struct box *b = refList->next->box;
    if (rangeIntersection(a->in->qStart, a->in->qEnd, b->in->qStart, b->in->qEnd) > 0 &&
        rangeIntersection(a->in->tStart, a->in->tEnd, b->in->tStart, b->in->tEnd) > 0 )
	{
	mergeClusters(a->cluster, b->cluster);
	}
    else
        {
	/* Two non-overlapping boxes, we don't have to do anything. */
	}
    }
else if (allStartBy(refList, qStart, tStart))
    {
    /* If everybody contains the upper left corner, then they all can 
     * be merged.   This is the route taken often by clumps with lots
     * of overlap. */
    struct cluster *aCluster = refList->box->cluster;
    struct boxRef *ref;
    for (ref = refList->next; ref != NULL; ref = ref->next)
        {
	struct cluster *bCluster = ref->box->cluster;
	mergeClusters(aCluster, bCluster);
	}
    }
else if (allSameCluster(refList))
    {
    /* Everything is in the same cluster, no action required. */
    }
else
    {
    /* We can't yet figure out clumping, so break
     * up our window in two along larger dimension and
     * recurse on both subwindows. */
    struct boxRef *list1 = NULL, *list2 = NULL, *ref, *next;
    if (qEnd - qStart > tEnd - tStart)
        {
	int mid = (qStart + qEnd)>>1;
	for (ref = refList; ref != NULL; ref = next)
	    {
	    struct box *box = ref->box;
	    next = ref->next;
	    if (box->in->qEnd <= mid)
	        {
		slAddHead(&list1, ref);
		}
	    else if (box->in->qStart >= mid)
	        {
		slAddHead(&list2, ref);
		}
	    else
	        {
		/* Box crosses boundary, have to put it on both lists. */
		slAddHead(&list1, ref);
		lmAllocVar(lm, ref);
		ref->box = box;
		slAddHead(&list2, ref);
		}
	    }
	rBoxJoin(list1, qStart, mid, tStart, tEnd);
	rBoxJoin(list2, mid, qEnd, tStart, tEnd);
	}
    else
        {
	int mid = (tStart + tEnd)>>1;
	for (ref = refList; ref != NULL; ref = next)
	    {
	    struct box *box = ref->box;
	    next = ref->next;
	    if (box->in->tEnd <= mid)
	        {
		slAddHead(&list1, ref);
		}
	    else if (box->in->tStart >= mid)
	        {
		slAddHead(&list2, ref);
		}
	    else
	        {
		/* Box crosses boundary, have to put it on both lists. */
		slAddHead(&list1, ref);
		lmAllocVar(lm, ref);
		ref->box = box;
		slAddHead(&list2, ref);
		}
	    }
	rBoxJoin(list1, qStart, qEnd, tStart, mid);
	rBoxJoin(list2, qStart, qEnd, mid, tEnd);
	}
    }
}


struct boxClump *boxFindClumps(struct boxIn **pBoxList)
/* Convert list of boxes to a list of clumps.  Clumps
 * are collections of boxes that overlap.  Note that
 * the original boxList is overwritten as the boxes
 * are moved from it to the clumps. */
{
struct boxIn *in;
struct boxClump *clumpList = NULL, *clump;
struct boxRef *refList = NULL, *ref;
struct box *box;
struct cluster *cluster;
struct dlNode *node;
struct dlList clusterList;
int qStart = BIGNUM, qEnd = -BIGNUM;
int tStart = BIGNUM, tEnd = -BIGNUM;


dlListInit(&clusterList);
lm = lmInit(0);

/* Make local box structure, cluster, and reference
 * for each input box.  Calculate overall bounds. */
for (in = *pBoxList; in != NULL; in = in->next)
    {
    lmAllocVar(lm, box);
    box->in = in;
    if (in->qStart < qStart) qStart = in->qStart;
    if (in->qEnd > qEnd) qEnd = in->qEnd;
    if (in->tStart < tStart) tStart = in->tStart;
    if (in->tEnd > tEnd) tEnd = in->tEnd;
    lmAllocVar(lm,cluster);
    lmAllocVar(lm, cluster->node);
    cluster->node->val = cluster;
    dlAddTail(&clusterList, cluster->node);
    cluster->boxList = box;
    box->cluster = cluster;
    lmAllocVar(lm, ref);
    ref->box = box;
    slAddHead(&refList, ref);
    }

/* Call to recursive joiner. */
rBoxJoin(refList, qStart, qEnd, tStart, tEnd);

/* Copy from local memory and local data structures
 * to global memory and global data structures. */
for (node = clusterList.head; !dlEnd(node); node = node->next)
    {
    int boxCount = 0;
    int qStart = BIGNUM, qEnd = -BIGNUM;
    int tStart = BIGNUM, tEnd = -BIGNUM;
    struct boxIn *boxList = NULL;
    cluster = node->val;
    for (box = cluster->boxList; box != NULL; box = box->next)
	{
	in = box->in;
	slAddHead(&boxList, in);
	if (in->qStart < qStart) qStart = in->qStart;
	if (in->qEnd > qEnd) qEnd = in->qEnd;
	if (in->tStart < tStart) tStart = in->tStart;
	if (in->tEnd > tEnd) tEnd = in->tEnd;
	++boxCount;
	}
    if (boxCount > 0)
        {
	AllocVar(clump);
	slAddHead(&clumpList, clump);
	clump->boxList = boxList;
	clump->boxCount = boxCount;
	clump->qStart = qStart;
	clump->qEnd = qEnd;
	clump->tStart = tStart;
	clump->tEnd = tEnd;
	}
    }
*pBoxList = NULL;
lmCleanup(&lm);
return clumpList;
}

