/* Copyright (C) 2011 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */

/* makeGraph - this module creates a txGraph from a list of 
 * linkedBeds.  The main steps to this process are:
 *   1) Create list of all unique start and end points in
 *      beds.  These are 'hard' if they are at a real splice
 *      site and 'soft' otherwise.
 *   2) Create list of all unique edges. Edges include evidence
 *      if any.
 *   3) Snap soft ends to nearby hard ends of compatible type.
 *   4) For half-soft edges, examine all other edges that share
 *      the hard end, and and that are hard on both ends.  Merge
 *      the half-soft edge with the most similar such double-hard
 *      edge, favoring extensions of edge over truncations. 
 *   5) For edges that are half soft that can't yet be snapped, 
 *      merge all soft ends to a single point that is a consensus favoring 
 *      larger transcripts.
 *    6) Merge together all overlapping edges that are soft on both sides.  Make
 *     the edge boundaries equal to the median value of all the merged boundaries. 
 * Through most of the algorithm the graph is represented as two binary trees, one
 * for vertices and one for edges. At the end this is converted to the txGraph
 * structure. */

#include "common.h"
#include "hash.h"
#include "localmem.h"
#include "rbTree.h"
#include "rangeTree.h"
#include "dlist.h"
#include "dnautil.h"
#include "bed.h"
#include "ggTypes.h"
#include "nibTwo.h"
#include "txGraph.h"
#include "txBedToGraph.h"

struct evidence
/* One piece of evidence. */
    {
    struct evidence *next;
    struct linkedBeds *lb;	/* Associated input data. */
    int start,end;		/* Bounds of evidence. */
    };

struct vertex
/* A point in our graph */
    {
    int position;		/* Position in genome. */
    enum ggVertexType type;	/* hard/soft? start/end? */
    struct vertex *movedTo;	/* Forwarding address (temporary) */
    struct slRef *waysIn;	/* References to preceeding edges (temporary). */
    struct slRef *waysOut;	/* References to following edges (temporary). */
    int count;			/* Number of times used, or index of vertex (temporary). */
    };

struct edge
/* An edge in our graph */
    {
    struct edge *next;          /* for forming a singly-linked list of edges */
    struct vertex *start;	/* Starting vertex. */
    struct vertex *end;		/* Ending vertex. */
    struct evidence *evList;	/* List of evidence. */
    };


static int vertexCmp(void *va, void *vb)
/* Return: if a before b, return negative.  If a after b, return positive.  
 * else if a and b have the same position and same type, return 0.
 * else if a and b have same position but different types, return 
 * a nonzero value reflecting the difference in their types. */
{
struct vertex *a = va;
struct vertex *b = vb;
int diff = a->position - b->position;
if (diff == 0)
    diff = a->type - b->type;
return diff;
}

static int edgeCmp(void *va, void *vb)
/* Return -1 if a before b,  0 if a and b overlap,
 * and 1 if a after b. */
{
struct edge *a = va;
struct edge *b = vb;
int diff = vertexCmp(a->start, b->start);
if (diff == 0)
    diff = vertexCmp(a->end, b->end);
return diff;
}

static boolean encloses(struct edge *a, struct edge *b)
/* Return TRUE if the first edge encloses the second, false otherwise */
{
if (a->start->position <= b->start->position
    && a->end->position >= b->end->position)
    {
    return(TRUE);
    } 
else 
    {
    return(FALSE);
    }
}

static boolean trustedEdge(struct edge *edge)
/* Return TRUE if any member of edge evidence is from a trusted source. */
{
struct evidence *ev;
for (ev = edge->evList; ev != NULL; ev = ev->next)
    {
    if (trustedSource(ev->lb->sourceType))
        return TRUE;
    }
return FALSE;
}


static struct vertex *matchingVertex(struct rbTree *tree, int position, enum ggVertexType type)
/* Find matching vertex.  Return NULL if none. */
{
struct vertex temp;
temp.position = position;
temp.type = type;
return rbTreeFind(tree, &temp);
}

static struct vertex *addUniqueVertex(struct rbTree *tree, int position, enum ggVertexType type)
/* Find existing vertex if it exists, otherwise create and return new one. */
{
struct vertex *v = matchingVertex(tree, position, type);
if (v == NULL)
    {
    lmAllocVar(tree->lm, v);
    v->position = position;
    v->type = type;
    rbTreeAdd(tree, v);
    }
return v;
}

static struct edge *matchingEdge(struct rbTree *tree, struct vertex *start, struct vertex *end)
/* Find matching edge. Return NULL if none. */
{
struct edge temp;
temp.start = start;
temp.end = end;
return rbTreeFind(tree, &temp);
}

static struct edge *addUniqueEdge(struct rbTree *tree, struct vertex *start, struct vertex *end,
	struct linkedBeds *lb)
/* Find existing edge if it exists.  Otherwise create and return new one. 
 * Regardless add lb as evidence to edge. */
{
struct edge *e = matchingEdge(tree, start, end);
if (e == NULL)
    {
    lmAllocVar(tree->lm, e);
    e->start = start;
    e->end = end;
    e->next = NULL;
    rbTreeAdd(tree, e);
    }
struct evidence *ev;
lmAllocVar(tree->lm, ev);
ev->lb = lb;
ev->start = start->position;
ev->end = end->position;
slAddHead(&e->evList, ev);
return e;
}

static struct rbTree *makeVertexTree(struct linkedBeds *lbList)
/* Make tree of unique vertices. */
{
struct rbTree *vertexTree = rbTreeNew(vertexCmp);
struct linkedBeds *lb;
for (lb = lbList; lb != NULL; lb = lb->next)
    {
    struct bed *bed;
    for (bed = lb->bedList; bed != NULL; bed = bed->next)
        {
	/* Add very beginning and end, they'll be soft. */
	addUniqueVertex(vertexTree, bed->chromStart, ggSoftStart);
	addUniqueVertex(vertexTree, bed->chromEnd, ggSoftEnd);

	/* Add internal hard ends. */
	int i, lastBlock = bed->blockCount-1;
	for (i=0; i<lastBlock; ++i)
	    {
	    addUniqueVertex(vertexTree, 
	    	bed->chromStart + bed->chromStarts[i] + bed->blockSizes[i], ggHardEnd);
	    addUniqueVertex(vertexTree, 
	    	bed->chromStart + bed->chromStarts[i+1], ggHardStart);
	    }
	}
    }
return vertexTree;
}

static struct rbTree *makeEdgeTree(struct linkedBeds *lbList, struct rbTree *vertexTree)
/* Make tree of unique edges. */
{
struct rbTree *edgeTree = rbTreeNew(edgeCmp);
struct linkedBeds *lb;
for (lb = lbList; lb != NULL; lb = lb->next)
    {
    struct bed *bed, *nextBed;
    for (bed = lb->bedList; bed != NULL; bed = nextBed)
        {
	nextBed = bed->next;

	/* Loop to add all introns and all but last exon. */
	struct vertex *start = matchingVertex(vertexTree, bed->chromStart, ggSoftStart);
	int i, lastBlock = bed->blockCount-1;
	for (i=0; i<lastBlock; ++i)
	    {
	    /* Add exon */
	    struct vertex *end = matchingVertex(vertexTree,
		start->position + bed->blockSizes[i], ggHardEnd);
	    addUniqueEdge(edgeTree, start, end, lb);

	    /* Add intron */
	    start = matchingVertex(vertexTree, 
		    bed->chromStart + bed->chromStarts[i+1], ggHardStart);
	    addUniqueEdge(edgeTree, end, start, lb);
	    }

	/* Add final exon */
	struct vertex *end = matchingVertex(vertexTree, bed->chromEnd, ggSoftEnd);
	addUniqueEdge(edgeTree, start, end, lb);

	/* If there's another bed to go, add a soft intron connecting it. */
	if (nextBed != NULL)
	    {
	    start = matchingVertex(vertexTree, nextBed->chromStart, ggSoftStart);
	    addUniqueEdge(edgeTree, end, start, lb);
	    }
	}
    }
return edgeTree;
}

void incVertexUses(void *item)
/* Callback to clear edge->start->useCount and edge->end->useCount. */
{
struct edge *edge = item;
edge->start->count += 1; 
edge->end->count += 1;
}

void removeUnusedVertices(struct rbTree *vertexTree, struct rbTree *edgeTree)
/* Remove vertices not connected to any edges. */
{
/* Get vertex list and clear counts. */
struct slRef *vRef, *vRefList = rbTreeItems(vertexTree);
for (vRef = vRefList; vRef != NULL; vRef = vRef->next)
    {
    struct vertex *v = vRef->val;
    v->count = 0;
    }

/* Inc counts of vertices connected to edges. */
rbTreeTraverse(edgeTree, incVertexUses);

/* Remove unused vertices. */
for (vRef = vRefList; vRef != NULL; vRef = vRef->next)
    {
    struct vertex *v = vRef->val;
    if (v->count == 0)
        rbTreeRemove(vertexTree, v);
    }

slFreeList(&vRefList);
}

void removeEmptyEdges(struct rbTree *vertexTree, struct rbTree *edgeTree)
/* Remove edges that are zero or negative in length. */
{
int removeCount = 0;
struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree);
for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *edge = edgeRef->val;
    if (edge->start->position >= edge->end->position)
        {
	removeCount += 1;
	rbTreeRemove(edgeTree, edge);
	}
    }
if (removeCount)
    removeUnusedVertices(vertexTree, edgeTree);
slFreeList(&edgeRefList);
}


static struct dlList *sortedListFromTree(struct rbTree *tree)
/* Create a double-linked list from tree. List will be sorted.  */
{
struct slRef *ref, *refList = rbTreeItems(tree);
struct dlList *list = dlListNew();
for (ref = refList; ref != NULL; ref = ref->next)
    dlAddValTail(list, ref->val);
slFreeList(&refList);
return list;
}

void addWaysInAndOut(struct rbTree *vertexTree, struct rbTree *edgeTree,
	struct lm *lm)
/* Put a bunch of ways in and out of the graph onto the edges. */
{
struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree);
for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *edge = edgeRef->val;
    struct slRef *inRef, *outRef;
    lmAllocVar(lm, inRef);
    lmAllocVar(lm, outRef);
    inRef->val = outRef->val = edge;
    slAddHead(&edge->start->waysOut, outRef);
    slAddHead(&edge->end->waysIn, inRef);
    }
slFreeList(&edgeRefList);
}

static boolean checkSeqSimilar(struct dnaSeq *a, struct dnaSeq *b, int minScore)
/* Check that two sequences are similar.  Assumes sequences are same size. */
{
int size = a->size;
int size1 = size-1;
return dnaScoreMatch(a->dna, b->dna, size) >= minScore 
	|| dnaScoreMatch(a->dna+1, b->dna, size1) >= minScore
	|| dnaScoreMatch(a->dna, b->dna+1, size1) >= minScore;
}

static boolean uncuttableEdge(struct edge *e)
/* Return TRUE if any evidence for edge is uncuttable (ccds) */
{
struct evidence *ev;
for (ev = e->evList; ev != NULL; ev = ev->next)
    if (noCutSource(ev->lb->sourceType))
        return TRUE;
return FALSE;
}

static boolean vertexPartOfUncuttableEdge(struct vertex *v, boolean isStart)
/* See if vertex is part of an uncuttable edge. */
{
struct slRef *eRef, *eRefList = (isStart ? v->waysOut : v->waysIn);
assert(eRefList != NULL);
for (eRef = eRefList; eRef != NULL; eRef = eRef->next)
    if (uncuttableEdge(eRef->val))
	return TRUE;
return FALSE;
}

static boolean checkSnapOk(struct vertex *vOld, struct vertex *vNew, boolean isRev, 
	int bleedSize, int maxUncheckedSize, struct nibTwoCache *seqCache, char *chromName)
/* Load sequence that corresponds to bleed-over, and  make sure that sequence of next
 * exon is similar. */
{
if (vertexPartOfUncuttableEdge(vOld, isRev))
    return FALSE;
if (bleedSize <= maxUncheckedSize)
    return TRUE;
int minScore = bleedSize-2;
boolean similar = FALSE;
if (isRev)
    {
    int oldStart = vOld->position;
    struct slRef *eRef;
    struct dnaSeq *oldSeq = nibTwoCacheSeqPartExt(seqCache, chromName, oldStart, bleedSize, FALSE, NULL);
    for (eRef = vNew->waysIn; eRef != NULL; eRef = eRef->next)
        {
	struct edge *edge = eRef->val;
	struct vertex *vRest = edge->start;
	int newStart = vRest->position - bleedSize;
	struct dnaSeq *newSeq = nibTwoCacheSeqPartExt(seqCache, chromName, newStart, bleedSize, FALSE, NULL);
	similar = checkSeqSimilar(oldSeq, newSeq, minScore);
	dnaSeqFree(&newSeq);
	if (similar)
	    break;
	}
    dnaSeqFree(&oldSeq);
    }
else
    {
    int oldStart = vOld->position - bleedSize;
    struct slRef *eRef;
    struct dnaSeq *oldSeq = nibTwoCacheSeqPartExt(seqCache, chromName, oldStart, bleedSize, FALSE, NULL);
    for (eRef = vNew->waysOut; eRef != NULL; eRef = eRef->next)
        {
	struct edge *edge = eRef->val;
	struct vertex *vRest = edge->end;
	int newStart = vRest->position;
	struct dnaSeq *newSeq = nibTwoCacheSeqPartExt(seqCache, chromName, newStart, bleedSize, FALSE, NULL);
	similar = checkSeqSimilar(oldSeq, newSeq, minScore);
	dnaSeqFree(&newSeq);
	if (similar)
	    break;
	}
    }
return similar;
}

static boolean snapVertex(struct dlNode *oldNode, int maxSnapSize, int maxUncheckedSnapSize,
	struct nibTwoCache *seqCache, char *chromName)
/* Snap vertex to nearby previous hard vertex. */
{
/* Figure out hard type we want to snap to, and return if not
 * soft to begin with. */
struct vertex *vOld = oldNode->val;
int oldPos = vOld->position;
enum ggVertexType currentType = vOld->type;

/* If no seqCache, then set up things so not bothering to check it. */
if (seqCache == NULL)
    maxUncheckedSnapSize = maxSnapSize;

/* Search backwards */
if (currentType == ggSoftEnd)
    {
    struct dlNode *node;
    for (node = oldNode->prev; !dlStart(node); node = node->prev)
	{
	struct vertex *v = node->val;
	int newPos = v->position;
	int dif = oldPos - newPos;
	if (dif > maxSnapSize)
	    break;
	if (v->type == ggHardEnd)
	    {
	    if (checkSnapOk(vOld, v, FALSE, dif, maxUncheckedSnapSize, seqCache, chromName))
		{
		vOld->movedTo = v;
		verbose(3, "snapping vertex %d to %d\n", oldPos, newPos);
		return TRUE;
		}
	    }
	}
    }

/* Search forward */
else if (currentType == ggSoftStart)
    {
    struct dlNode *node;
    for (node = oldNode->next; !dlEnd(node); node = node->next)
	{
	struct vertex *v = node->val;
	int newPos = v->position;
	int dif = newPos - oldPos;
	if (dif > maxSnapSize)
	    break;
	if (v->type == ggHardStart)
	    {
	    if (checkSnapOk(vOld, v, TRUE, dif, maxUncheckedSnapSize, seqCache, chromName))
		{
		vOld->movedTo = v;
		return TRUE;
		}
	    }
	}
    }
return FALSE;
}

void mergeOrAddEdge(struct rbTree *edgeTree, struct edge *edge)
/* Add edge back if it is still unique, otherwise move evidence from
 * edge into existing edge. */
{
struct edge *existing = rbTreeFind(edgeTree, edge);
if (existing)
    {
    existing->evList = slCat(existing->evList, edge->evList);
    edge->evList = NULL;
    }
else
    rbTreeAdd(edgeTree, edge);
}

void updateForwardedEdges(struct rbTree *edgeTree)
/* Go through edges, following the movedTo's in the
 * vertices if need be. */
{
struct slRef *ref, *refList = rbTreeItems(edgeTree);
int forwardCount = 0;
for (ref = refList; ref != NULL; ref = ref->next)
    {
    struct edge *edge = ref->val;
    struct vertex *start = edge->start, *end = edge->end;
    if (start->movedTo || end->movedTo)
        {
	++forwardCount;
	rbTreeRemove(edgeTree, edge);
	if (start->movedTo)
	    edge->start = start->movedTo;
	if (end->movedTo)
	    edge->end = end->movedTo;
	mergeOrAddEdge(edgeTree, edge);
	}
    }
if (forwardCount > 0)
    verbose(3, "Forwarded %d edges.\n", forwardCount);
slFreeList(&refList);
}

void snapSoftToCloseHard(struct rbTree *vertexTree, struct rbTree *edgeTree, int maxSnapSize,
	int maxUncheckedSnapSize, struct nibTwoCache *seqCache, char *chromName)
/* Snap hard vertices to nearby soft vertices of same type. */
{
struct lm *lm = lmInit(0);
addWaysInAndOut(vertexTree, edgeTree, lm);
struct dlList *vList = sortedListFromTree(vertexTree);
struct dlNode *node;
int snapCount = 0;
for (node = vList->head; !dlEnd(node); node = node->next)
    {
    if (snapVertex(node, maxSnapSize, maxUncheckedSnapSize, seqCache, chromName))
	{
	rbTreeRemove(vertexTree, node->val);
        ++snapCount;
	}
    }
/* Clean up ways in and out since have removed some nodes. */
for (node = vList->head; !dlEnd(node); node = node->next)
    {
    struct vertex *v = node->val;
    v->waysIn = v->waysOut = NULL;
    }
if (snapCount > 0)
    {
    verbose(3, "Snapped %d close edges, now have %d vertices\n", snapCount, vertexTree->n);
    updateForwardedEdges(edgeTree);
    }
dlListFree(&vList);
lmCleanup(&lm);
}

int edgeRefCmpEnd(const void *va, const void *vb)
/* Comparison to sort a slRef list of edges
 * by the edge end location. */
{
const struct slRef *a = *((struct slRef **)va);
const struct slRef *b = *((struct slRef **)vb);
struct edge *aEdge = a->val;
struct edge *bEdge = b->val;
return aEdge->end->position - bEdge->end->position;
}

int edgeRefCmpStartRev(const void *va, const void *vb)
/* Comparison to sort a slRef list of edges
 * by the edge start location, with biggest biggest
 * starts first. */
{
const struct slRef *a = *((struct slRef **)va);
const struct slRef *b = *((struct slRef **)vb);
struct edge *aEdge = a->val;
struct edge *bEdge = b->val;
return bEdge->end->position - aEdge->end->position;
}



int snapHalfHardForward(struct vertex *v, struct rbTree *edgeTree,
	enum ggVertexType softType, enum ggVertexType hardType)
/* V is a hard vertex. Try to snap soft end vertices connected to v
 * to nearest hard vertex connected to v. */
{
int snapCount = 0;
// enum ggVertex otherHardType = (hardType == ggHardStart ? ggHardEnd : ggHardStart);
slSort(&v->waysOut, edgeRefCmpEnd); 
struct slRef *hardRef = v->waysOut, *softRef = v->waysOut;
for (;;)
    {
    /* Advance softRef to next soft ended edge. */
    for (;softRef != NULL; softRef = softRef->next)
        {
	struct edge *edge = softRef->val;
	if (edge->end->type == softType)
	    break;
	}
    if (softRef == NULL)
        break;
    struct edge *softEdge = softRef->val;

    /* If hardRef is before softRef (or it's not hard) advance it */
    for (;hardRef != NULL; hardRef = hardRef->next)
        {
	struct edge *edge = hardRef->val;
	if (edge->end->type == hardType && edge->end->position >= softEdge->end->position)
	   break;
	}
    if (hardRef == NULL)
        break;
    rbTreeRemove(edgeTree, softEdge);
    struct edge *hardEdge = hardRef->val;
    verbose(3, "Snapping half-hard edge ending at %d to %d\n", softEdge->end->position, 
	    hardEdge->end->position); 
    softEdge->end = hardEdge->end;
    ++snapCount;
    mergeOrAddEdge(edgeTree, softEdge);
    softRef = softRef->next;
    }
if (snapCount > 0)
    verbose(3, "Snapped %d forward\n", snapCount);
return snapCount;
}

int snapHalfHardBackward(struct vertex *v, struct rbTree *edgeTree,
	enum ggVertexType softType, enum ggVertexType hardType)
/* V is a hard vertex. Try to snap soft start vertices connected to v
 * to nearest hard vertex connected to v. */
{
int snapCount = 0;
// enum ggVertex otherHardType = (hardType == ggHardStart ? ggHardEnd : ggHardStart);
slSort(&v->waysIn, edgeRefCmpStartRev); 
struct slRef *hardRef = v->waysIn, *softRef = v->waysIn;
for (;;)
    {
    /* Advance softRef to next soft ended edge. */
    for (;softRef != NULL; softRef = softRef->next)
        {
	struct edge *edge = softRef->val;
	if (edge->start->type == softType)
	    break;
	}
    if (softRef == NULL)
        break;
    struct edge *softEdge = softRef->val;

    /* If hardRef is before softRef (or it's not hard) advance it */
    for (;hardRef != NULL; hardRef = hardRef->next)
        {
	struct edge *edge = hardRef->val;
	if (edge->start->type == hardType && edge->start->position <= softEdge->start->position)
	   break;
	}
    if (hardRef == NULL)
        break;
    rbTreeRemove(edgeTree, softEdge);
    struct edge *hardEdge = hardRef->val;
    verbose(3, "Snapping half-hard edge starting at %d to %d\n", softEdge->start->position, 
	    hardEdge->start->position); 
    softEdge->start = hardEdge->start;
    ++snapCount;
    mergeOrAddEdge(edgeTree, softEdge);
    softRef = softRef->next;
    }
if (snapCount > 0)
    verbose(3, "Snapped %d reverse\n", snapCount);
return snapCount;
}

static int snapHalfHard(struct vertex *v, struct rbTree *edgeTree)
/* Snap soft ends of half-hard edges connected to v to 
 * compatible hard end.  Returns number of snaps it's made. */
{
enum ggVertexType currentType = v->type;
enum ggVertexType softType, hardType;

/* Figure out what types look for at the other end.  If we're
 * a soft vertex we can't do anything. */
if (currentType == ggHardStart)
    {
    softType = ggSoftEnd;
    hardType = ggHardEnd;
    }
else if (currentType == ggHardEnd)
    {
    softType = ggSoftStart;
    hardType = ggHardStart;
    }
else
    return 0;

int snapCount = 0;
snapCount += snapHalfHardForward(v, edgeTree, softType, hardType);
snapCount += snapHalfHardBackward(v, edgeTree, softType, hardType);
return snapCount;
}

void snapHalfHards(struct rbTree *vertexTree, struct rbTree *edgeTree)
/* Snap edges that are half hard to edges that are fully hard and that
 * share the original hard end */
{
struct lm *lm = lmInit(0);
addWaysInAndOut(vertexTree, edgeTree, lm);
struct slRef *vRef, *vRefList = rbTreeItems(vertexTree);
for (vRef = vRefList; vRef != NULL; vRef = vRef->next)
    {
    struct vertex *v = vRef->val;
    snapHalfHard(v, edgeTree);
    v->waysIn = v->waysOut = NULL;	/* These are trashed so remove them. */
    }
slFreeList(&vRefList);
removeUnusedVertices(vertexTree, edgeTree);
lmCleanup(&lm);
}

struct sourceAndPos
/* A vertex source and position. */
    {
    struct sourceAndPos *next;
    int position;		/* Position in genome. */
    bool trustedSource;		/* If true this is trusted source (refSeq) */
    };

int sourceAndPosCmp(const void *va, const void *vb)
/* Compare two sourceAndPos by position. */
{
const struct sourceAndPos *a = *((struct sourceAndPos **)va);
const struct sourceAndPos *b = *((struct sourceAndPos **)vb);
return a->position - b->position;
}

int sourceAndPosCmpRev(const void *va, const void *vb)
/* Compare two sourceAndPos in reverse direction. */
{
const struct sourceAndPos *a = *((struct sourceAndPos **)va);
const struct sourceAndPos *b = *((struct sourceAndPos **)vb);
return b->position - a->position;
}

static struct vertex *consensusVertex(struct rbTree *vertexTree, struct sourceAndPos *list, 
	int listSize, enum ggVertexType softType)
/* Return first trusted (refSeq) vertex, or if no trusted one, then 
 * a soft vertex 1/4 of way (rounded down) through list. */
{
struct sourceAndPos *el;
for (el = list; el != NULL; el = el->next)
    {
    if (el->trustedSource)
        break;
    }
if (el == NULL)
    el = slElementFromIx(list, listSize/4);
return matchingVertex(vertexTree, el->position, softType);
}

void halfConsensusForward(struct vertex *v, 
	struct rbTree *vertexTree, struct rbTree *edgeTree,
	enum ggVertexType softType, struct lm *lm)
/* Figure out consensus end of all edges beginning at v that have soft end. */
{
/* Collect a list of all attached softies. */
struct sourceAndPos *list = NULL, *el;
struct slRef *edgeRef;
int softCount = 0;
for (edgeRef = v->waysOut; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *edge = edgeRef->val;
    struct vertex *v = edge->end;
    if (v->type == softType)
        {
	struct evidence *ev;
	for (ev = edge->evList; ev != NULL; ev = ev->next)
	    {
	    lmAllocVar(lm, el);
	    el->position = ev->end;
	    el->trustedSource = trustedSource(ev->lb->sourceType);
	    slAddHead(&list, el);
	    ++softCount;
	    }
	}
    }

/* See if have enough elements to make consensus forming
 * worthwhile. */
if (softCount > 1)
    {
    slSort(&list, sourceAndPosCmpRev);
    struct vertex *end = consensusVertex(vertexTree, list, softCount, softType);
    for (edgeRef = v->waysOut; edgeRef != NULL; edgeRef = edgeRef->next)
	{
	struct edge *edge = edgeRef->val;
	struct vertex *v = edge->end;
	if (v != end && v->type == softType)
	    {
	    rbTreeRemove(edgeTree, edge);
	    verbose(3, "Performing half-hard consensus: moving edge end from %d to %d\n",
		    edge->end->position, end->position);
	    edge->end = end;
	    mergeOrAddEdge(edgeTree, edge);  // Will always merge. 
	    }
	}
    }
}


void halfConsensusBackward(struct vertex *v, 
	struct rbTree *vertexTree, struct rbTree *edgeTree,
	enum ggVertexType softType, struct lm *lm)
/* Figure out consensus start of all edges end at v that have soft start. */
{
/* Collect a list of all attached softies. */
struct sourceAndPos *list = NULL, *el;
struct slRef *edgeRef;
int softCount = 0;
for (edgeRef = v->waysIn; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *edge = edgeRef->val;
    struct vertex *v = edge->start;
    if (v->type == softType)
        {
	struct evidence *ev;
	for (ev = edge->evList; ev != NULL; ev = ev->next)
	    {
	    lmAllocVar(lm, el);
	    el->position = ev->start;
	    el->trustedSource = trustedSource(ev->lb->sourceType);
	    slAddHead(&list, el);
	    ++softCount;
	    }
	}
    }

/* See if have enough elements to make consensus forming
 * worthwhile. */
if (softCount > 1)
    {
    slSort(&list, sourceAndPosCmp);
    struct vertex *start = consensusVertex(vertexTree, list, softCount, softType);
    for (edgeRef = v->waysIn; edgeRef != NULL; edgeRef = edgeRef->next)
	{
	struct edge *edge = edgeRef->val;
	struct vertex *v = edge->start;
	if (v != start && v->type == softType)
	    {
	    rbTreeRemove(edgeTree, edge);
	    verbose(3, "Performing half-hard consensus: moving edge start from %d to %d\n",
		    edge->start->position, start->position);
	    edge->start = start;
	    mergeOrAddEdge(edgeTree, edge);  // Will always merge. 
	    }
	}
    }
}

void halfHardConsensus(struct vertex *v, 
	struct rbTree *vertexTree, struct rbTree *edgeTree,
	struct lm *lm)
/* Form consensus of soft vertices connected to hard vertex v. 
 * Consensus is:
 *    1) The largest refSeq.
 *    2) If no refSeq, something 3/4 of the way to the largest. */
{
enum ggVertexType currentType = v->type;
enum ggVertexType softType;

/* Figure out what types look for at the other end.  If we're
 * a soft vertex we can't do anything. */
if (currentType == ggHardStart)
    softType = ggSoftEnd;
else if (currentType == ggHardEnd)
    softType = ggSoftStart;
else
    return;

halfConsensusForward(v, vertexTree, edgeTree, softType, lm);
halfConsensusBackward(v, vertexTree, edgeTree, softType, lm);
}

void halfHardConsensuses(struct rbTree *vertexTree, struct rbTree *edgeTree)
/* Collect soft edges that share a hard edge. Move all of them to a consensus
 * vertex, which is either:
 *    1) The largest refSeq.
 *    2) If no refSeq, something 3/4 of the way to the largest. */
{
struct lm *lm = lmInit(0);
addWaysInAndOut(vertexTree, edgeTree, lm);
struct slRef *vRef, *vRefList = rbTreeItems(vertexTree);
for (vRef = vRefList; vRef != NULL; vRef = vRef->next)
    {
    struct vertex *v = vRef->val;
    halfHardConsensus(v, vertexTree, edgeTree, lm);
    v->waysIn = v->waysOut = NULL;	/* These are trashed so remove them. */
    }
slFreeList(&vRefList);
removeUnusedVertices(vertexTree, edgeTree);
lmCleanup(&lm);
}

struct edge *edgeFromConsensusOfEvidence(struct rbTree *vertexTree, struct evidence *evList,
	struct lm *lm)
/* Attempt to create a single edge from a list of overlapping evidence ranges.
 * The start will be the consensus of all evidence starts.  Likewise
 * the end will be the consensus of all evidence ends.  The evidence that
 * overlaps this edge will be included in the edge. */
{
/* Gather up lists of starts and ends. */
struct sourceAndPos *startList = NULL, *endList = NULL;
struct evidence *ev, *nextEv;
int listSize = 0;
for (ev = evList; ev != NULL; ev = ev->next)
    {
    struct sourceAndPos *x;
    boolean trusted = trustedSource(ev->lb->sourceType);
    lmAllocVar(lm, x);
    x->position = ev->start;
    x->trustedSource = trusted;
    slAddHead(&startList, x);
    lmAllocVar(lm, x);
    x->position = ev->end;
    x->trustedSource = trusted;
    slAddHead(&endList, x);
    ++listSize;
    }

/* Get consensus starts and ends. */
slSort(&startList, sourceAndPosCmp);
struct vertex *start = consensusVertex(vertexTree, startList, listSize, ggSoftStart);
slSort(&endList, sourceAndPosCmpRev);
struct vertex *end = consensusVertex(vertexTree, endList, listSize, ggSoftEnd);

/* Make edge */
struct edge *edge;
AllocVar(edge);
edge->start = start;
edge->end = end;
edge->next = NULL;

/* Add overlapping evidence to edge. */
for (ev = evList; ev != NULL; ev = nextEv)
    {
    nextEv = ev->next;
    if (rangeIntersection(ev->start, ev->end, start->position, end->position) > 0)
        slAddHead(&edge->evList, ev);
    }

return edge;
}

void removeEnclosedDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree, 
	int maxBleedOver, double singleExonMaxOverlap)
/* Move double-softs that overlap spliced things to a very great extent into
 * the spliced things. Also remove tiny double-softs (no more than 2*maxBleedOver). */
{
/* Traverse graph and build up range tree covering spliced exons.  For each 
 * range of overlapping exons, assemble a singly-linked list of all exons in 
 * the range */
struct rbTree *rangeTree = rangeTreeNew(0);
struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree);
int removedCount = 0;
for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *edge = edgeRef->val;
    struct vertex *start = edge->start;
    struct vertex *end = edge->end;
    if (start->type == ggHardStart || end->type == ggHardEnd)
	{
	rangeTreeAddValList(rangeTree, start->position, end->position, edge);
	}
    }

/* Traverse graph yet one more time looking for doubly-soft exons
 * that are overlapping the spliced exons. */
for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *edge = edgeRef->val;
    struct vertex *start = edge->start;
    struct vertex *end = edge->end;
    if (start->type == ggSoftStart && end->type == ggSoftEnd)
        {
	int s = start->position;
	int e = end->position;
	int size = e - s;
	if (size <= maxBleedOver+maxBleedOver)
	     {
	     /* Tiny case, just remove edge and forget it. */
	     verbose(3, "Removing tiny double-soft edge from %d to %d\n", s, e);
	     rbTreeRemove(edgeTree, edge);
	     ++removedCount;
	     }
	else
	     {
	     /* Normal case, look for exon list that encloses us, and
	      * if any single exon in that list encloses us, merge into it. */
	     int splicedOverlap = rangeTreeOverlapSize(rangeTree, s, e);
	     if (splicedOverlap > 0 && splicedOverlap > singleExonMaxOverlap*size)
	         {
		 if (!trustedEdge(edge))
		     {
		     /* Once we find a range that overlaps the doubly-soft edge, find 
		      * (half-hard or better) edge from that range that encloses the 
		      * doubly soft edge. */
		     struct range *r = rangeTreeMaxOverlapping(rangeTree, s, e);
		     struct edge *nextEdge, *edgeList = r->val;
		     struct edge *enclosingEdge = NULL;
		     for (nextEdge = edgeList; edgeList != NULL; edgeList = edgeList->next)
			 {
			 if (encloses(nextEdge, edge))
			     {
			     enclosingEdge = nextEdge;
			     }
			 }
		     if (enclosingEdge != NULL) 
			 {
			 enclosingEdge->evList = slCat(enclosingEdge->evList, edge->evList);
			 edge->evList = NULL;
			 verbose(3, "Removing doubly-soft edge %d-%d, reassigning to %d-%d\n",
				 s, e, enclosingEdge->start->position, 
				 enclosingEdge->end->position);
			 rbTreeRemove(edgeTree, edge);
			 ++removedCount;
			 }
		     }
		 }
	     }
	}
    }

/* Clean up and go home. */
if (removedCount > 0)
    removeUnusedVertices(vertexTree, edgeTree);
for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *nextEdge, *edge = edgeRef->val;
    while (edge != NULL) 
	{
	nextEdge = edge->next;
	edge->next = NULL;
	edge = nextEdge;
	}
    }
slFreeList(&edgeRefList);
rbTreeFree(&rangeTree);
}

void mergeDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree)
/* Merge together overlapping edges with soft ends. */
{
struct mergedEdge
/* Hold together info on a merged edge. */
    {
    struct evidence *evidence;
    };

/* Traverse graph and build up range tree.  Each node in the range tree
 * will represent the bounds of coordinates of overlapping double softs */
struct rbTree *rangeTree = rangeTreeNew(0);
struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree);
for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *edge = edgeRef->val;
    struct vertex *start = edge->start;
    struct vertex *end = edge->end;
    if (start->type == ggSoftStart && end->type == ggSoftEnd)
        rangeTreeAdd(rangeTree, start->position, end->position);
    }

/* Traverse graph again merging edges */
for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *edge = edgeRef->val;
    struct vertex *start= edge->start;
    struct vertex *end = edge->end;
    if (start->type == ggSoftStart && end->type == ggSoftEnd)
        {
	struct range *r = rangeTreeFindEnclosing(rangeTree,
		start->position, end->position);
	assert(r != NULL);
	/* At this point, r represents the bounds of a double-soft
	 * region that encompasses this edge.  Collect the set of
	 * evidence of edges overlapping this range */
        struct mergedEdge *mergeEdge = r->val;
        if (mergeEdge == NULL)
            {
            lmAllocVar(rangeTree->lm, mergeEdge);
            r->val = mergeEdge;
            }
        mergeEdge->evidence = slCat(edge->evList, mergeEdge->evidence);
	verbose(3, "Merging doubly-soft edge (%d,%d) into range (%d,%d)\n", 
		start->position, end->position, r->start, r->end);
        edge->evList = NULL;
        rbTreeRemove(edgeTree, edge);
	}
    }

/* Traverse merged edge list, making a single edge from each range. At this point,
 * each range will have some evidence attached to it, from each of the double softs
 * that fall within the range.  From all of this evidence, make a single consensus edge */
struct range *r;
struct lm *lm = lmInit(0);
for (r = rangeTreeList(rangeTree); r != NULL; r = r->next)
    {
    struct mergedEdge *mergedEdge = r->val;
    struct edge *edge = edgeFromConsensusOfEvidence(vertexTree, mergedEdge->evidence, lm);
    if (edge != NULL)
        rbTreeAdd(edgeTree, edge);
    verbose(3, "Deriving edge (%d,%d) from all the double softs in range (%d,%d)\n", 
	    edge->start->position, edge->end->position, r->start, r->end);
    }


/* Clean up and go home. */
lmCleanup(&lm);
removeUnusedVertices(vertexTree, edgeTree);
slFreeList(&edgeRefList);
rbTreeFree(&rangeTree);
}

#ifdef DEBUG
oid dumpVertices(struct rbTree *vertexTree)
{
struct slRef *vRef, *vRefList = rbTreeItems(vertexTree);
for (vRef = vRefList; vRef != NULL; vRef = vRef->next)
    {
    struct vertex *v = vRef->val;
    printf(" %d(%d)", v->position, v->type);
    }
printf("\n");
slFreeList(&vRefList);
}
#endif /* DEBUG */

struct txGraph *treeTreeToTxg(struct rbTree *vertexTree, struct rbTree *edgeTree, char *name,
	struct linkedBeds *lbList)
/* Convert from vertexTree/edgeTree representation to txGraph */
{
if (vertexTree->root == NULL || edgeTree->root == NULL)  // Nothing to do really
    return NULL;

/* Allocate txGraph, and fill in some of the relatively easy fields. */
struct txGraph *txg;
AllocVar(txg);
struct bed *bed = lbList->bedList;
txg->name = cloneString(name);
txg->tName = cloneString(bed->chrom);
txg->strand[0] = bed->strand[0];
txg->vertexCount = vertexTree->n;
txg->edgeCount = edgeTree->n;

/* Get vertex list and number sequentially. Fill in vertex array */
int i;
struct slRef *vRef, *vRefList = rbTreeItems(vertexTree);
struct txVertex *tv = NULL;
if (vertexTree->n)
    tv = AllocArray(txg->vertices, vertexTree->n);
int tStart = BIGNUM, tEnd = -BIGNUM;
for (vRef = vRefList, i=0; vRef != NULL; vRef = vRef->next, i++)
    {
    struct vertex *v = vRef->val;
    int position = v->position;
    v->count = i;
    tv->position = position;
    tv->type = v->type;
    tStart = min(tStart, position);
    tEnd = max(tEnd, position);
    ++tv;
    }
slFreeList(&vRefList);

/* Fill in overall bounds. */
txg->tStart = tStart;
txg->tEnd = tEnd;

/* Deal with sources. */
int sourceCount = 0;
struct linkedBeds *lb;
for (lb = lbList; lb != NULL; lb = lb->next)
    lb->id = sourceCount++;
struct txSource *source = AllocArray(txg->sources, sourceCount);
for (lb = lbList; lb != NULL; lb = lb->next)
    {
    source->accession = cloneString(lb->bedList->name);
    source->type = cloneString(lb->sourceType);
    ++source;
    }
txg->sourceCount = sourceCount;

/* Convert edges */
struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree);
for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
    {
    struct edge *edge = edgeRef->val;

    /* Allocate edge and fill in start and end. */
    struct txEdge *te;
    AllocVar(te);
    struct vertex *start = edge->start;
    struct vertex *end = edge->end;
    te->startIx = start->count;
    te->endIx = end->count;

    /* Figure out whether it's an intron or exon. */
    enum ggVertexType startType = edge->start->type;
    if (startType == ggHardStart || startType == ggSoftStart)
        te->type = ggExon;
    else
        te->type = ggIntron;

    /* Convert the evidence. */
    struct evidence *ev;
    for (ev = edge->evList; ev != NULL; ev = ev->next)
        {
	struct txEvidence *tev;
	AllocVar(tev);
	tev->sourceId = ev->lb->id;
	tev->start = ev->start;
	tev->end = ev->end;
	slAddHead(&te->evList, tev);
	te->evCount += 1;
	}
    slReverse(&te->evList);

    slAddHead(&txg->edgeList, te);
    }
slReverse(&txg->edgeList);
return txg;
}

struct txGraph *makeGraph(struct linkedBeds *lbList, int maxBleedOver, 
	int maxUncheckedBleed, struct nibTwoCache *seqCache,
	double singleExonMaxOverlap, char *name)
/* Create a graph corresponding to linkedBedsList.
 * The maxBleedOver parameter controls how much of a soft edge that
 * can be cut off when snapping to a hard edge.  The singleExonMaxOverlap
 * controls what ratio of a single exon transcript can overlap spliced 
 * transcripts */
{
char *chromName = lbList->bedList->chrom;

/* Create tree of all unique vertices. */
struct rbTree *vertexTree = makeVertexTree(lbList);
verbose(2, "%d unique vertices\n", vertexTree->n);

/* Create tree of all unique edges */
struct rbTree *edgeTree = makeEdgeTree(lbList, vertexTree);
verbose(2, "%d unique edges\n", edgeTree->n);

snapSoftToCloseHard(vertexTree, edgeTree, maxBleedOver, maxUncheckedBleed, seqCache, chromName);
verbose(2, "%d edges, %d vertices after snapSoftToCloseHard\n", 
	edgeTree->n, vertexTree->n);

removeEmptyEdges(vertexTree, edgeTree);
verbose(2, "%d edges, %d vertices after removeEmptyEdges\n",
	edgeTree->n, vertexTree->n);

snapHalfHards(vertexTree, edgeTree);
verbose(2, "%d edges, %d vertices after snapHalfHards\n", 
	edgeTree->n, vertexTree->n);

halfHardConsensuses(vertexTree, edgeTree);
verbose(2, "%d edges, %d vertices after medianHalfHards\n", 
	edgeTree->n, vertexTree->n);

removeEnclosedDoubleSofts(vertexTree, edgeTree, maxBleedOver, singleExonMaxOverlap);
verbose(2, "%d edges, %d vertices after mergeEnclosedDoubleSofts\n",
	edgeTree->n, vertexTree->n);

mergeDoubleSofts(vertexTree, edgeTree);
verbose(2, "%d edges, %d vertices after mergeDoubleSofts\n",
	edgeTree->n, vertexTree->n);

struct txGraph *txg = treeTreeToTxg(vertexTree, edgeTree, name, lbList);

/* Clean up and go home. */
rbTreeFree(&vertexTree);
rbTreeFree(&edgeTree);
return txg;
}
