/* genomeRangeTree - This module is a way of keeping track of
 * non-overlapping ranges (half-open intervals) across a whole
 * genome (multiple chromosomes or scaffolds). 
 * It is a hash table container mapping chrom to rangeTree.
 * Most of the work is performed by rangeTree, this container
 * enables local memory and stack to be shared by many rangeTrees
 * so it should be able to handle genomes with a very large 
 * number of scaffolds. See rangeTree for more information. */

#ifndef GENOMERANGETREE_H
#define GENOMERANGETREE_H

#ifndef HASH_H
#include "hash.h"
#endif

#ifndef DYSTRING_H
#include "dystring.h"
#endif

#ifndef RANGETREE_H
#include "rangeTree.h"
#endif

struct genomeRangeTree
/* A structure that allows efficient random access of ranges of bases covered
 * by a particular feature genome-wide.  Implemented as a range tree for each
 * scaffold or chromosome in a genome. */
    {
    struct genomeRangeTree *next;  /* Next genomeRangeTree in list if any. */
    struct hash *hash;             /* Hash of rangeTrees keyed by chromosome/scaffold name. */
    struct rbTreeNode *stack[128]; /* Stack for binary search. */
    struct lm *lm;                 /* Local memory pool for tree nodes and ranges. */
    };

struct genomeRangeTree *genomeRangeTreeNew();
/* Create a new, empty, genomeRangeTree. Uses the default hash size.
 * Free with genomeRangeTreeFree. */

struct genomeRangeTree *genomeRangeTreeNewSize(int hashPowerOfTwoSize);
/* Create a new, empty, genomeRangeTree. 
 * Free with genomeRangeTreeFree. */

void genomeRangeTreeFree(struct genomeRangeTree **pTree);
/* Free up genomeRangeTree. */

struct rbTree *genomeRangeTreeFindRangeTree(struct genomeRangeTree *tree, char *chrom);
/* Find the rangeTree for this chromosome, if any. Returns NULL if chrom not found.
 * Free with genomeRangeTreeFree. */

struct rbTree *genomeRangeTreeFindOrAddRangeTree(struct genomeRangeTree *tree, char *chrom);
/* Find the rangeTree for this chromosome, or add new chrom and empty rangeTree if not found.
 * Free with genomeRangeTreeFree. */

struct range *genomeRangeTreeAdd(struct genomeRangeTree *tree, char *chrom, int start, int end);
/* Add range to tree, merging with existing ranges if need be. 
 * Adds new rangeTree if chrom not found. */

struct range *genomeRangeTreeAddVal(struct genomeRangeTree *tree, char *chrom, int start, int end, void *val, void *(*mergeVals)(void *existing, void *newVal));
/* Add range to tree, merging with existing ranges if need be. 
 * Adds new rangeTree if chrom not found. 
 * If this is a new range, set the value to this val.
 * If there are existing items for this range, and if mergeVals function is not null, 
 * apply mergeVals to the existing values and this new val, storing the result as the val
 * for this range (see rangeTreeAddValCount() and rangeTreeAddValList() below for examples). */

struct range *genomeRangeTreeAddValCount(struct genomeRangeTree *tree, char *chrom, int start, int end);
/* Add range to tree, merging with existing ranges if need be. 
 * Adds new rangeTree if chrom not found. 
 * Set range val to count of elements in the range. Counts are pointers to 
 * ints allocated in tree localmem */

struct range *genomeRangeTreeAddValList(struct genomeRangeTree *tree, char *chrom, int start, int end, void *val);
/* Add range to tree, merging with existing ranges if need be. 
 * Adds new rangeTree if chrom not found. 
 * Add val to the list of values (if any) in each range.
 * val must be valid argument to slCat (ie, be a struct with a 'next' pointer as its first member) */

boolean genomeRangeTreeOverlaps(struct genomeRangeTree *tree, char *chrom, int start, int end);
/* Return TRUE if start-end overlaps anything in tree. */

int genomeRangeTreeOverlapSize(struct genomeRangeTree *tree, char *chrom, int start, int end);
/* Return the total size of intersection between interval
 * from start to end, and items in range tree. Sadly not
 * thread-safe. */

struct range *genomeRangeTreeFindEnclosing(struct genomeRangeTree *tree, char *chrom, int start, int end);
/* Find item in range tree that encloses range between start and end 
 * if there is any such item. */

struct range *genomeRangeTreeAllOverlapping(struct genomeRangeTree *tree, char *chrom, int start, int end);
/* Return list of all items in range tree that overlap interval start-end.
 * Do not free this list, it is owned by tree.  However it is only good until
 * next call to rangeTreeFindInRange or rangeTreeList. Not thread safe. */

struct range *genomeRangeTreeMaxOverlapping(struct genomeRangeTree *tree, char *chrom, int start, int end);
/* Return item that overlaps most with start-end. Not thread safe.  Trashes list used
 * by rangeTreeAllOverlapping. */

struct range *genomeRangeTreeList(struct genomeRangeTree *tree, char *chrom);
/* Return list of all ranges in single rangeTree in order.  Not thread safe. 
 * No need to free this when done, memory is local to tree. */

struct dyString *genomeRangeTreeToString(struct genomeRangeTree *tree);
/* Return a string representation of the genomeRangeTree.
 * Useful for testing.
 * Not thread-safe; uses globals */

long long genomeRangeTreeSumRanges(struct genomeRangeTree *grt);
/* Sum up all ranges in tree. */

#endif /* GENOMERANGETREE_H */

