/* netClass - Add classification info to net. */

/* Copyright (C) 2013 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
#include "common.h"
#include "linefile.h"
#include "chromInfo.h"
#include "hash.h"
#include "options.h"
#include "rbTree.h"
#include "jksql.h"
#include "hdb.h"
#include "localmem.h"
#include "agpGap.h"
#include "simpleRepeat.h"
#include "liftUp.h"
#include "chainNet.h"


/* Command line switches. */
char *tNewR = NULL;
char *qNewR = NULL;
boolean noAr = FALSE;
char *qRepeatTable = NULL;
char *tRepeatTable = NULL;
char *qSizes = NULL;
struct hash *liftHashT = NULL;
struct hash *liftHashQ = NULL;

/* Localmem obj shared by cached query rbTrees. */
struct lm *qLm = NULL;

/* command line option specifications */
static struct optionSpec optionSpecs[] = {
    {"tNewR", OPTION_STRING},
    {"qNewR", OPTION_STRING},
    {"noAr", OPTION_BOOLEAN},
    {"qRepeats", OPTION_STRING},
    {"tRepeats", OPTION_STRING},
    {"liftT", OPTION_STRING},
    {"liftQ", OPTION_STRING},
    {"qSizes", OPTION_STRING},
    {NULL, 0}
};

static void usage()
/* Explain usage and exit. */
{
errAbort(
  "netClass - Add classification info to net\n"
  "usage:\n"
  "   netClass [options] in.net tDb qDb out.net\n"
  "       tDb - database to fetch target repeat masker table information\n"
  "       qDb - database to fetch query repeat masker table information\n"
  "options:\n"
  "   -tNewR=dir - Dir of chrN.out.spec files, with RepeatMasker .out format\n"
  "                lines describing lineage specific repeats in target\n"
  "   -qNewR=dir - Dir of chrN.out.spec files for query\n"
  "   -noAr - Don't look for ancient repeats\n"
  "   -qRepeats=table - table name for query repeats in place of rmsk\n"
  "   -tRepeats=table - table name for target repeats in place of rmsk\n"
  "                   - for example: -tRepeats=windowmaskerSdust\n"
  "   -liftQ=file.lft - Lift in.net's query coords to chrom-level using\n"
  "                     file.lft (for accessing chrom-level coords in qDb)\n"
  "   -liftT=file.lft - Lift in.net's target coords to chrom-level using\n"
  "                     file.lft (for accessing chrom-level coords in tDb)\n"
  "   -qSizes=chrom.sizes - file with query chrom.sizes instead of reading\n"
  "                   - the chromInfo table from the database\n"
  );
}

struct chrom
/* Basic information on a chromosome. */
    {
    struct chrom *next;	  /* Next in list */
    char *name;		  /* Chromosome name, allocated in hash. */
    int size;		  /* Chromosome size. */
    struct rbTree *nGaps; /* Gaps in sequence (Ns) */
    struct rbTree *repeats; /* Repeats in sequence */
    struct rbTree *newRepeats; /* New (lineage specific) repeats. */
    struct rbTree *oldRepeats; /* Old (pre-split) repeats. */
    struct rbTree *trf;	       /* Simple repeats. */
    };

struct simpleRange
/* A part of a chromosome. */
    {
    int start, end;	/* Half open zero based coordinates. */
    };

#define overlap(r1, r2) ((r1)->start <= (r2)->end && (r2)->start <= (r1)->end)

int simpleRangeCmp(void *va, void *vb)
/* Return -1 if a before b,  0 if a and b overlap,
 * and 1 if a after b. */
{
struct simpleRange *a = va;
struct simpleRange *b = vb;
if (a->end <= b->start)
    return -1;
else if (b->end <= a->start)
    return 1;
else
    return 0;
}

static int interSize;	/* Size of intersection. */
static struct simpleRange interRange; /* Range to intersect with. */

void addInterSize(void *item)
/* Add range to interSize. */
{
struct simpleRange *r = item;
int size;
size = rangeIntersection(r->start, r->end, interRange.start, interRange.end);
interSize += size;
}

int intersectionSize(struct rbTree *tree, int start, int end)
/* Return total size of things intersecting range start-end. */
{
if (tree == NULL)
    return 0;
interRange.start = start;
interRange.end = end;
interSize = 0;
rbTreeTraverseRange(tree, &interRange, &interRange, addInterSize);
return interSize;
}

void setNGap(char *chr, struct hash *chromHash, struct rbTree *tree)
/* Set the appropriate chrom->nGaps to tree -- make sure it's NULL first. */
{
struct chrom *chrom = hashMustFindVal(chromHash, chr);
if (chrom->nGaps != NULL)
    errAbort("getSeqGapsUnsplit: query results must be orderd by chrom, but "
	     "looks like they weren't for %s", chr);
chrom->nGaps = tree;
}

void getSeqGapsUnsplit(struct sqlConnection *conn, struct hash *chromHash)
/* Return a tree of ranges for sequence gaps in all chromosomes, 
 * assuming an unsplit gap table -- when the table is unsplit, it's 
 * probably for a scaffold assembly where we *really* don't want 
 * to do one query per scaffold! */
{
struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
struct rbTree *tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
int rowOffset = hOffsetPastBin(sqlGetDatabase(conn), NULL, "gap");
struct sqlResult *sr;
char **row;
char *prevChrom = NULL;

char query[1024];
sqlSafef(query, sizeof query, "select * from gap order by chrom");
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct agpGap gap;
    struct simpleRange *range;
    agpGapStaticLoad(row+rowOffset, &gap);
    if (prevChrom == NULL)
	prevChrom = cloneString(gap.chrom);
    else if (! sameString(prevChrom, gap.chrom))
	{
	setNGap(prevChrom, chromHash, tree);
	freeMem(prevChrom);
	stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
	tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
	prevChrom = cloneString(gap.chrom);
	}
    lmAllocVar(tree->lm, range);
    range->start = gap.chromStart;
    range->end = gap.chromEnd;
    rbTreeAdd(tree, range);
    }
if (prevChrom != NULL)
    {
    setNGap(prevChrom, chromHash, tree);
    freeMem(prevChrom);
    }
sqlFreeResult(&sr);
}

struct rbTree *getSeqGaps(struct sqlConnection *conn, char *chrom)
/* Return a tree of ranges for sequence gaps in chromosome */
{
struct rbTree *tree = rbTreeNew(simpleRangeCmp);
int rowOffset;
struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset);
char **row;

while ((row = sqlNextRow(sr)) != NULL)
    {
    struct agpGap gap;
    struct simpleRange *range;
    agpGapStaticLoad(row+rowOffset, &gap);
    lmAllocVar(tree->lm, range);
    range->start = gap.chromStart;
    range->end = gap.chromEnd;
    rbTreeAdd(tree, range);
    }
sqlFreeResult(&sr);
return tree;
}

void setTrf(char *chr, struct hash *chromHash, struct rbTree *tree)
/* Set the appropriate chrom->trf to tree -- make sure it's NULL first. */
{
struct chrom *chrom = hashMustFindVal(chromHash, chr);
if (chrom->trf != NULL)
    errAbort("getTrfUnsplit: query results must be orderd by chrom, but "
	     "looks like they weren't for %s", chr);
chrom->trf = tree;
}

void getTrfUnsplit(struct sqlConnection *conn, struct hash *chromHash)
/* Return a tree of ranges for simple repeats in all chromosomes, 
 * from a single query on the whole (unsplit) simpleRepeat table. */
{
struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
struct rbTree *tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
struct simpleRange *range, *prevRange = NULL;
struct sqlResult *sr;
char **row;
char *prevChrom = NULL;

char query[1024];
sqlSafef(query, sizeof query, 
"select chrom,chromStart,chromEnd from simpleRepeat"
		  " order by chrom,chromStart");
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    if (prevChrom == NULL)
	prevChrom = cloneString(row[0]);
    else if (! sameString(prevChrom, row[0]))
	{
	rbTreeAdd(tree, prevRange);
	setTrf(prevChrom, chromHash, tree);
	prevRange = NULL;
	freeMem(prevChrom);
	stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
	tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
	prevChrom = cloneString(row[0]);
	}
    lmAllocVar(tree->lm, range);
    range->start = sqlUnsigned(row[1]);
    range->end = sqlUnsigned(row[2]);
    if (prevRange == NULL)
	prevRange = range;
    else if (overlap(range, prevRange))
	{
	/* merge r into prevR & discard; prevR gets passed forward. */
	if (range->end > prevRange->end)
	    prevRange->end = range->end;
	if (range->start < prevRange->start)
	    prevRange->start = range->start;
	}
    else
	{
	rbTreeAdd(tree, prevRange);
	prevRange = range;
	}
    }
if (prevChrom != NULL)
    {
    rbTreeAdd(tree, prevRange);
    setTrf(prevChrom, chromHash, tree);
    freeMem(prevChrom);
    }
sqlFreeResult(&sr);
}

struct rbTree *getTrf(struct sqlConnection *conn, char *chrom)
/* Return a tree of ranges for simple repeats in chromosome. */
{
struct rbTree *tree = rbTreeNew(simpleRangeCmp);
struct simpleRange *range, *prevRange = NULL;
char query[256];
struct sqlResult *sr;
char **row;

sqlSafef(query, sizeof query, "select chromStart,chromEnd from simpleRepeat "
               "where chrom = '%s'",
	       chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    lmAllocVar(tree->lm, range);
    range->start = sqlUnsigned(row[0]);
    range->end = sqlUnsigned(row[1]);
    if (prevRange == NULL)
	prevRange = range;
    else if (overlap(range, prevRange))
	{
	/* merge r into prevR & discard; prevR gets passed forward. */
	if (range->end > prevRange->end)
	    prevRange->end = range->end;
	if (range->start < prevRange->start)
	    prevRange->start = range->start;
	}
    else
	{
	rbTreeAdd(tree, prevRange);
	prevRange = range;
	}
    }
if (prevRange != NULL)
    rbTreeAdd(tree, prevRange);
sqlFreeResult(&sr);
return tree;
}

static void setRepeats(char *chr, struct hash *chromHash,
	struct rbTree *allTree, struct rbTree *newTree)
/* Set the appropriate chrom->repeats to tree -- make sure it's NULL first. */
{
struct chrom *chrom = hashMustFindVal(chromHash, chr);
if (chrom->repeats != NULL)
    errAbort("getRepeatsUnsplit: query results must be orderd by genoName "
	     "(chrom), but looks like they weren't for %s", chr);
chrom->repeats = allTree;
chrom->newRepeats = newTree;
}

static void getRepeatsUnsplit(struct sqlConnection *conn,
	struct hash *chromHash, struct hash *arHash)
/* Return a tree of ranges for sequence gaps all chromosomes, 
 * assuming an unsplit table -- when the table is unsplit, it's 
 * probably for a scaffold assembly where we *really* don't want 
 * to do one query per scaffold! */
{
struct sqlResult *sr;
char **row;
struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
struct rbTree *allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
struct rbTreeNode **newstack = lmAlloc(qLm, 256 * sizeof(newstack[0]));
struct rbTree *newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, newstack);
char *prevChrom = NULL;
struct simpleRange *prevRange = NULL, *prevNewRange = NULL;

char query[1024];
sqlSafef(query, sizeof query, 
    "select genoName,genoStart,genoEnd,repName,repClass,repFamily from rmsk "
    "order by genoName,genoStart");
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *range;
    char arKey[512];
    if (prevChrom == NULL)
	prevChrom = cloneString(row[0]);
    else if (! sameString(prevChrom, row[0]))
	{
	rbTreeAdd(allTree, prevRange);
	if (prevNewRange != NULL)
	    rbTreeAdd(newTree, prevNewRange);
	setRepeats(prevChrom, chromHash, allTree, newTree);
	freeMem(prevChrom);
	prevRange = prevNewRange = NULL;
	stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
	allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
	if (arHash != NULL)
	    {
	    stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
	    newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
	    }
	prevChrom = cloneString(row[0]);
	}
    lmAllocVar(allTree->lm, range);
    range->start = sqlUnsigned(row[1]);
    range->end = sqlUnsigned(row[2]);
    if (prevRange == NULL)
	prevRange = range;
    else if (overlap(range, prevRange))
	{
	/* merge r into prevR & discard; prevR gets passed forward. */
	if (range->end > prevRange->end)
	    prevRange->end = range->end;
	if (range->start < prevRange->start)
	    prevRange->start = range->start;
	}
    else
	{
	rbTreeAdd(allTree, prevRange);
	prevRange = range;
	}
    sprintf(arKey, "%s.%s.%s", row[3], row[4], row[5]);
    if (arHash != NULL && hashLookup(arHash, arKey))
        {
	lmAllocVar(newTree->lm, range);
	range->start = sqlUnsigned(row[1]);
	range->end = sqlUnsigned(row[2]);
	if (prevNewRange == NULL)
	    prevNewRange = range;
	else if (overlap(range, prevNewRange))
	    {
	    /* merge r into prevR & discard; prevR gets passed forward. */
	    if (range->end > prevNewRange->end)
		prevNewRange->end = range->end;
	    if (range->start < prevNewRange->start)
		prevNewRange->start = range->start;
	    }
	else
	    {
	    rbTreeAdd(newTree, prevNewRange);
	    prevNewRange = range;
	    }
	}
    }
if (prevChrom != NULL)
    {
    rbTreeAdd(allTree, prevRange);
    if (prevNewRange != NULL)
	rbTreeAdd(newTree, prevNewRange);
    setRepeats(prevChrom, chromHash, allTree, newTree);
    freeMem(prevChrom);
    }
sqlFreeResult(&sr);
}

static void getRepeatsUnsplitTable(struct sqlConnection *conn,
	struct hash *chromHash, char *table)
/* Return a tree of ranges for sequence gaps all chromosomes, 
 *	from specified table
 */
{
struct sqlResult *sr;
char **row;
struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
struct rbTree *allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
struct rbTreeNode **newstack = lmAlloc(qLm, 256 * sizeof(newstack[0]));
struct rbTree *newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, newstack);
char *prevChrom = NULL;
struct simpleRange *prevRange = NULL, *prevNewRange = NULL;
char query[256];


sqlSafef(query, ArraySize(query), "select chrom,chromStart,chromEnd from %s "
    "order by chrom,chromStart", table);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *range;
    if (prevChrom == NULL)
	prevChrom = cloneString(row[0]);
    else if (! sameString(prevChrom, row[0]))
	{
	rbTreeAdd(allTree, prevRange);
	if (prevNewRange != NULL)
	    rbTreeAdd(newTree, prevNewRange);
	setRepeats(prevChrom, chromHash, allTree, newTree);
	freeMem(prevChrom);
	prevRange = prevNewRange = NULL;
	stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
	allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
	prevChrom = cloneString(row[0]);
	}
    lmAllocVar(allTree->lm, range);
    range->start = sqlUnsigned(row[1]);
    range->end = sqlUnsigned(row[2]);
    if (prevRange == NULL)
	prevRange = range;
    else if (overlap(range, prevRange))
	{
	/* merge r into prevR & discard; prevR gets passed forward. */
	if (range->end > prevRange->end)
	    prevRange->end = range->end;
	if (range->start < prevRange->start)
	    prevRange->start = range->start;
	}
    else
	{
	rbTreeAdd(allTree, prevRange);
	prevRange = range;
	}
    }
if (prevChrom != NULL)
    {
    rbTreeAdd(allTree, prevRange);
    if (prevNewRange != NULL)
	rbTreeAdd(newTree, prevNewRange);
    setRepeats(prevChrom, chromHash, allTree, newTree);
    freeMem(prevChrom);
    }
sqlFreeResult(&sr);
}	/*	void getRepeatsUnsplitTable()	*/

static void getRepeats(struct sqlConnection *conn, struct hash *arHash,
    char *chrom, struct rbTree **retAllRepeats,
	struct rbTree **retNewRepeats)
/* Return a tree of ranges for sequence gaps in chromosome */
{
char *db = sqlGetDatabase(conn);
struct sqlResult *sr;
char **row;
struct rbTree *allTree = rbTreeNew(simpleRangeCmp);
struct rbTree *newTree = rbTreeNew(simpleRangeCmp);
char tableName[64];
char query[256];
boolean splitRmsk = TRUE;
struct simpleRange *prevRange = NULL, *prevNewRange = NULL;

safef(tableName, sizeof(tableName), "%s_rmsk", chrom);
if (! sqlTableExists(conn, tableName))
    {
    safef(tableName, sizeof(tableName), "rmsk");
    if (! sqlTableExists(conn, tableName))
	errAbort("Can't find rmsk table for %s (%s.%s_rmsk or %s.rmsk)\n",
		 chrom, db, chrom, db);
    splitRmsk = FALSE;
    }
if (splitRmsk)
    sqlSafef(query, sizeof query,
	    "select genoStart,genoEnd,repName,repClass,repFamily from %s",
	    tableName);
else
    sqlSafef(query, sizeof query,
	    "select genoStart,genoEnd,repName,repClass,repFamily from %s "
	    "where genoName = \"%s\"",
	    tableName, chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *range;
    char arKey[512];
    lmAllocVar(allTree->lm, range);
    range->start = sqlUnsigned(row[0]);
    range->end = sqlUnsigned(row[1]);
    if (prevRange == NULL)
	prevRange = range;
    else if (overlap(range, prevRange))
	{
	/* merge r into prevR & discard; prevR gets passed forward. */
	if (range->end > prevRange->end)
	    prevRange->end = range->end;
	if (range->start < prevRange->start)
	    prevRange->start = range->start;
	}
    else
	{
	rbTreeAdd(allTree, prevRange);
	prevRange = range;
	}
    sprintf(arKey, "%s.%s.%s", row[2], row[3], row[4]);
    if (arHash != NULL && hashLookup(arHash, arKey))
        {
	lmAllocVar(newTree->lm, range);
	range->start = sqlUnsigned(row[0]);
	range->end = sqlUnsigned(row[1]);
	if (prevNewRange == NULL)
	    prevNewRange = range;
	else if (overlap(range, prevNewRange))
	    {
	    /* merge r into prevR & discard; prevR gets passed forward. */
	    if (range->end > prevNewRange->end)
		prevNewRange->end = range->end;
	    if (range->start < prevNewRange->start)
		prevNewRange->start = range->start;
	    }
	else
	    {
	    rbTreeAdd(allTree, prevNewRange);
	    prevNewRange = range;
	    }
	}
    }
if (prevRange != NULL)
    rbTreeAdd(allTree, prevRange);
if (prevNewRange != NULL)
    rbTreeAdd(newTree, prevNewRange);
sqlFreeResult(&sr);
*retAllRepeats = allTree;
*retNewRepeats = newTree;
}

static void getRepeatsTable(struct sqlConnection *conn, char *table,
    char *chrom, struct rbTree **retAllRepeats,
	struct rbTree **retNewRepeats)
/* Return a tree of ranges for sequence gaps in chromosome from
 *	specified table */
{
struct sqlResult *sr;
char **row;
struct rbTree *allTree = rbTreeNew(simpleRangeCmp);
struct rbTree *newTree = rbTreeNew(simpleRangeCmp);
char query[256];
struct simpleRange *prevRange = NULL, *prevNewRange = NULL;

sqlSafef(query, ArraySize(query), "select chromStart,chromEnd from %s "
	    "where chrom = \"%s\"", table, chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct simpleRange *range;
    lmAllocVar(allTree->lm, range);
    range->start = sqlUnsigned(row[0]);
    range->end = sqlUnsigned(row[1]);
    if (prevRange == NULL)
	prevRange = range;
    else if (overlap(range, prevRange))
	{
	/* merge r into prevR & discard; prevR gets passed forward. */
	if (range->end > prevRange->end)
	    prevRange->end = range->end;
	if (range->start < prevRange->start)
	    prevRange->start = range->start;
	}
    else
	{
	rbTreeAdd(allTree, prevRange);
	prevRange = range;
	}
    }
if (prevRange != NULL)
    rbTreeAdd(allTree, prevRange);
if (prevNewRange != NULL)
    rbTreeAdd(newTree, prevNewRange);
sqlFreeResult(&sr);
*retAllRepeats = allTree;
*retNewRepeats = newTree;
}	/*	static void getRepeatsTable()	*/

static struct rbTree *getNewRepeats(char *dirName, char *chrom)
/* Read in repeatMasker .out line format file into a tree of ranges. */
/* Handles lineage-specific files that preserve header */
{
struct rbTree *tree = rbTreeNew(simpleRangeCmp);
struct simpleRange *range;
char fileName[512];
struct lineFile *lf;
char *row[7];
boolean headerDone = FALSE;

sprintf(fileName, "%s/%s.out.spec", dirName, chrom);
lf = lineFileOpen(fileName, TRUE);
while (lineFileRow(lf, row))
    {
    /* skip header lines (don't contain numeric first field) */
    if (!headerDone && atoi(row[0]) == 0)
        continue;
    if (!sameString(chrom, row[4]))
        errAbort("Expecting %s word 5, line %d of %s\n", 
		chrom, lf->lineIx, lf->fileName);
    headerDone = TRUE;
    lmAllocVar(tree->lm, range);
    range->start = lineFileNeedNum(lf, row, 5) - 1;
    range->end = lineFileNeedNum(lf, row, 6);
    rbTreeAdd(tree, range);
    }
lineFileClose(&lf);
return tree;
}

struct hash *getAncientRepeats(struct sqlConnection *tConn,
			       struct sqlConnection *qConn)
/* Get hash of ancient repeats.  This keyed by name.family.class. */
{
struct sqlConnection *conn = NULL;
struct sqlResult *sr;
char **row;
char key[512];
struct hash *hash = newHash(10);

if (sqlTableExists(tConn, "ancientRepeat"))
    conn = tConn;
else if (sqlTableExists(qConn, "ancientRepeat"))
    conn = qConn;
else
    errAbort("Can't find ancientRepeat table in %s or %s",
	     sqlGetDatabase(tConn), sqlGetDatabase(qConn));
char query[1024];
sqlSafef(query, sizeof query, "select name,family,class from ancientRepeat");
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    sprintf(key, "%s.%s.%s", row[0], row[1], row[2]);
    hashAdd(hash, key, NULL);
    }
sqlFreeResult(&sr);
return hash;
}

static void getChroms(struct sqlConnection *conn, struct hash **retHash,
	       struct chrom **retList, char *sizes)
/* Get hash of chromosomes from database or from sizes file if given. */
{
struct chrom *chromList = NULL, *chrom;
struct hash *hash = hashNew(8);

if (sizes)
    {
    struct chromInfo *list = chromInfoListFromFile(sizes);
    struct chromInfo *ci = NULL;
    for (ci = list; ci; ci=ci->next)
	{
	AllocVar(chrom);
	hashAddSaveName(hash, ci->chrom, chrom, &chrom->name);
	chrom->size = ci->size;
	slAddHead(&chromList, chrom);
	}
    }
else
    {
    struct sqlResult *sr;
    char **row;
    char query[1024];
    sqlSafef(query, sizeof query, "select chrom,size from chromInfo");
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
	{
	AllocVar(chrom);
	hashAddSaveName(hash, row[0], chrom, &chrom->name);
	chrom->size = atoi(row[1]);
	slAddHead(&chromList, chrom);
	}
    sqlFreeResult(&sr);
    }

slReverse(&chromList);
*retHash = hash;
*retList = chromList;
}

int liftStart(char *name, int start, struct hash *liftHash)
/* Lift start if necessary. */
{
int s = start;
if (liftHash != NULL)
    {
    struct liftSpec *lft = hashMustFindVal(liftHash, name);
    s += lft->offset;
    }
return s;
}

void tAddN(struct chainNet *net, struct cnFill *fillList, struct rbTree *tree)
/* Add tN's to all gaps underneath fillList. */
{
struct cnFill *fill;
for (fill = fillList; fill != NULL; fill = fill->next)
    {
    int s = liftStart(net->name, fill->tStart, liftHashT);
    fill->tN = intersectionSize(tree, s, s + fill->tSize);
    if (fill->children)
	tAddN(net, fill->children, tree);
    }
}

struct chrom *getQChrom(char *qName, struct hash *qChromHash)
/* Lift qName to chrom if necessary and dig up from qChromHash. */
{
struct chrom *qChrom = NULL;
if (liftHashQ != NULL)
    {
    struct liftSpec *lft = hashMustFindVal(liftHashQ, qName);
    qChrom = hashMustFindVal(qChromHash, lft->newName);
    }
else
    qChrom = hashMustFindVal(qChromHash, qName);
return(qChrom);
}

void qAddN(struct chainNet *net, struct cnFill *fillList, struct hash *qChromHash)
/* Add qN's to all gaps underneath fillList. */
{
struct cnFill *fill;
for (fill = fillList; fill != NULL; fill = fill->next)
    {
    struct chrom *qChrom = getQChrom(fill->qName, qChromHash);
    struct rbTree *tree = qChrom->nGaps;
    int s = liftStart(fill->qName, fill->qStart, liftHashQ);
    fill->qN = intersectionSize(tree, s, s + fill->qSize);
    if (fill->children)
	qAddN(net, fill->children, qChromHash);
    }
}

void tAddR(struct chainNet *net, struct cnFill *fillList, struct rbTree *tree)
/* Add t repeats's to all things underneath fillList. */
{
struct cnFill *fill;
for (fill = fillList; fill != NULL; fill = fill->next)
    {
    int s = liftStart(net->name, fill->tStart, liftHashT);
    fill->tR = intersectionSize(tree, s, s + fill->tSize);
    if (fill->children)
	tAddR(net, fill->children, tree);
    }
}

void qAddR(struct chainNet *net, struct cnFill *fillList, struct hash *qChromHash)
/* Add q repeats to all things underneath fillList. */
{
struct cnFill *fill;
for (fill = fillList; fill != NULL; fill = fill->next)
    {
    struct chrom *qChrom = getQChrom(fill->qName, qChromHash);
    int s = liftStart(fill->qName, fill->qStart, liftHashQ);
    fill->qR = intersectionSize(qChrom->repeats, s, s + fill->qSize);
    if (fill->children)
	qAddR(net, fill->children, qChromHash);
    }
}

void tAddNewR(struct chainNet *net, struct cnFill *fillList, struct rbTree *tree)
/* Add t new repeats's to all things underneath fillList. */
{
struct cnFill *fill;
for (fill = fillList; fill != NULL; fill = fill->next)
    {
    int s = liftStart(net->name, fill->tStart, liftHashT);
    fill->tNewR = intersectionSize(tree, s, s + fill->tSize);
    if (fill->children)
	tAddNewR(net, fill->children, tree);
    }
}

void qAddNewR(struct chainNet *net, struct cnFill *fillList, struct hash *qChromHash)
/* Add q new repeats to all things underneath fillList. */
{
struct cnFill *fill;
for (fill = fillList; fill != NULL; fill = fill->next)
    {
    struct chrom *qChrom = getQChrom(fill->qName, qChromHash);
    int s = liftStart(fill->qName, fill->qStart, liftHashQ);
    fill->qNewR = intersectionSize(qChrom->newRepeats, s, s + fill->qSize);
    if (fill->children)
	qAddNewR(net, fill->children, qChromHash);
    }
}

void tAddOldR(struct chainNet *net, struct cnFill *fillList, struct rbTree *tree)
/* Add t new repeats's to all things underneath fillList. */
{
struct cnFill *fill;
for (fill = fillList; fill != NULL; fill = fill->next)
    {
    int s = liftStart(net->name, fill->tStart, liftHashT);
    fill->tOldR = intersectionSize(tree, s, s + fill->tSize);
    if (fill->children)
	tAddOldR(net, fill->children, tree);
    }
}

void qAddOldR(struct chainNet *net, struct cnFill *fillList, struct hash *qChromHash)
/* Add q new repeats to all things underneath fillList. */
{
struct cnFill *fill;
for (fill = fillList; fill != NULL; fill = fill->next)
    {
    struct chrom *qChrom = getQChrom(fill->qName, qChromHash);
    int s = liftStart(fill->qName, fill->qStart, liftHashQ);
    fill->qOldR = intersectionSize(qChrom->oldRepeats, s, s + fill->qSize);
    if (fill->children)
	qAddOldR(net, fill->children, qChromHash);
    }
}

void tAddTrf(struct chainNet *net, struct cnFill *fillList, struct rbTree *tree)
/* Add t simple repeats's to all things underneath fillList. */
{
struct cnFill *fill;
for (fill = fillList; fill != NULL; fill = fill->next)
    {
    int s = liftStart(net->name, fill->tStart, liftHashT);
    fill->tTrf = intersectionSize(tree, s, s + fill->tSize);
    if (fill->children)
	tAddTrf(net, fill->children, tree);
    }
}

void qAddTrf(struct chainNet *net, struct cnFill *fillList, struct hash *qChromHash)
/* Add q new repeats to all things underneath fillList. */
{
struct cnFill *fill;
for (fill = fillList; fill != NULL; fill = fill->next)
    {
    struct chrom *qChrom = getQChrom(fill->qName, qChromHash);
    int s = liftStart(fill->qName, fill->qStart, liftHashQ);
    fill->qTrf = intersectionSize(qChrom->trf, s, s + fill->qSize);
    if (fill->children)
	qAddTrf(net, fill->children, qChromHash);
    }
}



void netClass(char *inName, char *tDb, char *qDb, char *outName)
/* netClass - Add classification info to net. */
{
struct chainNet *net;
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = mustOpen(outName, "w");
struct chrom *qChromList, *chrom;
struct hash *qChromHash;
struct hash *arHash = NULL;
struct sqlConnection *tConn = sqlConnect(tDb);
struct sqlConnection *qConn = sqlConnect(qDb);

qLm = lmInit(0);

if (!noAr)
    arHash = getAncientRepeats(tConn, qConn);

getChroms(qConn, &qChromHash, &qChromList, qSizes);

verbose(1, "Reading gaps in %s\n", qDb);
if (sqlTableExists(qConn, "gap"))
    {
    getSeqGapsUnsplit(qConn, qChromHash);
    }
else
    {
    for (chrom = qChromList; chrom != NULL; chrom = chrom->next)
	chrom->nGaps = getSeqGaps(qConn, chrom->name);
    }

if (qNewR)
    {
    verbose(1, "Reading new repeats from %s\n", qNewR);
    for (chrom = qChromList; chrom != NULL; chrom = chrom->next)
        chrom->newRepeats = getNewRepeats(qNewR, chrom->name);
    }

verbose(1, "Reading simpleRepeats in %s\n", qDb);
getTrfUnsplit(qConn, qChromHash);

if (qRepeatTable)
    {
    verbose(1, "Reading repeats in %s from table %s\n", qDb, qRepeatTable);
    getRepeatsUnsplitTable(qConn, qChromHash, qRepeatTable);
    }
else
    {
    verbose(1, "Reading repeats in %s\n", qDb);
    if (sqlTableExists(qConn, "rmsk"))
	getRepeatsUnsplit(qConn, qChromHash, arHash);
    else
	{
	for (chrom = qChromList; chrom != NULL; chrom = chrom->next)
	    getRepeats(qConn, arHash, chrom->name, &chrom->repeats,
		       &chrom->oldRepeats);
	}
    }

while ((net = chainNetRead(lf)) != NULL)
    {
    struct rbTree *tN, *tRepeats, *tOldRepeats, *tTrf;
    char *tName = net->name;
    if (liftHashT != NULL)
	{
	struct liftSpec *lft = hashMustFindVal(liftHashT, net->name);
	tName = lft->newName;
	}

    verbose(1, "Processing %s.%s\n", tDb, net->name);
    tN = getSeqGaps(tConn, tName);
    tAddN(net, net->fillList, tN);
    rbTreeFree(&tN);
    qAddN(net, net->fillList, qChromHash);

    if (tRepeatTable)
	getRepeatsTable(tConn, tRepeatTable, tName, &tRepeats, &tOldRepeats);
    else
	getRepeats(tConn, arHash, tName, &tRepeats, &tOldRepeats);
    tAddR(net, net->fillList, tRepeats);
    if (!noAr)
	tAddOldR(net, net->fillList, tOldRepeats);
    rbTreeFree(&tRepeats);
    rbTreeFree(&tOldRepeats);
    qAddR(net, net->fillList, qChromHash);
    if (!noAr)
	qAddOldR(net, net->fillList, qChromHash);

    tTrf = getTrf(tConn, tName);
    tAddTrf(net, net->fillList, tTrf);
    rbTreeFree(&tTrf);
    qAddTrf(net, net->fillList, qChromHash);

    if (tNewR)
        {
	struct rbTree *tree = getNewRepeats(tNewR, tName);
	tAddNewR(net, net->fillList, tree);
	rbTreeFree(&tree);
	}
    if (qNewR)
        qAddNewR(net, net->fillList, qChromHash);
    chainNetWrite(net, f);
    chainNetFree(&net);
    }
sqlDisconnect(&tConn);
sqlDisconnect(&qConn);
}


int main(int argc, char *argv[])
/* Process command line. */
{
char *liftFileQ = NULL, *liftFileT = NULL;
optionInit(&argc, argv, optionSpecs);
if (argc != 5)
    usage();
tNewR = optionVal("tNewR", tNewR);
qNewR = optionVal("qNewR", qNewR);
noAr = optionExists("noAr");
qRepeatTable = optionVal("qRepeats", qRepeatTable);
tRepeatTable = optionVal("tRepeats", tRepeatTable);
liftFileQ = optionVal("liftQ", liftFileQ);
liftFileT = optionVal("liftT", liftFileT);
qSizes = optionVal("qSizes", qSizes);
if (liftFileQ != NULL)
    {
    struct liftSpec *lifts = readLifts(liftFileQ);
    liftHashQ = hashLift(lifts, TRUE);
    }
if (liftFileT != NULL)
    {
    struct liftSpec *lifts = readLifts(liftFileT);
    liftHashT = hashLift(lifts, TRUE);
    }
netClass(argv[1], argv[2], argv[3], argv[4]);
return 0;
}
