/* checkHgFindSpec - test & describe search specs in hgFindSpec table. */

/* Copyright (C) 2013 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
#include "common.h"
#include "options.h"
#include "jksql.h"
#include "hash.h"
#include "dystring.h"
#include "portable.h"
#include "hdb.h"
#include "hui.h"
#include "cheapcgi.h"
#include "cart.h"
#include "hgFind.h"
#include "hgFindSpec.h"
#include "regexHelper.h"
#include "genbank.h"


char *database = NULL;
/* Need to get a cart in order to use hgFind. */
struct cart *cart = NULL;

/* Command line option specifications */
static struct optionSpec optionSpecs[] = {
    {"showSearches",    OPTION_BOOLEAN},
    {"checkTermRegex",  OPTION_BOOLEAN},
    {"exampleFor",      OPTION_STRING},
    {"checkIndexes",    OPTION_BOOLEAN},
    {"makeExamples",    OPTION_BOOLEAN},
    {NULL, 0}
};


void usage()
{
errAbort(
"checkHgFindSpec - test and describe search specs in hgFindSpec tables.\n"
"usage:\n"
"  checkHgFindSpec database [options | termToSearch]\n"
"If given a termToSearch, displays the list of tables that will be searched\n"
"and how long it took to figure that out; then performs the search and the\n"
"time it took.\n"
"options:\n"
"  -showSearches       Show the order in which tables will be searched in\n"
"                      general.  [This will be done anyway if no\n"
"                      termToSearch or options are specified.]\n"
"  -checkTermRegex     For each search spec that includes a regular\n"
"                      expression for terms, make sure that all values of\n"
"                      the table field to be searched match the regex.  (If\n"
"                      not, some of them could be excluded from searches.)\n"
"  -checkIndexes       Make sure that an index is defined on each field to\n"
"                      be searched.\n"
/**#*** IMPLEMENT ME!
"  -exampleFor=search  Randomly choose a term for the specified search (from\n"
"                      the target table for the search).  Search for it.\n"
"  -makeExamples       Print out an HTML table of example positions\n"
"                      (suitable for a gateway description.html).\n"
*/
	 );
}


boolean reportSearch(char *termToSearch)
/* Show the list of tables that will be searched, and how long it took to 
 * figure that out.  Then do the search; show results and time required. */
//#*** this doesn't handle ; in termToSearch (until the actual search)
{
struct hgFindSpec *shortList = NULL, *longList = NULL;
struct hgFindSpec *hfs = NULL;
int startMs = 0, endMs = 0;
boolean gotError = FALSE;
char *chrom = NULL;
int chromStart = 0, chromEnd = 0;

hgFindSpecGetAllSpecs(database, &shortList, &longList);
puts("\n");
startMs = clock1000();
for (hfs = shortList;  hfs != NULL;  hfs = hfs->next)
    {
    boolean matches = TRUE;
    boolean tablesExist = hTableOrSplitExists(database, hfs->searchTable);
    if (isNotEmpty(termToSearch) && isNotEmpty(hfs->termRegex))
	matches = regexMatchNoCase(termToSearch, hfs->termRegex);
    if (isNotEmpty(hfs->xrefTable))
	tablesExist |= hTableExists(database, hfs->xrefTable);
    if (matches && tablesExist)
	{
	verbose(1, "SHORT-CIRCUIT %s %f\n", hfs->searchName, hfs->searchPriority);
	}
    else if (matches)
	{
	verbose(2, "no table %s: %s%s%s\n", hfs->searchName, hfs->searchTable,
		isNotEmpty(hfs->xrefTable) ? " and/or " : "",
		isNotEmpty(hfs->xrefTable) ? hfs->xrefTable : "");
	}
    else
	{
	verbose(2, "no match %s: %s\n", hfs->searchName, hfs->termRegex);
	}
    }
endMs = clock1000();
printf("\nTook %dms to determine short-circuit searches.\n\n",
       endMs - startMs);

startMs = clock1000();
for (hfs = longList;  hfs != NULL;  hfs = hfs->next)
    {
    boolean matches = TRUE;
    boolean tablesExist = hTableOrSplitExists(database, hfs->searchTable);
    if (isNotEmpty(termToSearch) && isNotEmpty(hfs->termRegex))
	matches = regexMatchNoCase(termToSearch, hfs->termRegex);
    if (isNotEmpty(hfs->xrefTable))
	tablesExist |= hTableExists(database, hfs->xrefTable);
    if (matches && tablesExist)
	{
	verbose(1, "ADDITIVE %s %f\n", hfs->searchName, hfs->searchPriority);
	}
    else if (matches)
	{
	verbose(2, "no table %s: %s%s%s\n", hfs->searchName, hfs->searchTable,
		isNotEmpty(hfs->xrefTable) ? " and/or " : "",
		isNotEmpty(hfs->xrefTable) ? hfs->xrefTable : "");
	}
    else
	{
	verbose(2, "no match %s: %s\n", hfs->searchName, hfs->termRegex);
	}
    }
endMs = clock1000();
printf("\nTook %dms to determine multiple/additive searches.\n"
       "(These won't happen if it short-circuits.)\n\n",
       endMs - startMs);

if (isNotEmpty(termToSearch))
    {
    cartSetString(cart, "db", database);
    char *position = cloneString(termToSearch);
    struct dyString *dyWarn = dyStringNew(0);
    startMs = clock1000();
    struct hgPositions *hgp = hgFindSearch(cart, &position, &chrom, &chromStart, &chromEnd,
                                           "checkHgFindSpec", dyWarn);
    endMs = clock1000();
    if (isNotEmpty(dyWarn->string))
        warn("%s", dyWarn->string);
    if (hgp->singlePos != NULL)
	{
	struct hgPos *pos = hgp->singlePos;
	char *table = "[No reported table!]";
	char *name = pos->name ? pos->name : "";
	char *browserName = pos->browserName ? pos->browserName : "";
	char *description = pos->description ? pos->description : "";
	if (hgp->tableList != NULL)
	    table = hgp->tableList->name;
	printf("\nSingle result for %s from %s: %s:%d-%d   [%s | %s | %s]\n",
	       termToSearch, table, chrom, chromStart+1, chromEnd,
	       name, browserName, description);
	}
    else
        hgPositionsHtml(database, hgp, "checkHgFindSpec", cart);
    printf("\nTook %dms to search for %s.\n\n",
	   endMs - startMs, termToSearch);
    }

hgFindSpecFreeList(&shortList);
hgFindSpecFreeList(&longList);
return(gotError);
}

static char *getFieldFromQuery(char *query, char *searchName)
/* Get the value of the field that's being searched in query. */
{
char *ptr = strstr(query, " where ");
char *field = NULL;
if (ptr == NULL)
    errAbort("Can't find \" where \" in query \"%s\" for search %s",
	     query, searchName);
field = cloneString(ptr + strlen(" where "));
ptr = strchr(field, '=');
if (ptr == NULL)
    ptr = strstr(field, " like ");
if (ptr == NULL)
    ptr = strstr(field, " rlike ");
if (ptr == NULL)
    errAbort("Can't find \"=\" or \" like \" after \" where %s\" in query "
	     "\"%s\" for search %s",
	     field, query, searchName);
*ptr = 0;
return(trimSpaces(field));
}

static boolean checkRegexOnTableField(char *exp, char *altExp, char *table,
				      char *field, char *searchName)
/* Return TRUE and complain if any values of table.field do not match exp. */
{
struct sqlConnection *conn = hAllocConn(database);
struct sqlResult *sr = NULL;
char **row = NULL;
int errCount = 0;
char buf[512];
sqlSafef(buf, sizeof(buf), "select %s from %s", field, table);
sr = sqlGetResult(conn, buf);
while ((row = sqlNextRow(sr)) != NULL)
    {
    if (isEmpty(row[0]))
	continue;
    if (! regexMatchNoCase(row[0], exp))
	{
	if (isNotEmpty(altExp) && regexMatchNoCase(row[0], altExp))
	    continue;
	if (errCount < 1 ||
	    (errCount < 10 && verboseLevel() > 1))
	    {
	    printf("Error: %s.%s.%s value \"%s\" doesn't match termRegex \"%s\"",
		   database, table, field, row[0], exp);
	    if (isNotEmpty(altExp))
		printf(" or dontCheck \"%s\"", altExp);
	    printf(" for search %s\n", searchName);
	    }
	errCount++;
	}
    }
if (errCount > 0)
    verbose(2, "Search %s: %d values of %s.%s overlooked.\n",
	    searchName, errCount, table, field);
sqlFreeResult(&sr);
hFreeConn(&conn);
return(errCount > 0);
}

boolean doCheckTermRegex()
/* For each search that includes a regex, make sure that all values of the 
 * target table field match the regex -- otherwise those values would be 
 * invisible to a search. */
{
struct hgFindSpec *shortList = NULL, *longList = NULL, *wholeList = NULL;
struct hgFindSpec *hfs = NULL;
boolean gotError = FALSE;

hgFindSpecGetAllSpecs(database, &shortList, &longList);
wholeList = slCat(shortList, longList);

puts("\n");
for (hfs = wholeList;  hfs != NULL;  hfs = hfs->next)
    {
    if (isNotEmpty(hfs->termRegex))
	{
	char *table = NULL, *query = NULL;
	if (isNotEmpty(hfs->xrefTable))
	    {
	    table = hfs->xrefTable;
	    query = hfs->xrefQuery;
	    }
	else
	    {
	    table = hfs->searchTable;
	    query = hfs->query;
	    }
	if (isNotEmpty(query))
	    {
	    struct slName *tableList = hSplitTableNames(database, table);
	    struct slName *tPtr = NULL;
	    char *termPrefix = hgFindSpecSetting(hfs, "termPrefix");
	    char *field = getFieldFromQuery(query, hfs->searchName);
	    char *termRegex = hfs->termRegex;
	    char *altRegex = hgFindSpecSetting(hfs, "dontCheck");
	    if (termPrefix != NULL && startsWith(termPrefix, termRegex+1))
		termRegex += strlen(termPrefix)+1;
	    verbose(2, "Checking termRegex \"%s\" for table %s (search %s).\n",
		    termRegex, table, hfs->searchName);
	    for (tPtr = tableList;  tPtr != NULL;  tPtr = tPtr->next)
		{
		gotError |= checkRegexOnTableField(termRegex, altRegex,
				       tPtr->name, field, hfs->searchName);
		}
	    }
	}
    }

hgFindSpecFreeList(&wholeList);
return(gotError);
}

boolean doCheckIndexes()
/* For each search, make sure there's an index on the right table field(s). */
{
struct hgFindSpec *shortList = NULL, *longList = NULL, *wholeList = NULL;
struct hgFindSpec *hfs = NULL;
struct slName *allChroms = hAllChromNames(database);
boolean gotError = FALSE;

hgFindSpecGetAllSpecs(database, &shortList, &longList);
wholeList = slCat(shortList, longList);

puts("\n");
for (hfs = wholeList;  hfs != NULL;  hfs = hfs->next)
    {
    if (isNotEmpty(hfs->query) && hTableOrSplitExists(database, hfs->searchTable))
	{
	char *field = getFieldFromQuery(hfs->query, hfs->searchName);
	struct slName *tableList = hSplitTableNames(database, hfs->searchTable);
	struct slName *tPtr = NULL;
	for (tPtr = tableList;  tPtr != NULL;  tPtr = tPtr->next)
	    {
	    if (! hFieldHasIndex(database, tPtr->name, field))
		{
		gotError = TRUE;
		printf("Error: No SQL index defined for %s.%s.%s (search %s)\n",
		       database, tPtr->name, field, hfs->searchName);
		}
	    else
		verbose(2, "Index exists for %s.%s (search %s)\n",
			tPtr->name, field, hfs->searchName);
	    }
	}
    if (isNotEmpty(hfs->xrefQuery) && hTableOrSplitExists(database, hfs->xrefTable))
	{
	char *field = getFieldFromQuery(hfs->xrefQuery, hfs->searchName);
	if (! hFieldHasIndex(database, hfs->xrefTable, field))
	    {
	    gotError = TRUE;
	    printf("Error: No SQL index defined for %s.%s.%s (search %s)\n",
		   database, hfs->xrefTable, field, hfs->searchName);
	    }
	else
	    verbose(2, "Index exists for %s.%s.%s (search %s)\n",
		    database, hfs->xrefTable, field, hfs->searchName);
	}
    }

slFreeList(&allChroms);
hgFindSpecFreeList(&wholeList);
return(gotError);
}

char *getExampleFor(char *searchName)
/* Randomly choose a search field value -- if it comes from the table,
 * we should be able to search for it and find our way back to the table. */
{
char *example = NULL;

errAbort("Sorry, -exampleFor=search not implemented yet.");

return(example);
}

boolean doMakeExamples()
/* Print out an HTML table of position examples for description.html. */
{
boolean gotError = FALSE;

errAbort("Sorry, -makeExamples not implemented yet.");

return(gotError);
}


int checkHgFindSpec(char *db, char *termToSearch, boolean showSearches,
		    boolean checkTermRegex, char *exampleFor,
		    boolean checkIndexes, boolean makeExamples)
/* Perform searches/checks as specified, summarize errors, 
 * return nonzero if there are errors. */
{
boolean gotError = FALSE;

database = db;
initGenbankTableNames("gbMeta");

if (isNotEmpty(termToSearch))
    gotError |= reportSearch(termToSearch);
if (showSearches)
    gotError |= reportSearch(NULL);
if (checkTermRegex)
    gotError |= doCheckTermRegex();
if (isNotEmpty(exampleFor))
    {
    termToSearch = getExampleFor(exampleFor);
    gotError |= reportSearch(termToSearch);
    }
if (checkIndexes)
    gotError |= doCheckIndexes();
if (makeExamples)
    gotError |= doMakeExamples();

return gotError;
}


/* Just a placeholder -- we don't do anything with the cart. */
char *excludeVars[] = { NULL };


int main(int argc, char *argv[])
{
char   *termToSearch   = NULL;
boolean showSearches   = FALSE;
boolean checkTermRegex = FALSE;
char   *exampleFor     = NULL;
boolean checkIndexes   = FALSE;
boolean makeExamples   = FALSE;

optionInit(&argc, argv, optionSpecs);
/* Allow "checkHgFindSpec db" or "checkHgFindSpec db termToSearch" usage: */
if (termToSearch == NULL && argc == 3)
    {
    termToSearch = argv[2];
    argc--;
    }
if (argc != 2)
    usage();

showSearches = optionExists("showSearches");
checkTermRegex = optionExists("checkTermRegex");
exampleFor = optionVal("exampleFor", exampleFor);
checkIndexes = optionExists("checkIndexes");
makeExamples = optionExists("makeExamples");

/* If no termToSearch or options are specified, do showSearches. */
if (termToSearch == NULL && exampleFor == NULL &&
    !showSearches && !checkTermRegex && !checkIndexes && !makeExamples)
    showSearches = TRUE;

cgiSpoof(&argc, argv);
cart = cartAndCookie(hUserCookie(), excludeVars, NULL);
return checkHgFindSpec(argv[1], termToSearch, showSearches, checkTermRegex,
		       exampleFor, checkIndexes, makeExamples);
}
