/* hgSelect - select from genome tables, handling split tables and bin column */

/* Copyright (C) 2014 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
#include "common.h"
#include "options.h"
#include "dystring.h"
#include "hdb.h"
#include "jksql.h"
#include "verbose.h"


void usage(char *msg)
/* Explain usage and exit. */
{
errAbort(
    "%s\n"
    "hgSelect - select from genome tables, handling split tables and\n"
    "           bin column\n"
    "\n"
    "usage:\n"
    "   hgSelect [options] db table outFile\n"
    "\n"
    "Select rows from a table, handling select from split tables and dropping\n"
    "a bin column. \n"
    "\n"
    "Options:\n"
    "   -noRandom - Exclude *_random pseudo-chromsomes\n"
    "   -noHap - Exclude *_hap* pseudo-chromsomes\n"
    "   -where=where \n"
    "   -joinTbls=tbls - comman separated list other tables, if required\n"
    "    by -where\n"
    "   -verbose=n - 2 outputs SQL\n",
    msg);
}

/* command line */
static struct optionSpec optionSpec[] = {
    {"noRandom", OPTION_BOOLEAN},
    {"noHap", OPTION_BOOLEAN},
    {"where", OPTION_STRING},
    {"joinTbls", OPTION_STRING},
    {NULL, 0}
};

static boolean noRandom;
static boolean noHap;
static char *where;
static char *joinTbls;

static boolean inclChrom(char *chrom)
/* check if rows for chromosome should be included */
{
if (noRandom && (strstr(chrom, "_random") != NULL))
    return FALSE;
if (noHap && haplotype(chrom))
    return FALSE;
return TRUE;
}

static void outputRow(FILE *outFh, struct hTableInfo *tblInfo, int numCols, char **row)
/* output a row */
{
int startCol = (tblInfo->hasBin ? 1 : 0);
int i;
for (i = startCol; i < numCols; i++)
    {
    if (i > startCol)
        fputc('\t', outFh);
    fputs(row[i], outFh);
    }
fputc('\n', outFh);
}

static void addWhereOrAnd(struct dyString *query, int clauseCnt)
/* add a `where' or `and' to the query as indicated by clauseCnt  */
{
if (clauseCnt == 0)
    sqlDyStringPrintf(query, " where");
else
    sqlDyStringPrintf(query, " and");
}

static void addWhereClause(struct hTableInfo *tblInfo,
                           struct dyString *query)
/* Build up where clause. If this isn't a split table and there are chrom
 * restrictions, include chrom restiction in the where. */
{

int clauseCnt = 0;
if (where != NULL)
    {
    addWhereOrAnd(query, clauseCnt++);
    sqlDyStringPrintf(query, " %-s", where);
    }
if (!tblInfo->isSplit && noRandom)
    {
    addWhereOrAnd(query, clauseCnt++);
    sqlDyStringPrintf(query, " (%s not like \"%%__random\")", tblInfo->chromField);
    }
if (!tblInfo->isSplit && noHap)
    {
    addWhereOrAnd(query, clauseCnt++);
    sqlDyStringPrintf(query, " (%s not like \"%%__hap%%\" AND %s not like \"%%__alt%%\")", tblInfo->chromField, tblInfo->chromField);
    }
}

static void selectFromTable(char *table, struct hTableInfo *tblInfo,
                            struct sqlConnection *conn, FILE *outFh)
/* select from a table and output rows */
{
struct dyString *query = dyStringNew(0);
sqlDyStringPrintf(query, "SELECT %s.* FROM %s", table, table);
if (joinTbls != NULL)
    {
    sqlCkIl(joinTblsSafe,joinTbls)
    sqlDyStringPrintf(query, ",%-s", joinTblsSafe);
    }
addWhereClause(tblInfo, query);
verbose(2, "query: %s\n", query->string);

struct sqlResult *sr = sqlGetResult(conn, query->string);
int numCols = sqlCountColumns(sr);
char **row;
while ((row = sqlNextRow(sr)) != NULL)
    outputRow(outFh, tblInfo, numCols, row);
sqlFreeResult(&sr);
dyStringFree(&query);
}

static void selectFromSplitTable(char *db, char *table, struct hTableInfo *tblInfo,
                                 struct sqlConnection *conn, FILE *outFh)
/* select from a split table */
{
struct slName *chroms = hAllChromNames(db);
struct slName *chrom;
char chromTable[256];
for (chrom = chroms; chrom != NULL ; chrom = chrom->next)
    if (inclChrom(chrom->name))
        {
        safef(chromTable, sizeof(chromTable), "%s_%s", chrom->name, table);
        selectFromTable(chromTable, tblInfo, conn, outFh);
        }
}

void hgSelect(char *db, char *table, char *outFile)
/* select from genome tables, handling split tables and bin column */
{
struct hTableInfo *tblInfo;

/* get table info upfront so don't have to wait long find for error */
tblInfo = hFindTableInfo(db, NULL, table);
if (tblInfo == NULL)
    errAbort("Error: no table: %s or *_%s", table, table);

struct sqlConnection *conn = hAllocConn(db);
FILE* outFh = mustOpen(outFile, "w");
if (tblInfo->isSplit)
    selectFromSplitTable(db, table, tblInfo, conn, outFh);
else
    selectFromTable(table, tblInfo, conn, outFh);

carefulClose(&outFh);
}

int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, optionSpec);
if (argc != 4)
    usage("wrong # of args");
noRandom = optionExists("noRandom");
noHap = optionExists("noHap");
where = optionVal("where", NULL);
// TRUST commandline SQL where clause.
if (where)
    {
    struct dyString *dyWhere = sqlDyStringCreate(where, NULL);
    where = dyStringCannibalize(&dyWhere);
    }

joinTbls = optionVal("joinTbls", NULL);

hgSelect(argv[1], argv[2], argv[3]);
return 0;
}
