/* chromGraphFactory - turn user-uploaded track in chromGraph format
 * into a customTrack. */

/* Copyright (C) 2014 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */

#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "errCatch.h"
#include "jksql.h"
#include "hdb.h"
#include "chromInfo.h"
#include "chromGraph.h"
#include "customTrack.h"
#include "customPp.h"
#include "customFactory.h"
#include "chromGraphFactory.h"
#include "trashDir.h"


/* Names of the database tables that hold marker positions for the
 * supported array platforms.  Each is probed with sqlTableExists()
 * before use, since not every assembly has every table. */

/* Affymetrix SNP arrays */
#define affy500Table "snpArrayAffy500"
#define affy6Table "snpArrayAffy6"
#define affy6SVTable "snpArrayAffy6SV"

/* Illumina SNP arrays */
#define illumina300Table "snpArrayIllumina300"
#define illumina550Table "snpArrayIllumina550"
#define illumina650Table "snpArrayIllumina650"
#define illumina1MTable "snpArrayIllumina1M"

/* Agilent CGH array */
#define agilentCgh244ATable "agilentCgh244a"

typedef int (*Chopper)(char *line, char **cols, int maxCol);
/* A function that breaks a row into columns.  Callers in this file pass
 * the line to split, an array to receive the column pointers, and the
 * size of that array; the return value is treated as the number of
 * columns found (see readLabels). */

struct chromPos
/* Just chromosome and position.  Used as the value type of the
 * marker-name -> location hashes built from database tables. */
    {
    char *chrom;	/* Chromosome name.  Not allocated here */
    int pos;	/* Position on chrom, as read from the table query */
    };

struct labeledFile
/* A list of file and label.  One element is made per data column of the
 * uploaded file; each becomes its own binary file and custom track. */
    {
    struct labeledFile *next;
    char *fileName;	/* File name */
    char *label;	/* Label */
    struct chromGraph *cgList;	/* List of associated chrom graphs */
    double minVal,maxVal;	/* Min/max values in graph */
    };

struct labeledFile *labeledFileNew(char *fileName, char *label)
/* Allocate a labeledFile holding its own copies of fileName and label. */
{
struct labeledFile *lf;
AllocVar(lf);
lf->label = cloneString(label);
lf->fileName = cloneString(fileName);
return lf;
}

static boolean chromGraphRecognizer(struct customFactory *fac,
	struct customPp *cpp, char *type, 
    	struct customTrack *track)
/* Recognizer callback: the track is ours when its type string matches
 * this factory's name.  cpp and track are not examined. */
{
boolean isChromGraph = sameOk(type, fac->name);
return isChromGraph;
}

static int commaChopper(char *line, char **cols, int maxCol)
/* Chopper for comma-separated rows: split line at commas into cols and
 * return the column count reported by chopByChar. */
{
int colsFound = chopByChar(line, ',', cols, maxCol);
return colsFound;
}

static int tabChopper(char *line, char **cols, int maxCol)
/* Chopper for tab-separated rows: split line at tabs into cols and
 * return the column count reported by chopByChar. */
{
int colsFound = chopByChar(line, '\t', cols, maxCol);
return colsFound;
}

static Chopper getChopper(char *formatType)
/* Map a format type (tab, comma or whitespace) to the function that
 * splits a line of that format.  Any other format type is an internal
 * error. */
{
Chopper chopper = NULL;
if (sameString(formatType, cgfFormatTab))
    chopper = tabChopper;
else if (sameString(formatType, cgfFormatComma))
    chopper = commaChopper;
else if (sameString(formatType, cgfFormatSpace))
    chopper = chopByWhite;
else
    internalErr();
return chopper;
}

static struct slName *getPreview(struct customPp *cpp, int count)
/* Pull up to count lines from cpp and return them, in file order, as a
 * list of copies.  Stops early if cpp runs out of lines. */
{
struct slName *previewList = NULL;
int remaining = count;
while (remaining > 0)
    {
    char *line = customFactoryNextRealTilTrack(cpp);
    if (line == NULL)
        break;
    struct slName *copy = slNameNew(line);
    slAddHead(&previewList, copy);
    --remaining;
    }
/* Lines were pushed on the head, so flip back into file order. */
slReverse(&previewList);
return previewList;
}

static void returnPreview(struct customPp *cpp, struct slName **pList)
/* Hand the preview lines back to cpp so they get read again, then free
 * the list.  Lines are handed back last-first. */
{
slReverse(pList);
struct slName *el = *pList;
while (el != NULL)
    {
    customPpReuse(cpp, el->name);
    el = el->next;
    }
slNameFreeList(pList);
}

static int delimitedTableSize(struct slName *list, int colDelim)
/* Count columns (delimiter occurrences plus one) on each line of list.
 * Returns the shared count, or 0 if the lines disagree or list is empty. */
{
int expected = 0;
struct slName *el;
for (el = list; el != NULL; el = el->next)
    {
    int count = countChars(el->name, colDelim) + 1;
    if (expected != 0 && count != expected)
	return 0;
    expected = count;
    }
return expected;
}

static int spaceDelimitedTableSize(struct slName *list)
/* Count whitespace-separated words on each line of list.  Returns the
 * shared count, or 0 if the lines disagree or list is empty. */
{
int expected = 0;
struct slName *el;
for (el = list; el != NULL; el = el->next)
    {
    int count = chopByWhite(el->name, NULL, 0);
    if (expected != 0 && count != expected)
	return 0;
    expected = count;
    }
return expected;
}

static boolean figureOutFormat(struct slName *lineList, 
	char **retType, int *retCols)
/* Guess the column delimiter from sample lines.  Whitespace, tab and
 * comma are tried in that order; the first that gives a consistent
 * count of more than one column wins.  Returns TRUE with the format in
 * *retType and column count in *retCols, or FALSE (with *retType set to
 * the guess format) if nothing fits. */
{
char *type = cgfFormatGuess;
boolean ok = TRUE;
int colCount = spaceDelimitedTableSize(lineList);

if (colCount > 1)
    type = cgfFormatSpace;
else
    {
    colCount = delimitedTableSize(lineList, '\t');
    if (colCount > 1)
	type = cgfFormatTab;
    else
	{
	colCount = delimitedTableSize(lineList, ',');
	if (colCount > 1)
	    type = cgfFormatComma;
	else
	    ok = FALSE;
	}
    }
*retType = type;
*retCols = colCount;
return ok;
}

int countColumns(struct slName *list, char *formatType)
/* Return number of columns on the first line of list when split
 * according to formatType.  Unknown format types are an internal error. */
{
char *firstLine = list->name;
int count = 0;
if (sameString(formatType, cgfFormatTab))
    count = 1 + countChars(firstLine, '\t');
else if (sameString(formatType, cgfFormatComma))
    count = 1 + countChars(firstLine, ',');
else if (sameString(formatType, cgfFormatSpace))
    count = chopByWhite(firstLine, NULL, 0);
else
    internalErr();
return count;
}

struct markerTypeRecognizer
/* Helper to recognize a marker type.  One is built per candidate table;
 * guessMarkerType tallies how many sample markers each one finds. */
    {
    struct markerTypeRecognizer *next;
    char *type;		/* Type	string from UI */
    char *table;	/* Table to assist in recognition */
    char *query; 	/* Query format (first %s is table, second %s is item) */
    int matches;	/* Count of matches. */
    };

static void markerRecognizerAdd(struct markerTypeRecognizer **pList,
	char *type, char *table, char *query)
/* Allocate a recognizer for type/table/query and push it on head of *pList. */
{
struct markerTypeRecognizer *mtr;
AllocVar(mtr);
mtr->type = type;
mtr->table = table;
mtr->query = query;
slAddHead(pList, mtr);
}

static void markerRecognizerAddIfTable(struct sqlConnection *conn,
	struct markerTypeRecognizer **pList, 
	char *type, char *table, char *query)
/* Add a recognizer to *pList only if table exists in conn's database. */
{
if (sqlTableExists(conn, table))
    markerRecognizerAdd(pList, type, table, query);
}

static struct markerTypeRecognizer *getMarkerRecognizers(
	struct sqlConnection *conn, int colCount)
/* Return list of marker recognizers, one for each marker table present
 * in the database on conn.  The chromosome (genomic) recognizer is only
 * included when colCount is at least 3.  Each recognizer is pushed on
 * the head of the list, so the returned list is in reverse of the order
 * below; guessMarkerType breaks ties in favor of earlier list elements. */
{
struct markerTypeRecognizer *list = NULL;
char *nameQuery = "select count(*) from %s where name='%s'";
char *snpTable;

/* STS markers are looked up by alias. */
markerRecognizerAddIfTable(conn, &list, cgfMarkerSts, "stsAlias",
	"select count(*) from %s where alias='%s'");

/* Affymetrix arrays */
markerRecognizerAddIfTable(conn, &list, cgfMarkerAffy500, affy500Table, 
	nameQuery);
markerRecognizerAddIfTable(conn, &list, cgfMarkerAffy6, affy6Table, 
	nameQuery);
markerRecognizerAddIfTable(conn, &list, cgfMarkerAffy6SV, affy6SVTable, 
	nameQuery);

/* Illumina arrays */
markerRecognizerAddIfTable(conn, &list, cgfMarkerHumanHap300, 
	illumina300Table, nameQuery);
markerRecognizerAddIfTable(conn, &list, cgfMarkerHumanHap550, 
	illumina550Table, nameQuery);
markerRecognizerAddIfTable(conn, &list, cgfMarkerHumanHap650, 
	illumina650Table, nameQuery);
markerRecognizerAddIfTable(conn, &list, cgfMarkerHumanHap1M, 
	illumina1MTable, nameQuery);

/* Agilent CGH 244A */
markerRecognizerAddIfTable(conn, &list, cgfMarkerAgilentCgh244A, 
	agilentCgh244ATable, nameQuery);

/* SNP table - the helper reports the table to use, NULL if none. */
snpTable = hFindLatestSnpTableConn(conn, NULL);
if (snpTable != NULL)
    markerRecognizerAdd(&list, cgfMarkerSnp, snpTable, nameQuery);

/* Chromosome - only plausible if there is room for chrom, position, and
 * at least one value column. */
if (colCount >= 3)
    markerRecognizerAddIfTable(conn, &list, cgfMarkerGenomic, "chromInfo",
	    "select count(*) from %s where chrom='%s'");

return list;
}

static char *guessMarkerType(struct slName *lineList, Chopper chopper, 
	struct sqlConnection *conn, int colCount)
/* Guess which type of marker type is being used.  The first column of
 * each sample line is looked up with every available recognizer, and the
 * recognizer that matches the most lines wins, provided it matches more
 * than half of them (integer division).  Returns the winning type
 * string, or NULL if no recognizer qualifies. */
{
struct markerTypeRecognizer *mtrList = getMarkerRecognizers(conn, colCount);
struct markerTypeRecognizer *mtr, *bestMtr = NULL;
int lineCount = 0, bestCount = 0;
struct slName *el;
char *type = NULL;

/* Loop through sample lines keeping track of who recognizes markers. */
for (el = lineList; el != NULL; el = el->next)
    {
    /* Chop a private copy so the preview line itself stays intact. */
    char *s = cloneString(el->name);
    char *row[1] = {NULL};
    int colsRead = chopper(s, row, ArraySize(row));
    /* A line that yields no columns has no marker to look up; it still
     * counts as an unrecognized line below. */
    if (colsRead >= 1 && row[0] != NULL)
	{
	for (mtr = mtrList; mtr != NULL; mtr = mtr->next)
	    {
	    char query[512];
	    sqlSafef(query, sizeof(query), mtr->query, mtr->table, row[0]);
	    if (sqlQuickNum(conn, query) > 0)
		mtr->matches += 1;
	    }
	}
    freeMem(s);
    ++lineCount;
    }

/* Figure out who recognizes the most markers.  On ties the recognizer
 * earliest in the list is kept. */
for (mtr = mtrList; mtr != NULL; mtr = mtr->next)
    {
    if (mtr->matches > bestCount)
         {
	 bestCount = mtr->matches;
	 bestMtr = mtr;
	 }
    }

/* Return best type, so long as more than half are recognized.  The type
 * string is not owned by the recognizer, so it stays valid after the
 * list is freed. */
if (bestMtr != NULL && bestCount > lineCount/2)
    type = bestMtr->type;
slFreeList(&mtrList);
return type;
}

static boolean firstRowConsistentWithData(char *line, Chopper chopper, 
	int colCount)
/* Return TRUE if first row looks like it is real data (all numbers except
 * for marker column).  A column counts as a number if it starts with a
 * digit, or with '-' followed by a digit.  A row with fewer than colCount
 * columns is not considered data.  The input line is not modified. */
{
char *dupe = cloneString(line);
char **row;
int i, colsRead;
boolean ok = TRUE;

AllocArray(row, colCount);
colsRead = chopper(dupe, row, colCount);
if (colsRead < colCount)
    ok = FALSE;
/* Column 0 is skipped: it holds the marker name. */
for (i=1; ok && i<colCount; ++i)
    {
    char *col = row[i];
    /* ctype functions need an unsigned char value. */
    unsigned char c = col[0];
    if (!(isdigit(c) || (c == '-' && isdigit((unsigned char)col[1]))))
	ok = FALSE;
    }
freeMem(row);
freeMem(dupe);
return ok;
}

static int markerCols(char *markerType)
/* The number of columns used for location. */
{
if (sameString(markerType, cgfMarkerGenomic))
    return 2;
else
    return 1;
}

static boolean allWhite(char *s)
/* Return TRUE if nothing remains of s after skipping leading spaces. */
{
char *firstNonSpace = skipLeadingSpaces(s);
return *firstNonSpace == 0;
}

static void readLabels(struct customPp *cpp, int dataStart, Chopper chopper,
	char **row, int colCount, struct labeledFile *fileList)
/* Read the next line from cpp as a header row and use columns from
 * dataStart onward as labels for the matching elements of fileList.
 * Blank header cells leave the existing label in place.  Aborts if there
 * is no line, or if it does not chop into exactly colCount columns. */
{
char *line = customFactoryNextRealTilTrack(cpp);
if (line == NULL)
    errAbort("%s is empty", cpp->fileStack ? cpp->fileStack->fileName : "input");

int colsRead = chopper(line, row, colCount);
if (colsRead != colCount)
    {
    if (cpp->fileStack)
	errAbort("Expecting %d words line %d of %s got %d", 
	    colCount, cpp->fileStack->lineIx, cpp->fileStack->fileName, colsRead);
    else
    	errAbort("Expecting %d words got %d", 
	    colCount, colsRead);
    }

/* Walk the data columns and the file list in step. */
struct labeledFile *fileEl = fileList;
int col;
for (col = dataStart; col < colCount; ++col)
    {
    if (!allWhite(row[col]))
	fileEl->label = cloneString(row[col]);
    fileEl = fileEl->next;
    }
}

static struct hash *chromInfoHash(struct sqlConnection *conn)
/* Load every row of the chromInfo table and return a hash of
 * struct chromInfo keyed by chromosome name. */
{
struct hash *hash = hashNew(0);
char query[1024];
sqlSafef(query, sizeof query, "select * from chromInfo");
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    struct chromInfo *ci = chromInfoLoad(row);
    hashAdd(hash, ci->chrom, ci);
    }
sqlFreeResult(&sr);
return hash;
}

static int cppNeedNum(struct customPp *cpp, char *words[], int wordIx)
/* Make sure that words[wordIx] is an ascii integer, and return
 * binary representation of it. Conversion stops at first non-digit char. */
{
char *ascii = words[wordIx];
char c = ascii[0];
struct lineFile *lf = cpp->fileStack;
if (c != '-' && !isdigit(c))
    {
    if (lf)
    	errAbort("Expecting number field %d line %d of %s, got %s", 
	    wordIx+1, lf->lineIx, lf->fileName, ascii);
    else
    	errAbort("Expecting number field %d got %s", 
	    wordIx+1, ascii);
    }
return atoi(ascii);
}

double cppNeedDouble(struct customPp *cpp, char *words[], int wordIx)
/* Make sure that words[wordIx] is an ascii double value, and return
 * binary representation of it. */
{
char *valEnd;
char *val = words[wordIx];
double doubleValue;
struct lineFile *lf = cpp->fileStack;

doubleValue = strtod(val, &valEnd);
if ((*val == '\0') || (*valEnd != '\0'))
    {
    if (lf)
	errAbort("Expecting double field %d line %d of %s, got %s",
	    wordIx+1, lf->lineIx, lf->fileName, val);
    else
	errAbort("Expecting double field %d got %s",
	    wordIx+1, val);
    }
return doubleValue;
}

void cppVaAbort(struct customPp *cpp, char *format, va_list args)
/* Print file name, line number, and error message, and abort. */
{
struct dyString *dy = dyStringNew(0);
struct lineFile *lf = cpp->fileStack;
if (lf)
    dyStringPrintf(dy,  "Error line %d of %s: ", lf->lineIx, lf->fileName);
else
    dyStringPrintf(dy,  "Error: ");
dyStringVaPrintf(dy, format, args);
errAbort("%s", dy->string);
dyStringFree(&dy);
}

void cppAbort(struct customPp *cpp, char *format, ...)
/* Printf-style front end to cppVaAbort: abort with a message that is
 * prefixed by the file name and line number of cpp. */
{
va_list argList;
va_start(argList, format);
cppVaAbort(cpp, format, argList);
va_end(argList);
}



static void processGenomic(struct sqlConnection *conn, struct customPp *cpp, 
	int colCount, char *formatType, boolean firstLineLabels,
	struct labeledFile *fileList, boolean report)
/* Process file in chromosome/base/value(s) format into chromGraph.
 * Column 0 is the chromosome name, column 1 the base position, and each
 * later column supplies values for the matching element of fileList.
 * Empty value cells are skipped.  Aborts if there's a problem: a short
 * row, an unknown chromosome, a coordinate that is negative or not less
 * than the chromosome size, or a value that is not a number.  If report
 * is TRUE a summary line is printed to stdout. */
{
char **row;
struct chromGraph *cg;
struct hash *chromHash = chromInfoHash(conn);
struct labeledFile *fileEl;
struct chromInfo *ci;
int i;
int rowCount = 0;
Chopper chopper = getChopper(formatType);
char *line;

/* Columns 0 and 1 are read unconditionally below. */
if (colCount < 2)
    errAbort("Need at least chromosome and position columns, got %d column(s)",
	colCount);

AllocArray(row, colCount);
if (firstLineLabels)
    readLabels(cpp, 2, chopper, row, colCount, fileList);
while ((line = customFactoryNextRealTilTrack(cpp)) != NULL)
    {
    /* A short row would leave trailing row[] slots holding pointers from
     * an earlier line (or NULL), so reject it rather than read them. */
    int colsRead = chopper(line, row, colCount);
    if (colsRead < colCount)
	cppAbort(cpp, "Expecting %d columns, got %d.", colCount, colsRead);

    char *chrom = row[0];
    int start = cppNeedNum(cpp, row, 1);
    ci = hashFindVal(chromHash, chrom);
    if (ci == NULL)
	cppAbort(cpp, "Chromosome %s not found in this assembly (%s).", 
		 chrom, sqlGetDatabase(conn));
    /* Test for negative first, so that the comparison against the
     * chromosome size only ever sees a non-negative start. */
    if (start < 0)
        {
	cppAbort(cpp, "Negative base position %d on chromosome %s.",
		 start, chrom);
	}
    else if (start >= ci->size)
	{
	cppAbort(cpp, "Chromosome %s is %d bases long, but got coordinate %d.",
		 chrom, ci->size, start);
	}
    for (i=2, fileEl = fileList; i<colCount; ++i, fileEl = fileEl->next)
	{
	char *val = row[i];
	if (val[0] != 0)
	    {
	    AllocVar(cg);
	    /* row[0] points into the reader's line buffer, so use the
	     * chromosome name stored in chromHash instead; that hash is
	     * never freed, so the name outlives the chromGraph. */
	    cg->chrom = ci->chrom;
	    cg->chromStart = start;
	    cg->val = cppNeedDouble(cpp, row, i);
	    slAddHead(&fileEl->cgList, cg);
	    }
	}
    ++rowCount;
    }
freeMem(row);
if (report)
    printf("Read in %d markers and values in <i>chromosome base</i> format.<BR>\n", 
	rowCount);
}

struct hash *tableToChromPosHash(struct sqlConnection *conn, char *table, 
	char *sql)
/* Run sql (a format with one %s for table) and return a hash of
 * chromPos keyed by the upper-cased third result column.  The first
 * result column gives the chromosome, the second the position.  The
 * chromPos values and chromosome strings live in the hash's local
 * memory. */ 
{
struct hash *hash = newHash(23);
struct lm *lm = hash->lm;
char query[256];
sqlSafef(query, sizeof(query), sql, table);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    struct chromPos *pos;
    char *key = row[2];
    lmAllocVar(lm, pos);
    pos->chrom = lmCloneString(lm, row[0]);
    pos->pos = sqlUnsigned(row[1]);
    touppers(key);
    hashAdd(hash, key, pos);
    }
sqlFreeResult(&sr);
return hash;
}

struct hash *tableToAliasHash(struct sqlConnection *conn, char *table,
	char *query)
/* Run query (a format with one %s for table) and return a hash that maps
 * the upper-cased first result column (the alias) to an upper-cased copy
 * of the second (the true name), stored in the hash's local memory. */
{
struct hash *hash = hashNew(19);
char buf[256];
sqlSafef(buf, sizeof(buf), query, table);
struct sqlResult *sr = sqlGetResult(conn, buf);
char **row;
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    char *alias = row[0];
    char *trueName = row[1];
    touppers(alias);
    touppers(trueName);
    hashAdd(hash, alias, lmCloneString(hash->lm, trueName));
    }
sqlFreeResult(&sr);
return hash;
}


struct chromPos *snpIndexLookup(struct sqlConnection *conn, char *table, char *query, char *name)
/* Look up a single marker by name in the snp table, using query (a
 * format with one %s for table) extended with a where clause on name.
 * This replaces loading the whole table into a hash, which became too
 * slow once the snp table grew beyond 150 million rows.
 * Returns NULL if the name is not found.  Otherwise returns a pointer to
 * a static chromPos that is overwritten by the next call; its chrom is a
 * freshly cloned string. */
{
static struct chromPos pos;
struct chromPos *found = NULL;
struct dyString *fullQuery = sqlDyStringCreate(query, table);
sqlDyStringPrintf(fullQuery, " where name='%s'", name);
struct sqlResult *sr = sqlGetResult(conn, dyStringContents(fullQuery));
char **row = sqlNextRow(sr);
if (row != NULL)
    {
    pos.chrom = cloneString(row[0]);
    pos.pos = sqlUnsigned(row[1]);
    found = &pos;
    }
sqlFreeResult(&sr);
dyStringFree(&fullQuery);
return found;
}

void processDb(struct sqlConnection *conn,
	struct customPp *cpp, int colCount, char *formatType, 
	boolean firstLineLabels, struct labeledFile *fileList, 
	char *table, char *query, char *aliasTable, char *aliasQuery,
	boolean report, boolean isSnpTable)
/* Process marker/value(s) input file into chromGraph.  Treat first
 * column as a name to look up in bed-format table, which should
 * not be split.  Names are upper-cased before lookup.  When isSnpTable
 * is TRUE each name is looked up with its own query instead of loading
 * the whole table into a hash.  If aliasTable is non-NULL, names not
 * found directly are retried through the alias table.  Rows whose marker
 * can't be mapped are skipped and counted.  Aborts on short rows or
 * values that are not numbers.  If report is TRUE progress and mapping
 * statistics are printed to stdout. */
{
struct hash *hash = NULL;
struct hash *aliasHash = NULL;
char **row;
int match = 0, total = 0;
struct chromGraph *cg;
struct chromPos *pos;

if (!isSnpTable)
    {
    hash = tableToChromPosHash(conn, table, query);
    if (report)
	printf("Loaded %d elements from %s table for mapping.<BR>\n", 
	    hash->elCount, table);
    }
if (aliasTable != NULL)
    {
    aliasHash = tableToAliasHash(conn, aliasTable, aliasQuery);
    if (report)
	printf("Loaded %d aliases from %s table as well.<BR>\n", 
		aliasHash->elCount, aliasTable);
    }

fflush(stdout); // trying to keep browser from timing out

Chopper chopper = getChopper(formatType);
char *line;

AllocArray(row, colCount);
if (firstLineLabels)
    readLabels(cpp, 1, chopper, row, colCount, fileList);
while ((line = customFactoryNextRealTilTrack(cpp)) != NULL)
    {
    /* A short row would leave trailing row[] slots holding pointers from
     * an earlier line (or NULL), so reject it rather than read them. */
    int colsRead = chopper(line, row, colCount);
    if (colsRead < colCount)
	cppAbort(cpp, "Expecting %d columns, got %d.", colCount, colsRead);

    char *name = row[0];
    /* Keep the marker as the user wrote it; name is upper-cased in place
     * for the lookup. */
    char *marker = cloneString(name);
    touppers(name);
    ++total;
    if (isSnpTable)
    	pos = snpIndexLookup(conn, table, query, name);
    else
	{
	pos = hashFindVal(hash, name);
	if (pos == NULL && aliasHash != NULL)
	    {
	    name = hashFindVal(aliasHash, name);
	    if (name != NULL)
		pos = hashFindVal(hash, name);
	    }
	}
    if (pos != NULL)
        {
	int i;
	struct labeledFile *fileEl;
	++match;
	for (i=1, fileEl=fileList; i < colCount; ++i, fileEl = fileEl->next)
	    {
	    char *val = row[i];
	    if (val[0] != 0)
		{
		AllocVar(cg);
		cg->chrom = pos->chrom;
		cg->chromStart = pos->pos;
		cg->val = cppNeedDouble(cpp, row, i);
		cg->marker = cloneString(marker);
		slAddHead(&fileEl->cgList, cg);
		}
	    }
	}
    freeMem(marker);
    }
freeMem(row);
if (report)
    {
    /* Avoid 0/0 when the input had no data lines. */
    double percent = (total > 0) ? 100.0*match/total : 0.0;
    printf("Mapped %d of %d (%3.1f%%) of markers<BR>\n", match, total, 
	    percent);
    }
}

boolean mayProcessGenomic(struct sqlConnection *conn, struct customPp *cpp, 
	int colCount, char *formatType, boolean firstLineLabels, 
	struct labeledFile *fileList, boolean report)
/* Run processGenomic inside an error catcher, so that an abort there
 * does not end the program.  Returns the result of errCatchFinish. */
{
struct errCatch *errCatch = errCatchNew();
if (errCatchStart(errCatch))
    {
    processGenomic(conn, cpp, colCount, formatType, firstLineLabels, 
	fileList, report);
    }
boolean ok = errCatchFinish(&errCatch);
return ok;
}

boolean mayProcessDb(struct sqlConnection *conn,
	struct customPp *cpp, int colCount, char *formatType, 
	boolean firstLineLabels, struct labeledFile *fileList, 
	char *table, char *query, char *aliasTable, char *aliasQuery, 
	boolean report, boolean isSnpTable)
/* Run processDb inside an error catcher, so that an abort there does
 * not end the program.  Returns the result of errCatchFinish. */
{
struct errCatch *errCatch = errCatchNew();
if (errCatchStart(errCatch))
    {
    processDb(conn, cpp, colCount, formatType, firstLineLabels, 
	fileList, table, query, aliasTable, aliasQuery, report, isSnpTable);
    }
boolean ok = errCatchFinish(&errCatch);
return ok;
}

static struct labeledFile *parseToLabeledFiles(struct customPp *cpp,
	int colCount, char *formatType, char *markerType,
	boolean firstLineLabels, struct sqlConnection *conn, boolean report)
/* Read cpp up to the next track and load its values into a list of
 * labeledFile, one per data column, each with its own trash file name
 * and a default label numbering the data columns from 1.  markerType
 * selects how the location columns are interpreted.  Aborts if the
 * marker type is unknown, its table is missing, or processing fails. */
{
/* Allocate a labeledFile for each column of real data. */
struct labeledFile *fileList = NULL;
int posColCount = markerCols(markerType);
int col;
for (col = posColCount; col < colCount; ++col)
    {
    struct tempName tempName;
    char buf[16];
    safef(buf, sizeof(buf), "hggUp%d", col);
    trashDirFile(&tempName, "hgg", buf, ".cgb");
    safef(buf, sizeof(buf), "%d", col+1-posColCount);
    struct labeledFile *fileEl = labeledFileNew(tempName.forCgi, buf);
    slAddHead(&fileList, fileEl);
    }
slReverse(&fileList);

/* Work out which array table, if any, goes with the marker type. */
char *arrayTable = NULL;
if (sameString(markerType, cgfMarkerAffy500)) 
    arrayTable = affy500Table;
if (sameString(markerType, cgfMarkerAffy6))
    arrayTable = affy6Table;
if (sameString(markerType, cgfMarkerAffy6SV)) 
    arrayTable = affy6SVTable;
if (sameString(markerType, cgfMarkerHumanHap300)) 
    arrayTable = illumina300Table;
if (sameString(markerType, cgfMarkerHumanHap550)) 
    arrayTable = illumina550Table;
if (sameString(markerType, cgfMarkerHumanHap650)) 
    arrayTable = illumina650Table;
if (sameString(markerType, cgfMarkerHumanHap1M)) 
    arrayTable = illumina1MTable;
if (sameString(markerType, cgfMarkerAgilentCgh244A)) 
    arrayTable = agilentCgh244ATable;

boolean ok = FALSE;
if (sameString(markerType, cgfMarkerGenomic))
    {
    ok = mayProcessGenomic(conn, cpp, colCount, formatType, 
    	firstLineLabels, fileList, report);
    }
else if (sameString(markerType, cgfMarkerSts))
    {
    ok = mayProcessDb(conn, cpp, colCount, formatType, 
    	firstLineLabels, fileList, "stsMap",
    	"select chrom,round((chromStart+chromEnd)*0.5),name from %s",
	"stsAlias", "select alias,trueName from %s", report, FALSE);
    }
else if (sameString(markerType, cgfMarkerSnp))
    {
    char *snpTable = hFindLatestSnpTableConn(conn, NULL);
    if (snpTable == NULL)
        errAbort("No SNP table in %s", sqlGetDatabase(conn));
    ok = mayProcessDb(conn, cpp, colCount, formatType, 
	    firstLineLabels, fileList, snpTable, 
	    "select chrom,chromStart,name from %s", NULL, NULL, report, TRUE);
    }
else if (sameString(markerType, cgfMarkerAffy100))
    {
    /* Not supported: ok stays FALSE, so this falls to the abort below. */
    warn("Support for Affy 100k chip coming soon.");
    }
else if (arrayTable != NULL)
    {
    if (!sqlTableExists(conn, arrayTable))
        errAbort("Sorry, no data for %s on this assembly.",
		markerType);
    ok = mayProcessDb(conn, cpp, colCount, formatType,
    	firstLineLabels, fileList, arrayTable,
    	"select chrom,chromStart,name from %s", NULL, NULL, report, FALSE);
    }
else
    {
    errAbort("Unknown identifier format. markerType=%s", markerType);
    }

if (!ok)
    {
    noWarnAbort();
    return NULL;
    }
return fileList;
}

static void saveLabeledFileList(struct labeledFile *fileList)
/* For each labeled file, sort its chromGraph list and write it to the
 * file's binary file, recording the min and max values as it goes. */
{
struct labeledFile *fileEl = fileList;
while (fileEl != NULL)
    {
    slSort(&fileEl->cgList, chromGraphCmp);
    chromGraphToBinGetMinMax(fileEl->cgList, fileEl->fileName, 
    	&fileEl->minVal, &fileEl->maxVal);
    fileEl = fileEl->next;
    }
}

struct customTrack *chromGraphParser(char *genomeDb, struct customPp *cpp,
	char *formatType, char *markerType, char *columnLabels,
	char *name, char *description, struct hash *settings,
	boolean report)
/* Parse out a chromGraph file (not including any track lines) and return
 * a list of custom tracks, one per data column.  formatType, markerType
 * and columnLabels may each be the corresponding "guess" value, in which
 * case they are worked out from the first lines of the input.  name and
 * description may be NULL, in which case the default track labels are
 * used.  The formatType, markerType and columnLabels entries are removed
 * from settings; what remains is copied into each track's settings.
 * Returns NULL if the input has no lines; aborts if the format or marker
 * type can't be determined or the data can't be processed. */
{
char *minVal = hashFindVal(settings, "minVal");
char *maxVal = hashFindVal(settings, "maxVal");

/* Get first lines of track.  If track empty then
 * might as well return NULL here. */
struct slName *preview = getPreview(cpp, 10);
if (preview == NULL)
    return NULL;

/* Figure out format type - scanning preview if it isn't well defined. */
struct sqlConnection *conn = hAllocConn(genomeDb);
int colCount;

if (sameString(formatType, cgfFormatGuess))
    {
    if (!figureOutFormat(preview, &formatType, &colCount))
	errAbort("Can't figure out format for chromGraph track %s", 
		emptyForNull(name));
    }
hashMayRemove(settings, "formatType");

/* Now that we know format can count columns and determine how to
 * chop up lines. */
colCount = countColumns(preview, formatType);
Chopper chopper = getChopper(formatType);

/* Figure out marker type - scanning marker column of preview if it isn't
 * well defined. */
if (sameString(markerType, cgfMarkerGuess))
    {
    markerType = guessMarkerType(preview, chopper, conn, colCount);
    if (markerType == NULL)
	errAbort("Can't figure out marker column type for chromGraph track %s",
		emptyForNull(name));
    }
hashMayRemove(settings, "markerType");

/* Figure out if columns are labeled in file, using preview if needed. */
if (sameString(columnLabels, cgfColLabelGuess))
    {
    if (firstRowConsistentWithData(preview->name, chopper, colCount))
	columnLabels = cgfColLabelNumbered;
    else
	columnLabels = cgfColLabelFirstRow;
    }
hashMayRemove(settings, "columnLabels");
/* Done peeking: give the preview lines back so the real parse sees them. */
returnPreview(cpp, &preview);
boolean labelsInData = sameString(columnLabels, cgfColLabelFirstRow);

/* Load data into list of labeled temp files. */
struct labeledFile *fileEl, *fileList;
fileList = parseToLabeledFiles(cpp, colCount, formatType, markerType,
    labelsInData, conn, report);
saveLabeledFileList(fileList);

/* The remaining settings are the same for every track, so render them
 * once here rather than once per track. */
char *settingsRa = hashToRaString(settings);

/* Create a customTrack for each element in file list. */
struct customTrack *outputTracks = NULL;
for (fileEl = fileList; fileEl != NULL; fileEl = fileEl->next)
    {
    struct customTrack *track;
    AllocVar(track);
    struct trackDb *tdb = customTrackTdbDefault();
    track->tdb = tdb;

    /* Figure out short and long names and type*/
    char shortLabel[128];
    char longLabel[512];
    if (name == NULL)
        name = track->tdb->shortLabel;
    if (description == NULL)
        description = track->tdb->longLabel;
    if (colCount > 1 || labelsInData)
        {
	safef(shortLabel, sizeof(shortLabel), "%s %s", name, fileEl->label);
	safef(longLabel, sizeof(longLabel), "%s %s", description, fileEl->label);
	}
    else
        {
	safef(shortLabel, sizeof(shortLabel), "%s", name);
	safef(longLabel, sizeof(longLabel), "%s", description);
	}
    tdb->shortLabel = cloneString(shortLabel);
    tdb->longLabel = cloneString(longLabel);
    tdb->type = "chromGraph";
    tdb->track = customTrackTableFromLabel(tdb->shortLabel);
    tdb->table = cloneString(tdb->track);
    track->dbTableName = NULL;

    /* Create settings */
    struct dyString *dy = dyStringNew(0);
    dyStringAppend(dy, settingsRa);
    dyStringPrintf(dy, "binaryFile %s\n", fileEl->fileName);
    dyStringPrintf(dy, "type %s\n", tdb->type);
    dyStringPrintf(dy, "tdbType %s\n", tdb->type); /* Needed if outside factory */
    /* Only supply data-derived limits when the user gave none. */
    if (minVal == NULL)
        dyStringPrintf(dy, "minVal %g\n", fileEl->minVal);
    if (maxVal == NULL)
        dyStringPrintf(dy, "maxVal %g\n", fileEl->maxVal);
    tdb->settings = dyStringCannibalize(&dy);
    
    /* Add to list. */
    slAddHead(&outputTracks, track);
    }
freeMem(settingsRa);
hFreeConn(&conn);
slReverse(&outputTracks);
return outputTracks;
}


static struct customTrack *chromGraphLoader(struct customFactory *fac,  
	struct hash *chromHash, struct customPp *cpp, struct customTrack *track, 
	boolean dbRequested)
/* Loader callback.  If the track settings already name a binary file,
 * return the track when that file exists and NULL when it does not.
 * Otherwise parse chromGraph data from cpp up to the next track line,
 * guessing any format settings that are not given. */
{
struct trackDb *tdb = track->tdb;
struct hash *settings = tdb->settingsHash;

/* A binaryFile setting means the data was parsed earlier. */
char *binaryFileName = hashFindVal(settings, "binaryFile");
if (binaryFileName != NULL)
    return fileExists(binaryFileName) ? track : NULL;

char *formatType = trackDbSettingOrDefault(tdb, "formatType", cgfFormatGuess);
char *markerType = trackDbSettingOrDefault(tdb, "markerType", cgfMarkerGuess);
char *columnLabels = trackDbSettingOrDefault(tdb, "columnLabels", 
	cgfColLabelGuess);
char *name = trackDbSetting(tdb, "name");
char *description = trackDbSetting(tdb, "description");
return chromGraphParser(track->genomeDb, cpp, formatType, markerType, 
    columnLabels, name, description, settings, FALSE);
}

struct customFactory chromGraphFactory = 
/* Factory for chromGraph tracks */
    {
    NULL,	/* presumably the list 'next' pointer - confirm in customFactory.h */
    "chromGraph",	/* compared against the track type by chromGraphRecognizer */
    chromGraphRecognizer,
    chromGraphLoader,
    };

