/* chromAlias.c was originally generated by the autoSql program, which also 
 * generated chromAlias.h and chromAlias.sql.  This module links the database and
 * the RAM representation of objects. */

#include <pthread.h>
#include "common.h"
#include "linefile.h"
#include "dystring.h"
#include "jksql.h"
#include "cart.h"
#include "chromAlias.h"
#include "hdb.h"
#include "trackHub.h"
#include "fieldedTable.h"
#include "bigBed.h"
#include "bbiAlias.h"
#include "bPlusTree.h"
#include "errCatch.h"


/* Comma separated names of the chromAlias fields, in the order alias, chrom, source. */
char *chromAliasCommaSepFieldNames = "alias,chrom,source";

void chromAliasStaticLoad(char **row, struct chromAlias *ret)
/* Fill in ret from a chromAlias table row without copying anything: the
 * fields of ret point straight at row[0], row[1] and row[2], so they are
 * only as long-lived as the row itself. */
{
char **column = row;

ret->alias = *column++;
ret->chrom = *column++;
ret->source = *column;
}

struct chromAlias *chromAliasLoad(char **row)
/* Build a newly allocated chromAlias holding private copies of the three
 * columns of row (alias, chrom, source).
 * Dispose of this with chromAliasFree(). */
{
enum { aliasIx = 0, chromIx = 1, sourceIx = 2 };
struct chromAlias *el;

AllocVar(el);
el->alias = cloneString(row[aliasIx]);
el->chrom = cloneString(row[chromIx]);
el->source = cloneString(row[sourceIx]);
return el;
}

struct chromAlias *chromAliasLoadAll(char *fileName)
/* Read every row of a whitespace-separated file into a list of chromAlias,
 * kept in file order.
 * Dispose of this with chromAliasFreeList(). */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct chromAlias *head = NULL;
char *columns[3];

for (;;)
    {
    if (!lineFileRow(lf, columns))
        break;
    struct chromAlias *item = chromAliasLoad(columns);
    slAddHead(&head, item);
    }
lineFileClose(&lf);

/* Items were pushed on the front, so flip the list back into file order. */
slReverse(&head);
return head;
}

struct chromAlias *chromAliasLoadAllByChar(char *fileName, char chopper)
/* Read every row of a file whose columns are separated by the chopper
 * character into a list of chromAlias, kept in file order.
 * Dispose of this with chromAliasFreeList(). */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct chromAlias *head = NULL;
char *columns[3];

for (;;)
    {
    if (!lineFileNextCharRow(lf, chopper, columns, ArraySize(columns)))
        break;
    struct chromAlias *item = chromAliasLoad(columns);
    slAddHead(&head, item);
    }
lineFileClose(&lf);

/* Items were pushed on the front, so flip the list back into file order. */
slReverse(&head);
return head;
}

struct chromAlias *chromAliasCommaIn(char **pS, struct chromAlias *ret)
/* Parse alias, chrom and source, in that order, from the comma separated
 * string at *pS and advance *pS past what was consumed.  The result goes
 * into ret when it is non-null; otherwise a new chromAlias is allocated.
 * Returns the chromAlias that was filled in. */
{
char *cursor = *pS;
struct chromAlias *el = ret;

if (el == NULL)
    AllocVar(el);
el->alias = sqlStringComma(&cursor);
el->chrom = sqlStringComma(&cursor);
el->source = sqlStringComma(&cursor);
*pS = cursor;
return el;
}

void chromAliasFree(struct chromAlias **pEl)
/* Release one dynamically allocated chromAlias, such as chromAliasLoad()
 * returns, together with its three strings, and set *pEl to NULL.
 * A NULL *pEl is quietly ignored. */
{
struct chromAlias *el = *pEl;

if (el != NULL)
    {
    freeMem(el->alias);
    freeMem(el->chrom);
    freeMem(el->source);
    freez(pEl);
    }
}

void chromAliasFreeList(struct chromAlias **pList)
/* Release every element of a list of dynamically allocated chromAlias's
 * and leave *pList set to NULL. */
{
struct chromAlias *el = *pList;

while (el != NULL)
    {
    /* Remember the successor before the element is freed. */
    struct chromAlias *following = el->next;
    chromAliasFree(&el);
    el = following;
    }
*pList = NULL;
}

void chromAliasOutput(struct chromAlias *el, FILE *f, char sep, char lastSep)
/* Write alias, chrom and source of el to f.  Fields are separated by sep and
 * the last one is followed by lastSep.  When sep is a comma each field is
 * wrapped in double quotes. */
{
char *values[] = { el->alias, el->chrom, el->source };
int valueCount = sizeof(values) / sizeof(values[0]);
int quoted = (sep == ',');
int i;

for (i = 0; i < valueCount; ++i)
    {
    if (quoted)
        fputc('"', f);
    fprintf(f, "%s", values[i]);
    if (quoted)
        fputc('"', f);
    /* sep between fields, lastSep after the final one */
    fputc((i + 1 < valueCount) ? sep : lastSep, f);
    }
}

void chromAliasJsonOutput(struct chromAlias *el, FILE *f)
/* Write el to f as a single JSON object with the string members alias,
 * chrom and source.  Values are written as they are, with no escaping. */
{
char *keys[] = { "alias", "chrom", "source" };
char *values[] = { el->alias, el->chrom, el->source };
int pairCount = sizeof(keys) / sizeof(keys[0]);
int i;

fputc('{', f);
for (i = 0; i < pairCount; ++i)
    {
    if (i > 0)
        fputc(',', f);
    fprintf(f, "\"%s\":\"%s\"", keys[i], values[i]);
    }
fputc('}', f);
}

/* -------------------------------- End autoSql Generated Code -------------------------------- */

/* our "global" data */
static struct
/* State shared by the setup and lookup functions in this file.  Either the
 * bigBed members (bbi, bptList, lm) or the two hashes get filled in,
 * depending on which setup routine ran; the lookups test bbi first. */
{
/* Set to TRUE by chromAliasSetup() so the setup work is only done once. */
boolean inited;
/* Not referenced by the code visible in this part of the file. */
boolean bptInited;
/* Result of bbiAliasOpenExtra() on bbi; passed to bbiAliasFindNative(). */
struct bptIndex *bptList;
/* Open chromAlias bigBed, or NULL when the hashes are used instead. */
struct bbiFile *bbi;
/* Local memory pool created with lmInit(0) and handed to the bbiAlias lookups. */
struct lm *lm;
int fieldCount; /* Number of fields. */
char **fields;  /* Names of fields. */
/* Keyed by native sequence name; values are struct chromAlias, one per alias. */
struct hash *chromToAliasHash;
/* Keyed by alias; values are the same struct chromAlias entries. */
struct hash *aliasToChromHash;
} chromAliasGlobals;

static void readOldAlias(struct lineFile *lf)
/* Load the older whitespace-separated alias format: the first word of a line
 * is the native name and every further word is one of its aliases.  Lines
 * with fewer than two words are skipped, so the table need not be fully
 * populated.  The source of each entry is dummied up as "none". */
{
char *tokens[1024];	/* process lines, no more than 1,024 words on a line */
char *line;
int lineSize;

while (lineFileNext(lf, &line, &lineSize))
    {
    int tokenCount = chopByWhite(line, tokens, ArraySize(tokens));
    if (tokenCount <= 1)
        continue;

    /* One copy of the native name is shared by all entries of this line. */
    char *native = cloneString(tokens[0]);
    int ix;
    for (ix = 1; ix < tokenCount; ++ix)
        {
        if (!isNotEmpty(tokens[ix]))
            continue;

        struct chromAlias *entry;
        AllocVar(entry);
        entry->chrom = native;
        entry->alias = cloneString(tokens[ix]);
        entry->source = "none";

        hashAdd(chromAliasGlobals.chromToAliasHash, entry->chrom, entry);
        hashAdd(chromAliasGlobals.aliasToChromHash, entry->alias, entry);
        }
    }
}

static void readFieldedTable(struct lineFile *lf)
/* Read a fully populated chromAlias.txt through the fieldedTable library.
 * The table's field names become chromAliasGlobals.fields, and every cell of
 * every row, the first column included, is entered in both hashes with the
 * first column as the native name and the column's field name as source. */
{
struct fieldedTable *table = fieldedTableAttach(lf, NULL, 0);
chromAliasGlobals.fieldCount = table->fieldCount;
chromAliasGlobals.fields = table->fields;

struct fieldedRow *fr = table->rowList;
while (fr != NULL)
    {
    char *native = fr->row[0];
    unsigned col = 0;

    while (col < table->fieldCount)
        {
        struct chromAlias *entry;
        AllocVar(entry);
        entry->chrom = native;
        entry->alias = fr->row[col];
        entry->source = table->fields[col];
        hashAdd(chromAliasGlobals.chromToAliasHash, native, entry);
        hashAdd(chromAliasGlobals.aliasToChromHash, entry->alias, entry);
        col++;
        }
    fr = fr->next;
    }
}

static char * gbdbBbExists(char *database)
/* Report the gbdb bigBed to use as alias file for database, or NULL when
 * there is none.  Using /gbdb/<database>/chromAlias.bb is not supported at
 * the moment, so the answer is currently always NULL and database is not
 * looked at. */
{
char *bbPath = NULL;	/* stays NULL while gbdb bigBed aliases are unsupported */

return bbPath;
}

void chromAliasSetupBb(char *database, char *bbFile)
/* Open the chromAlias bigBed bbFile and record in chromAliasGlobals the names
 * of its fields beyond the first definedFieldCount ones, its extra indexes and
 * a fresh local memory pool.  The database argument is not used here. */
{
chromAliasGlobals.bbi = bigBedFileOpen(bbFile);

struct slName *nameEl = bbFieldNames(chromAliasGlobals.bbi);
int totalNames = slCount(nameEl);
chromAliasGlobals.fieldCount = totalNames - chromAliasGlobals.bbi->definedFieldCount;
AllocArray(chromAliasGlobals.fields, chromAliasGlobals.fieldCount);

/* Step over the first definedFieldCount names; they are not kept as alias fields. */
int toSkip = chromAliasGlobals.bbi->definedFieldCount;
while (toSkip-- > 0)
    nameEl = nameEl->next;

/* The remaining names are kept, pointing into the slName list itself. */
int slot = 0;
while (slot < chromAliasGlobals.fieldCount)
    {
    chromAliasGlobals.fields[slot++] = nameEl->name;
    nameEl = nameEl->next;
    }

chromAliasGlobals.bptList = bbiAliasOpenExtra(chromAliasGlobals.bbi);
chromAliasGlobals.lm = lmInit(0);
}

static void chromAliasSetupHub(char *database)
/* Set up aliases for a track hub database.  If the hub names an alias bigBed
 * it is opened with chromAliasSetupBb() and nothing else is done.  Otherwise
 * the hub's alias text file, if any, is loaded into the chromToAlias and
 * aliasToChrom hashes.  Returns without doing anything when the hub names
 * neither.  Aborts if the text file is empty. */
{
char *aliasBbFile = trackHubAliasBbFile(database);
if (aliasBbFile != NULL)
    {
    /* The bigBed takes precedence; the hashes are not created in this case. */
    chromAliasSetupBb(database, aliasBbFile);
    return;
    }
char *aliasFile = trackHubAliasFile(database);
if (aliasFile == NULL)
    return;

struct lineFile *lf = udcWrapShortLineFile(aliasFile, NULL, MAX_HUB_TRACKDB_FILE_SIZE);

chromAliasGlobals.chromToAliasHash = hashNew(0);
chromAliasGlobals.aliasToChromHash = hashNew(0);

/* Peek at the first line only to detect an empty file.
 * NOTE(review): lineFileReuse() presumably hands that line back to the next reader -- confirm. */
char *line;
if (!lineFileNext(lf, &line, NULL))
   errAbort("%s is empty", lf->fileName);
lineFileReuse(lf);

/* Try the fielded table format first, catching any error it raises. */
struct errCatch *errCatch = errCatchNew();
if (errCatchStart(errCatch))
    readFieldedTable(lf);
errCatchEnd(errCatch);
if (errCatch->gotError)
    {
    /* The fielded read failed, so reopen the file from the start and read
     * it as the older word-per-alias format instead. */
    lineFileClose(&lf);
    lf = udcWrapShortLineFile(aliasFile, NULL, MAX_HUB_TRACKDB_FILE_SIZE);
    readOldAlias(lf);
    }
errCatchFree(&errCatch);
lineFileClose(&lf);
}

static void chromAliasSetupSql(char *database)
/* Load the hashes from the chromAlias SQL table of database, if that table
 * exists.  The 'source' column may hold a comma separated list of naming
 * authorities; one chromAlias entry is made per authority, and the distinct
 * authorities, in order of first appearance, become chromAliasGlobals.fields. */
{
if (!hTableExists(database, "chromAlias"))
    return;

struct sqlConnection *conn = hAllocConn(database);
chromAliasGlobals.chromToAliasHash = hashNew(0);
chromAliasGlobals.aliasToChromHash = hashNew(0);

struct hash *seenSources = hashNew(0);	/* authority name -> order of first appearance */
int sourceTotal = 0;
struct slName *sourceNames = NULL;	/* distinct authority names */

char query[2048];
sqlSafef(query, sizeof(query), "select * from chromAlias");
struct sqlResult *sr = sqlGetResult(conn, query);
char **row = sqlNextRow(sr);
for ( ; row != NULL; row = sqlNextRow(sr))
    {
    struct chromAlias *loaded = chromAliasLoad(row);
    char *authorities[1024];  /* 1024 naming authorities ?  surely never more . . . */
    int authorityCount = chopByChar(loaded->source, ',', authorities, ArraySize(authorities));
    int ix = 0;
    while (ix < authorityCount)
        {
        char *authority = authorities[ix++];
        if (hashIntValDefault(seenSources, authority, -1) < 0)
            {
            /* first time this authority shows up */
            struct slName *fresh = slNameNew(authority);
            slAddHead(&sourceNames, fresh);
            hashAddInt(seenSources, authority, sourceTotal++);
            }
        struct chromAlias *entry;
        AllocVar(entry);
        entry->chrom = cloneString(loaded->chrom);
        entry->alias = cloneString(loaded->alias);
        entry->source = cloneString(authority);
        hashAdd(chromAliasGlobals.chromToAliasHash, loaded->chrom, entry);
        hashAdd(chromAliasGlobals.aliasToChromHash, loaded->alias, entry);
        }
    chromAliasFree(&loaded);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);

/* Publish the authority names in first-appearance order; the array points
 * into the slName list, which therefore stays allocated. */
chromAliasGlobals.fieldCount = sourceTotal;
slReverse(&sourceNames);
AllocArray(chromAliasGlobals.fields, chromAliasGlobals.fieldCount);
struct slName *nameEl = sourceNames;
int slot;
for (slot = 0; slot < chromAliasGlobals.fieldCount; slot++)
    {
    chromAliasGlobals.fields[slot] = nameEl->name;
    nameEl = nameEl->next;
    }
}	/*	static void chromAliasSetupSql(char *database)	*/

/* Mutex taken around the one-time setup and the lookup caches in this file. */
static pthread_mutex_t ourMutex = PTHREAD_MUTEX_INITIALIZER;

static void getLock(void)
/* Acquire our mutex, waiting until it becomes available.  The mutex itself
 * is statically initialized, so nothing is created here. */
{
pthread_mutex_lock(&ourMutex);
}

static void releaseLock(void)
/* Release our mutex. */
{
pthread_mutex_unlock(&ourMutex);
}

void chromAliasSetup(char *database)
/* Read in the chromAlias file/table for this database.  Only the first call
 * with a non-NULL database does any work; later calls return at once.  The
 * source is the track hub for hub databases, else a gbdb bigBed if one is
 * reported, else the SQL table. */
{
if (database == NULL)
    return;

getLock();
if (!chromAliasGlobals.inited)
    {
    chromAliasGlobals.inited = TRUE;

    if (trackHubDatabase(database))
        chromAliasSetupHub(database);
    else
        {
        char *gbdbFile = gbdbBbExists(database);
        if (gbdbFile != NULL)
            chromAliasSetupBb(database, gbdbFile);
        else
            chromAliasSetupSql(database);
        }
    }
releaseLock();
}

char *findNativeHashes(char *alias)
/* Look alias up in the aliasToChrom hash.  Returns a newly cloned copy of the
 * native sequence name, or NULL when the alias is not in the hash. */
{
struct chromAlias *hit = hashFindVal(chromAliasGlobals.aliasToChromHash, alias);

return (hit == NULL) ? NULL : cloneString(hit->chrom);
}

char *chromAliasFindNative(char *alias)
/* Find the native seqName for a given alias.  The result is always a freshly
 * cloned string owned by the caller, or NULL when alias is NULL or no native
 * name is known for it.  Successful lookups are remembered in a cache that is
 * private to this function; misses are not cached and are looked up again. */
{
static struct hash *cachedNative;
char *chrom;

if (alias == NULL)
    return NULL;

/* The cache is created, read and extended only while holding the lock, so a
 * lookup can never run at the same time as an insertion. */
getLock();
if (cachedNative == NULL)
    cachedNative = newHash(6);

if ((chrom = hashFindVal(cachedNative, alias)) == NULL)
    {
    if (chromAliasGlobals.bbi)
        {
        chrom = bbiAliasFindNative(chromAliasGlobals.bbi, chromAliasGlobals.bptList, chromAliasGlobals.lm,  alias);
        /* Give the cache its own copy of whatever the bigBed lookup returned. */
        if (chrom != NULL)
            chrom = cloneString(chrom);
        }
    else if (chromAliasGlobals.aliasToChromHash)
        chrom = findNativeHashes(alias);	/* already a private copy */

    /* Only remember hits: a NULL entry would never satisfy a later lookup
     * and would just add another dead entry on every miss. */
    if (chrom != NULL)
        hashAdd(cachedNative, alias, chrom);
    }
releaseLock();

/* Hand out a copy so the caller can free the result without touching the cache. */
return cloneString(chrom);
}

struct slName *findAliasesHashes(char *seqName)
/* Collect the alias of every chromToAlias hash entry stored under seqName
 * into a new slName list.  Each name is pushed on the front of the list, so
 * the result is in the reverse of the order the hash hands the entries out.
 * Returns NULL when seqName has no entries. */
{
struct hashEl *hel = hashLookup(chromAliasGlobals.chromToAliasHash, seqName);
struct slName *aliasList = NULL;

while (hel != NULL)
    {
    struct chromAlias *entry = hel->val;
    struct slName *node = newSlName(entry->alias);
    slAddHead(&aliasList, node);
    hel = hashLookupNext(hel);
    }

return aliasList;
}

struct slName *chromAliasFindAliases(char *seqName)
/* Find the aliases for a given seqName.  Returns NULL when seqName is NULL or
 * has no aliases.  A non-NULL result is the list kept in this function's
 * cache and is shared between callers, so it must not be modified or freed.
 * Misses are not cached and are looked up again on the next call. */
{
static struct hash *cachedAliases;
struct slName *aliases;

if (seqName == NULL)
    return NULL;

/* The cache is created, read and extended only while holding the lock, so a
 * lookup can never run at the same time as an insertion. */
getLock();
if (cachedAliases == NULL)
    cachedAliases = newHash(6);

if ((aliases = hashFindVal(cachedAliases, seqName)) == NULL)
    {
    if (chromAliasGlobals.bbi)
        aliases = bbiAliasFindAliases(chromAliasGlobals.bbi,chromAliasGlobals.lm, seqName);
    else if (chromAliasGlobals.chromToAliasHash)
        aliases = findAliasesHashes(seqName);

    /* Only remember hits: a NULL entry would never satisfy a later lookup
     * and would just add another dead entry on every miss. */
    if (aliases != NULL)
        hashAdd(cachedAliases, seqName, aliases);
    }
releaseLock();

return aliases;
}

char *chromAliasFindSingleAlias(char *seqName, char *authority)
/* Find the alias of seqName that belongs to the given authority.  Always
 * returns a cloned string.  The result is a copy of seqName itself when
 * authority is NULL, when seqName has no aliases, when authority is not one
 * of chromAliasGlobals.fields, or when the alias in that position is missing
 * or empty. */
{
if (authority == NULL)
    return cloneString(seqName);

struct slName *aliasEl = chromAliasFindAliases(seqName);
if (aliasEl == NULL)
    return cloneString(seqName);

/* Which field, by position, carries this authority's names? */
unsigned wanted = 0;
while (wanted < chromAliasGlobals.fieldCount
       && !sameString(authority, chromAliasGlobals.fields[wanted]))
    wanted++;
if (wanted >= chromAliasGlobals.fieldCount)
    return cloneString(seqName);

/* Walk to the alias in the same position.
 * NOTE(review): this assumes the alias list is ordered like
 * chromAliasGlobals.fields -- confirm that holds for every setup path. */
unsigned step;
for (step = 0; aliasEl != NULL && step < wanted; step++)
    aliasEl = aliasEl->next;

char *chosen = seqName;
if (aliasEl != NULL && !isEmpty(aliasEl->name))
    chosen = aliasEl->name;
return cloneString(chosen);
}

char *chromAliasGetDisplayChrom(char *db, struct cart *cart, char *seqName)
/* Return the sequence name to display for seqName in db.  For a track hub
 * database this is the alias under the hub genome's chromAuthority, as
 * returned by chromAliasFindSingleAlias(); for any other database it is
 * seqName itself, not a copy.  The cart is not consulted. */
{
if (!trackHubDatabase(db))
    return seqName;

struct trackHubGenome *genome = trackHubGetGenome(db);
return chromAliasFindSingleAlias(seqName, genome->chromAuthority);
}

char *chromAliasNCBI(char *db, char *chr, char *gcX)
/* Given the database and the chrom name, find the NCBI equivalent chr name:
 * the "refseq" alias when gcX starts with "GCF", the "genbank" alias
 * otherwise. */
{
/* just in case this has not yet been done by the caller */
chromAliasSetup(db);

char *authority = startsWith("GCF", gcX) ? "refseq" : "genbank";
return chromAliasFindSingleAlias(chr, authority);
}
