/* gtexGeneBed.c was originally generated by the autoSql program, which also 
 * generated gtexGeneBed.h and gtexGeneBed.sql.  This module links the database and
 * the RAM representation of objects. */

#include "common.h"
#include "linefile.h"
#include "dystring.h"
#include "jksql.h"
#include "gtexGeneBed.h"



/* Comma separated names of the gtexGeneBed fields, in the same order as the
 * row indexes used by gtexGeneBedLoad() (chrom is 0, expScores is 9). */
char *gtexGeneBedCommaSepFieldNames = "chrom,chromStart,chromEnd,name,score,strand,geneId,geneType,expCount,expScores";

struct gtexGeneBed *gtexGeneBedLoadByQuery(struct sqlConnection *conn, char *query)
/* Run query on conn and turn every returned row into a gtexGeneBed.
 * The query is expected to select whole gtexGeneBed rows, for example
 * 'select * from example where something=something' or
 * 'select example.* from example, anotherTable where example.something =
 * anotherTable.something'.
 * The returned list is in result order.  Dispose of it with
 * gtexGeneBedFreeList(). */
{
struct gtexGeneBed *head = NULL;
struct sqlResult *result = sqlGetResult(conn, query);
char **fields;

for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
    struct gtexGeneBed *item = gtexGeneBedLoad(fields);
    slAddHead(&head, item);
    }
/* Items were pushed on the front, so flip the list back into row order. */
slReverse(&head);
sqlFreeResult(&result);
return head;
}

void gtexGeneBedSaveToDb(struct sqlConnection *conn, struct gtexGeneBed *el, char *tableName, int updateSize)
/* Insert el as one row into tableName using conn.
 * updateSize is the initial size handed to the string buffer that holds the
 * insert statement; because blob fields may be of arbitrary size it should be
 * roughly the length of the whole query.
 * The expScores array is converted to a single string with
 * sqlFloatArrayToString() and stored in the last column.
 * NOTE(review): the original autoSql comment states that quoted strings are
 * escaped for mysql automatically - presumably by sqlDyStringPrintf(). */
{
struct dyString *insertSql = dyStringNew(updateSize);
char *scoresAsText = sqlFloatArrayToString(el->expScores, el->expCount);

sqlDyStringPrintf(insertSql,
    "insert into %s values ( '%s',%u,%u,'%s',%u,'%s','%s','%s',%u,'%s')",
    tableName,
    el->chrom, el->chromStart, el->chromEnd,
    el->name, el->score, el->strand,
    el->geneId, el->geneType,
    el->expCount, scoresAsText);
sqlUpdate(conn, insertSql->string);

/* Both the statement buffer and the temporary score string are owned here. */
dyStringFree(&insertSql);
freez(&scoresAsText);
}

struct gtexGeneBed *gtexGeneBedLoad(char **row)
/* Build a newly allocated gtexGeneBed from row, an array of 10 strings in
 * gtexGeneBed column order (as fetched with select * from gtexGeneBed).
 * String fields are cloned, strand is copied into its fixed size buffer and
 * expScores is parsed from the text in row[9].
 * Dispose of the result with gtexGeneBedFree(). */
{
struct gtexGeneBed *bed;
int parsedScoreCount;

AllocVar(bed);
/* expCount is read first; it is checked against the parsed array below. */
bed->expCount = sqlUnsigned(row[8]);
bed->chrom = cloneString(row[0]);
bed->chromStart = sqlUnsigned(row[1]);
bed->chromEnd = sqlUnsigned(row[2]);
bed->name = cloneString(row[3]);
bed->score = sqlUnsigned(row[4]);
safecpy(bed->strand, sizeof(bed->strand), row[5]);
bed->geneId = cloneString(row[6]);
bed->geneType = cloneString(row[7]);
sqlFloatDynamicArray(row[9], &bed->expScores, &parsedScoreCount);
/* The declared count and the number of parsed scores must agree. */
assert(parsedScoreCount == bed->expCount);
return bed;
}

struct gtexGeneBed *gtexGeneBedLoadAll(char *fileName) 
/* Read every row of the whitespace-separated file fileName and return the
 * rows as a list of gtexGeneBed, in file order.
 * Dispose of this with gtexGeneBedFreeList(). */
{
struct gtexGeneBed *head = NULL;
struct lineFile *in = lineFileOpen(fileName, TRUE);
char *fields[10];

for (;;)
    {
    struct gtexGeneBed *item;
    if (!lineFileRow(in, fields))
        break;
    item = gtexGeneBedLoad(fields);
    slAddHead(&head, item);
    }
lineFileClose(&in);
/* Items were pushed on the front, so flip the list back into file order. */
slReverse(&head);
return head;
}

struct gtexGeneBed *gtexGeneBedLoadAllByChar(char *fileName, char chopper) 
/* Read every row of fileName, splitting fields on the chopper character, and
 * return the rows as a list of gtexGeneBed, in file order.
 * Dispose of this with gtexGeneBedFreeList(). */
{
struct gtexGeneBed *head = NULL;
struct lineFile *in = lineFileOpen(fileName, TRUE);
char *fields[10];

for (;;)
    {
    struct gtexGeneBed *item;
    if (!lineFileNextCharRow(in, chopper, fields, ArraySize(fields)))
        break;
    item = gtexGeneBedLoad(fields);
    slAddHead(&head, item);
    }
lineFileClose(&in);
/* Items were pushed on the front, so flip the list back into file order. */
slReverse(&head);
return head;
}

struct gtexGeneBed *gtexGeneBedCommaIn(char **pS, struct gtexGeneBed *ret)
/* Parse one gtexGeneBed out of the comma separated text at *pS.
 * The fields are read in gtexGeneBed column order; the expScores array is
 * expected inside braces and is followed by a comma.
 * If ret is non-null it is filled in, otherwise a new gtexGeneBed is
 * allocated.  On return *pS points just past the parsed text. */
{
char *cursor = *pS;
int scoreIx = 0;

if (ret == NULL)
    AllocVar(ret);

/* Scalar fields. */
ret->chrom = sqlStringComma(&cursor);
ret->chromStart = sqlUnsignedComma(&cursor);
ret->chromEnd = sqlUnsignedComma(&cursor);
ret->name = sqlStringComma(&cursor);
ret->score = sqlUnsignedComma(&cursor);
sqlFixedStringComma(&cursor, ret->strand, sizeof(ret->strand));
ret->geneId = sqlStringComma(&cursor);
ret->geneType = sqlStringComma(&cursor);
ret->expCount = sqlUnsignedComma(&cursor);

/* Array field: '{' followed by expCount floats, then '}' and ','. */
cursor = sqlEatChar(cursor, '{');
AllocArray(ret->expScores, ret->expCount);
while (scoreIx < ret->expCount)
    {
    ret->expScores[scoreIx] = sqlFloatComma(&cursor);
    ++scoreIx;
    }
cursor = sqlEatChar(cursor, '}');
cursor = sqlEatChar(cursor, ',');

*pS = cursor;
return ret;
}

void gtexGeneBedFree(struct gtexGeneBed **pEl)
/* Release one dynamically allocated gtexGeneBed, such as one created with
 * gtexGeneBedLoad(), together with the strings and score array it owns.
 * Does nothing when *pEl is NULL; the struct itself is released through
 * freez(pEl). */
{
struct gtexGeneBed *bed = *pEl;

if (bed == NULL)
    return;

/* Free the owned members first, then the struct itself. */
freeMem(bed->chrom);
freeMem(bed->name);
freeMem(bed->geneId);
freeMem(bed->geneType);
freeMem(bed->expScores);
freez(pEl);
}

void gtexGeneBedFreeList(struct gtexGeneBed **pList)
/* Release every gtexGeneBed on the list at *pList and set *pList to NULL. */
{
struct gtexGeneBed *cur = *pList;

while (cur != NULL)
    {
    /* Remember the successor before cur is freed. */
    struct gtexGeneBed *following = cur->next;
    gtexGeneBedFree(&cur);
    cur = following;
    }
*pList = NULL;
}

void gtexGeneBedOutput(struct gtexGeneBed *el, FILE *f, char sep, char lastSep) 
/* Write el to f as one record.  Fields are separated by sep and the record is
 * followed by lastSep.  When sep is a comma, string fields are wrapped in
 * double quotes and the expScores array in braces.  Every array element is
 * followed by a comma, whatever sep is. */
{
int commaMode = (sep == ',');
int scoreIx = 0;

/* chrom */
if (commaMode)
    fputc('"', f);
fprintf(f, "%s", el->chrom);
if (commaMode)
    fputc('"', f);
fputc(sep, f);

/* chromStart, chromEnd */
fprintf(f, "%u", el->chromStart);
fputc(sep, f);
fprintf(f, "%u", el->chromEnd);
fputc(sep, f);

/* name */
if (commaMode)
    fputc('"', f);
fprintf(f, "%s", el->name);
if (commaMode)
    fputc('"', f);
fputc(sep, f);

/* score */
fprintf(f, "%u", el->score);
fputc(sep, f);

/* strand */
if (commaMode)
    fputc('"', f);
fprintf(f, "%s", el->strand);
if (commaMode)
    fputc('"', f);
fputc(sep, f);

/* geneId */
if (commaMode)
    fputc('"', f);
fprintf(f, "%s", el->geneId);
if (commaMode)
    fputc('"', f);
fputc(sep, f);

/* geneType */
if (commaMode)
    fputc('"', f);
fprintf(f, "%s", el->geneType);
if (commaMode)
    fputc('"', f);
fputc(sep, f);

/* expCount */
fprintf(f, "%u", el->expCount);
fputc(sep, f);

/* expScores: comma-terminated values, braced only in comma mode */
if (commaMode)
    fputc('{', f);
while (scoreIx < el->expCount)
    {
    fprintf(f, "%g", el->expScores[scoreIx]);
    fputc(',', f);
    ++scoreIx;
    }
if (commaMode)
    fputc('}', f);

fputc(lastSep, f);
}

/* -------------------------------- End autoSql Generated Code -------------------------------- */

void gtexGeneBedCreateTable(struct sqlConnection *conn, char *table)
/* Create expression record format table of given name.
 * Builds the CREATE TABLE statement for the ten gtexGeneBed columns plus a
 * primary key on (chrom,geneId) and indexes on geneId and (chrom,chromStart),
 * then hands it to sqlRemakeTable().
 * NOTE(review): sqlRemakeTable() presumably replaces an existing table of the
 * same name - confirm before calling this on a table holding data. */
{
char query[1024];

/* The key and index definitions are create-definitions just like the columns,
 * so each one except the last must be followed by a comma. */
sqlSafef(query, sizeof(query),
"CREATE TABLE %s (\n"
"   chrom varchar(255) not null,	# Reference sequence chromosome or scaffold\n"
"   chromStart int unsigned not null,	# Start position in chromosome\n"
"   chromEnd int unsigned not null,	# End position in chromosome\n"
"   name varchar(255) not null,	# Gene symbol\n"
"   score int unsigned not null,	# Score from 0-1000\n"
"   strand char(1) not null,	# + or - for strand\n"
"   geneId varchar(255) not null,	# Ensembl gene ID, referenced in GTEx data tables\n"
"   geneType varchar(255) not null,	# GENCODE gene biotype\n"
"   expCount int unsigned not null,	# Number of experiment values\n"
"   expScores longblob not null,	# Comma separated list of experiment scores\n"
          "#Indices\n"
"   PRIMARY KEY(chrom,geneId),\n"
"   INDEX(geneId),\n"
"   INDEX(chrom,chromStart)\n"
")\n",
    table);
sqlRemakeTable(conn, table, query);
}

char *gtexGeneClass(struct gtexGeneBed *geneBed)
/* Return the gene "class" (analogous to GENCODE transcriptClass) for the
 * GENCODE gene biotype stored in geneBed->geneType.  The result is one of the
 * constant strings "coding", "pseudo", "nonCoding" or "unknown" and must not
 * be freed.
 *
 * The rules are applied in this order:
 *   unknown:   geneType is NULL
 *   coding:    "coding", "protein_coding", "polymorphic_pseudogene", or any
 *              biotype ending in "_gene" (IG_C_gene, IG_D_gene, IG_J_gene,
 *              IG_V_gene, TR_C_gene, TR_D_gene, TR_J_gene, TR_V_gene)
 *   pseudo:    "pseudo", "pseudogene", or any biotype ending in "_pseudogene"
 *              (IG_C_pseudogene, IG_J_pseudogene, IG_V_pseudogene,
 *              TR_J_pseudogene, TR_V_pseudogene)
 *   nonCoding: everything else (3prime_overlapping_ncrna, Mt_rRNA, Mt_tRNA,
 *              antisense, lincRNA, miRNA, misc_RNA, processed_transcript,
 *              rRNA, sense_intronic, sense_overlapping, snRNA, snoRNA, ...)
 * (MarkD request out for approval).
 */
{
char *biotype = geneBed->geneType;
char *geneClass;

if (biotype == NULL)
    geneClass = "unknown";
/* The coding test must come first: polymorphic_pseudogene would otherwise
 * match the "_pseudogene" suffix below. */
else if (sameString(biotype, "coding")
        || sameString(biotype, "protein_coding")
        || sameString(biotype, "polymorphic_pseudogene")
        || endsWith(biotype, "_gene"))
    geneClass = "coding";
else if (sameString(biotype, "pseudo")
        || sameString(biotype, "pseudogene")
        || endsWith(biotype, "_pseudogene"))
    geneClass = "pseudo";
else
    // A bit of a cheat here -- better a mapping table
    geneClass = "nonCoding";
return geneClass;
}

boolean gtexGeneIsCoding(struct gtexGeneBed *geneBed)
/* Return TRUE if gtexGeneClass() places this gene's biotype in the "coding"
 * class, i.e. it is treated as a protein coding gene. */
{
char *geneClass = gtexGeneClass(geneBed);
return sameString("coding", geneClass);
}

float gtexGeneTotalMedianExpression(struct gtexGeneBed *geneBed)
/* Return the sum of all expCount values in geneBed->expScores (the total of
 * all tissue medians).  Returns 0.0 when there are no scores. */
{
float total = 0.0;
int tissueIx = 0;

while (tissueIx < geneBed->expCount)
    {
    total += geneBed->expScores[tissueIx];
    tissueIx++;
    }
return total;
}

float gtexGeneHighestMedianExpression(struct gtexGeneBed *geneBed, int *tissueIdRet)
/* Return tissue median and id of tissue with highest expression of this gene.
 * The return value is the largest entry of geneBed->expScores and
 * *tissueIdRet receives its index; on ties the lowest index wins.
 * If there are no scores, or no score is greater than zero, 0.0 is returned
 * and *tissueIdRet is set to 0, so the output is always initialized.
 * tissueIdRet must not be NULL. */
{
int i;
int maxIx = 0;
float maxScore = 0.0;
assert(tissueIdRet);
for (i=0; i<geneBed->expCount; i++)
    {
    float score = geneBed->expScores[i];
    if (score > maxScore)
        {
        maxScore = score;
        maxIx = i;
        }
    }
/* Store unconditionally: the caller's variable may be uninitialized. */
*tissueIdRet = maxIx;
return maxScore;
}
