/* regClusterAttachMetadataToTableOfTables - Try and find metadata (cell line, antibody, etc) for tables - using metaDb first, and if no metaDb object then trying to parse it out of file name. */

/* Copyright (C) 2013 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "jksql.h"
#include "cv.h"


void usage()
/* Print the command line help text and abort. */
{
static char *usageText =
  "regClusterAttachMetadataToTableOfTables - Try and find metadata (cell line, antibody, etc)\n"
  "for tables - using metaDb first, and if no metaDb object then trying to parse it out of\n"
  "file name\n"
  "usage:\n"
  "   regClusterAttachMetadataToTableOfTables database partial.table output.table\n"
  "options:\n"
  "   -antibodyTarget - substitute target (from cv.ra) for antibody\n"
  ;
errAbort("%s", usageText);
}

/* Set in main from the -antibodyTarget command line flag.  When TRUE getMetaFromMetaDb
 * looks the antibody up in the controlled vocabulary and reports its target instead. */
boolean antibodyTarget = FALSE;

/* Command line options handed to optionInit; the list ends with the NULL entry. */
static struct optionSpec options[] = {
   {"antibodyTarget", OPTION_BOOLEAN},
   {NULL, 0}
};

static char *metaDbVarVal(struct sqlConnection *conn, char *obj, char *var)
/* Return the val column of the metaDb row for obj and var, as given back by
 * sqlQuickString.  The result is NULL when the query finds nothing. */
{
char query[256];
sqlSafef(query, sizeof(query), "select val from metaDb where obj='%s' and var='%s'", obj, var);
return sqlQuickString(conn, query);
}

boolean getMetaFromMetaDb(struct sqlConnection *conn,
	char *obj, char **retCell, char **retAntibody, char **retTreatment, char **retLab)
/* Look up obj in the metaDb table and store its 'cell', 'antibody', 'treatment' and 'lab'
 * values in *retCell, *retAntibody, *retTreatment and *retLab.  Any variable without a
 * row comes back as NULL.  If the antibodyTarget option is on and the antibody has a
 * controlled vocabulary entry, *retAntibody becomes that entry's target when it has one.
 * Returns TRUE if at least one of cell, antibody or treatment was found.  Lab is not part
 * of that test, so an object with only a lab still counts as missing. */
{
*retCell = metaDbVarVal(conn, obj, "cell");
*retAntibody = metaDbVarVal(conn, obj, "antibody");

/* Only consult the controlled vocabulary when there is an antibody to look up, so that
 * a NULL term is never handed to the cv hash lookup. */
if (antibodyTarget && *retAntibody != NULL)
    {
    struct hash *cvTerm = (struct hash *)cvOneTermHash(CV_TERM_ANTIBODY, *retAntibody);
    if (cvTerm)
        {
        /* Falls back to the antibody name itself if the term has no target. */
        *retAntibody = hashOptionalVal(cvTerm, CV_TARGET, *retAntibody);
        }
    }

*retTreatment = metaDbVarVal(conn, obj, "treatment");
*retLab = metaDbVarVal(conn, obj, "lab");
return *retCell != NULL || *retAntibody != NULL || *retTreatment != NULL;
}

char *firstUpper(char *s)
/* Return pointer to first upper case letter in string, or NULL if the string
 * has no upper case letter. */
{
for ( ; *s != 0; ++s)
    {
    /* Go through unsigned char: handing isupper a negative char value (any byte
     * with the high bit set where char is signed) is undefined behavior. */
    if (isupper((unsigned char)*s))
        return s;
    }
return NULL;
}

struct haibNameMap
/* Pairs an identifier as it is spelled inside a table name with the name to report. */
    {
    char *id;		/* CamelCase piece of the table name */
    char *name;		/* Official spelling */
    };

/* Cell lines that can be parsed out of a table name.  Ends with a NULL entry. */
static struct haibNameMap haibCellNames[] = {
    {"Gm12878", "GM12878"},
    {"Gm12891", "GM12891"},
    {"Gm12892", "GM12892"},
    {"H1hesc", "H1-hESC"},
    {"Hepg2", "HepG2"},
    {"K562", "K562"},
    {"A549", "A549"},
    {NULL, NULL}
};

/* Antibodies that can be parsed out of a table name.  Ends with a NULL entry. */
static struct haibNameMap haibAntibodyNames[] = {
    {"Bcl3", "BCL3"},
    {"Ebf", "EBF"},
    {"Egr1", "Egr-1"},
    {"Irf4", "IRF4_(M-17)"},
    {"Oct2", "Oct-2"},		// This is just a guess, Antibody not even on wiki yet
    {"Pou2f2", "POU2F2"},	// Again a guess. Ironically Oct-2 and POU2F2 are same gene
    {"Sin3ak20", "Sin3Ak-20"},
    {"Taf1", "TAF1"},
    {"Yy1", "YY1_(C-20)"},
    {"Pol24h8", "Pol2-4H8"},
    {"Pol2", "Pol2"},
    {"Nrsf", "NRSF"},
    {"P300", "p300"},
    {"Atf3", "ATF3"},
    {"Gabp", "GABP"},
    {"Max", "Max"},
    {NULL, NULL}
};

static char *haibLookupName(struct haibNameMap *map, char *id)
/* Return the name paired with id in the NULL-terminated map, or NULL if id is not there.
 * The comparison is an exact, case sensitive match. */
{
for ( ; map->id != NULL; ++map)
    {
    if (sameString(map->id, id))
        return map->name;
    }
return NULL;
}

void getMetaFromObjName(char *obj, char **retCell, char **retAntibody, char **retTreatment,
	char **retLab)
/* Work out cell, antibody, treatment and lab from the name of a wgEncodeHaibTfbs object.
 * The name is expected to be the prefix wgEncodeHaibTfbs followed by a cell piece, an
 * antibody piece and at least one more piece, each starting with an upper case letter.
 * The strings stored through the ret pointers are constants and must not be freed.
 * Treatment is always "None" and lab is always "HudsonAlpha".  Aborts if obj has another
 * prefix, has too few pieces, or names a cell or antibody that is not in the tables. */
{
/* Skip past first bits. */
char *pattern = "wgEncodeHaibTfbs";
int patLen = strlen(pattern);
if (!startsWith(pattern, obj))
   errAbort("getMetaFromObjName can't handle %s since it doesn't begin with %s", obj, pattern);

/* Rely on CamelCasing to pick out cell/antibody/treatment.  Each search starts one past
 * the first letter of the previous piece, so make sure the cell piece is not empty
 * before stepping over its first letter. */
char *cellStart = obj+patLen;
char *abStart = (cellStart[0] != 0 ? firstUpper(cellStart+1) : NULL);
if (abStart == NULL)
    errAbort("Can't find start of antibody in %s", obj);
char *treatmentStart = firstUpper(abStart+1);
if (treatmentStart == NULL)
    errAbort("Can't find what follows the antibody in %s", obj);

/* Look up cell */
char *cellId = cloneStringZ(cellStart, abStart - cellStart);
char *cell = haibLookupName(haibCellNames, cellId);
if (cell == NULL)
    errAbort("Unrecognized cellId %s in %s", cellId, obj);

/* Look up antibody */
char *abId = cloneStringZ(abStart, treatmentStart - abStart);
char *antibody = haibLookupName(haibAntibodyNames, abId);
if (antibody == NULL)
    errAbort("Unrecognized abId %s in %s", abId, obj);

/* Clean up, set return variables, and go home. */
freez(&cellId);
freez(&abId);
*retCell = cell;
*retAntibody = antibody;
/* Looks like all of the HAIB missing metadata have no treatment. */
*retTreatment = "None";
/* If we are wgEncodeHaib prefix then we must be HudsonAlpha */
*retLab = "HudsonAlpha";
}

void regClusterAttachMetadataToTableOfTables(char *database, char *partialTable, char *outputTable)
/* regClusterAttachMetadataToTableOfTables - Try and find metadata (cell line, antibody, etc) for 
 * tables - using metaDb first, and if no metaDb object then trying to parse it out of file name. */
{
struct sqlConnection *conn = sqlConnect(database);
struct lineFile *lf = lineFileOpen(partialTable, TRUE);
FILE *f = mustOpen(outputTable, "w");
char *row[7];

while (lineFileRow(lf, row))
    {
    /* Get object ID and attempt to find basic experimental variables from metadata. */
    char *obj = row[6];
    char *cell = NULL, *antibody=NULL, *treatment=NULL, *lab = NULL;
    if (!getMetaFromMetaDb(conn, obj, &cell, &antibody, &treatment, &lab))
        {
        verbose(3, "Can't get metadata for %s from metaDb, parsing filename\n", obj); 
        getMetaFromObjName(obj, &cell, &antibody, &treatment, &lab);
        }

    /* Write out first fields unchanged, and append our new fields. */
    int i;
    for (i=0; i<7; ++i)
    	fprintf(f, "%s\t", row[i]);
    fprintf(f, "%s\t", naForNull(cell));
    fprintf(f, "%s\t", naForNull(antibody));
    if (treatment == NULL)
        treatment = "None";
    fprintf(f, "%s\t", treatment);
    fprintf(f, "%s\n", naForNull(lab));
    }

/* Clean up and go home. */
carefulClose(&f);
lineFileClose(&lf);
sqlDisconnect(&conn);
}

int main(int argc, char *argv[])
/* Parse the options, insist on exactly three arguments, and run the conversion. */
{
optionInit(&argc, argv, options);
if (argc != 4)
    usage();
antibodyTarget = optionExists("antibodyTarget");

char *database = argv[1];
char *partialTable = argv[2];
char *outputTable = argv[3];
regClusterAttachMetadataToTableOfTables(database, partialTable, outputTable);
return 0;
}
