/* ccdsClick - click handling for CCDS track and related functions  */

/* Copyright (C) 2013 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
#include "common.h"
#include "hgc.h"
#include "ccdsClick.h"
#include "ccdsInfo.h"
#include "ccdsNotes.h"
#include "ccdsGeneMap.h"
#include "geneSimilarities.h"
#include "genbank.h"
#include "genePred.h"
#include "genePredReader.h"
#include "ensFace.h"
#include "mgcClick.h"
#include "htmshell.h"

static struct ccdsInfo *getCcdsInfoForSrcDb(struct sqlConnection *conn, char *acc)
/* Look up the ccdsInfo record for a source-database accession (RefSeq,
 * Ensembl, or Vega).  Returns NULL when the database has no ccdsInfo table;
 * otherwise returns whatever ccdsInfoSelectByMrna() yields for acc. */
{
if (!sqlTableExists(conn, "ccdsInfo"))
    return NULL;
return ccdsInfoSelectByMrna(conn, acc);
}

void printCcdsExtUrl(char *ccdsId)
/* Write to stdout the URL of the CCDS database page at NCBI for ccdsId.
 * Only the bare URL is written; callers supply any surrounding markup. */
{
printf("https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&BUILDS=ALLBUILDS&DATA=%s",
       ccdsId);
}

static void printCcdsUrlForSrcDb(struct sqlConnection *conn, struct ccdsInfo *ccdsInfo)
/* Write to stdout the hgc URL for the ccdsGene item named by ccdsInfo->ccds,
 * positioned on the current window (seqName, winStart, winEnd) of the
 * current database.  Both the o= and l= arguments are set to winStart.
 * conn is not used. */
{
char *ccdsId = ccdsInfo->ccds;
printf("../cgi-bin/hgc?%s&g=ccdsGene&i=%s&c=%s&o=%d&l=%d&r=%d&db=%s",
       cartSidUrlString(cart), ccdsId, seqName,
       winStart, winStart, winEnd, database);
}

void printCcdsForSrcDb(struct sqlConnection *conn, char *acc)
/* Print a "CCDS:" line linking to the hgc page of the CCDS associated with
 * a RefSeq, Ensembl, or Vega accession.  Prints nothing when no ccdsInfo
 * record is found for acc. */
{
struct ccdsInfo *ccdsInfo = getCcdsInfoForSrcDb(conn, acc);
if (ccdsInfo == NULL)
    return;
printf("<B>CCDS:</B> <A href=\"");
printCcdsUrlForSrcDb(conn, ccdsInfo);
printf("\">%s</A><BR>", ccdsInfo->ccds);
/* the record was obtained for this call only; release it the same way
 * doCcdsGene() releases its ccdsInfo lists */
ccdsInfoFreeList(&ccdsInfo);
}

struct ccdsGeneMap *getCcdsGenesForMappedGene(struct sqlConnection *conn, char *acc,
                                              char *mapTable)
/* Get the list of CCDS genes that mapTable associates with gene acc in the
 * current window (seqName, winStart, winEnd), sorted with
 * ccdsGeneMapCcdsIdCmp.  The list is empty (NULL) when either mapTable or
 * the ccdsInfo table does not exist. */
{
struct ccdsGeneMap *mapped = NULL;
int haveTables = sqlTableExists(conn, mapTable) && sqlTableExists(conn, "ccdsInfo");
if (haveTables)
    {
    mapped = ccdsGeneMapSelectByGeneOver(conn, mapTable, acc, seqName,
                                         winStart, winEnd, 0.0);
    }
slSort(&mapped, ccdsGeneMapCcdsIdCmp);
return mapped;
}

void printCcdsUrl(struct sqlConnection *conn, char *ccdsId)
/* Write to stdout the hgc URL for the ccdsGene item ccdsId, positioned on
 * the current window (seqName, winStart, winEnd) of the current database.
 * Both the o= and l= arguments are set to winStart.  conn is not used. */
{
printf("../cgi-bin/hgc?%s&g=ccdsGene&i=%s&c=%s&o=%d&l=%d&r=%d&db=%s",
       cartSidUrlString(cart),
       ccdsId,
       seqName,
       winStart, winStart, winEnd,
       database);
}

void printCcdsForMappedGene(struct sqlConnection *conn, char *acc,
                            char *mapTable)
/* Print a "CCDS:" line of comma-separated hgc links, one for each CCDS that
 * mapTable (a ccdsGeneMap table) associates with gene acc in the current
 * window.  Prints nothing when no CCDS is found. */
{
struct ccdsGeneMap *ccdsGenes = getCcdsGenesForMappedGene(conn, acc, mapTable);
struct ccdsGeneMap *gene;
if (ccdsGenes == NULL)
    return;

printf("<B>CCDS:</B> ");
for (gene = ccdsGenes; gene != NULL; gene = gene->next)
    {
    /* separator goes before every link except the first */
    if (gene != ccdsGenes)
        printf(", ");
    printf("<A href=\"");
    printCcdsUrl(conn, gene->ccdsId);
    printf("\">%s</A>", gene->ccdsId);
    }
printf("<BR>\n");

/* the list was built for this call only; don't leak it */
ccdsGeneMapFreeList(&ccdsGenes);
}

static char *getCcdsGeneSymbol(struct sqlConnection *conn, struct ccdsInfo *rsCcds)
/* Find a gene name for a CCDS.  Walks rsCcds in list order, querying
 * refLinkTable for the name whose mrnaAcc matches each entry's mRNA
 * accession (after passing it through genbankDropVer()), and returns the
 * first non-NULL result of sqlQuickString().  Returns NULL if no entry
 * yields a name. */
{
char query[256];
char accBuf[GENBANK_ACC_BUFSZ];
struct ccdsInfo *entry = rsCcds;

while (entry != NULL)
    {
    char *symbol = NULL;
    sqlSafef(query, sizeof(query), "select name from %s where mrnaAcc='%s'",
             refLinkTable, genbankDropVer(accBuf, entry->mrnaAcc));
    symbol = sqlQuickString(conn, query);
    if (symbol != NULL)
        return symbol;
    entry = entry->next;
    }
return NULL;
}

static char *getCcdsRefSeqSummary(struct sqlConnection *conn, struct ccdsInfo *rsCcds)
/* Find a RefSeq summary for a CCDS.  Walks rsCcds in list order, calling
 * getRefSeqSummary() with each entry's mRNA accession (after passing it
 * through genbankDropVer()), and returns the first non-NULL summary.
 * Returns NULL if no entry has one. */
{
char accBuf[GENBANK_ACC_BUFSZ];
struct ccdsInfo *entry = rsCcds;

while (entry != NULL)
    {
    char *text = getRefSeqSummary(conn, genbankDropVer(accBuf, entry->mrnaAcc));
    if (text != NULL)
        return text;
    entry = entry->next;
    }
return NULL;
}

static struct ccdsGeneMap *ccdsGetGenes(struct sqlConnection *conn, char *mapTable,
                                        char *ccdsId)
/* Get the ccdsGeneMap objects that mapTable holds for ccdsId on seqName.
 * Only the best overlapping ones are returned, that is, those whose
 * cdsSimilarity equals the highest cdsSimilarity seen, and the result is
 * then reduced to one entry per gene id.  Entries that are not returned are
 * freed here.  Returns NULL when the table has no entries for ccdsId. */
{
/* filter by chrom due to PAR */
struct ccdsGeneMap *pending = ccdsGeneMapSelectByCcds(conn, mapTable, ccdsId, seqName, 0.0);
struct ccdsGeneMap *best = NULL;
struct ccdsGeneMap *candidate = NULL;

if (pending == NULL)
    return NULL;

/* the first entry seeds the best list; every other entry is compared
 * against the similarity of the current best list's head */
best = slPopHead(&pending);
for (candidate = slPopHead(&pending); candidate != NULL; candidate = slPopHead(&pending))
    {
    if (candidate->cdsSimilarity > best->cdsSimilarity)
        {
        /* strictly better: discard everything kept so far */
        ccdsGeneMapFreeList(&best);
        best = candidate;
        }
    else if (candidate->cdsSimilarity == best->cdsSimilarity)
        {
        /* ties with the best: keep alongside it */
        slAddHead(&best, candidate);
        }
    else
        {
        /* worse than the best: discard */
        ccdsGeneMapFree(&candidate);
        }
    }

/* only keep one of each gene */
slUniqify(&best, ccdsGeneMapGeneIdCmp, ccdsGeneMapFree);

return best;
}

static void printCcdsHgGeneUrl(struct sqlConnection *conn, char *ccdsId, char* kgId)
/* Write to stdout an hgGene URL for the knownGene entry kgId that goes with
 * CCDS ccdsId.  The CCDS is looked up in ccdsGene on seqName to get its
 * strand and transcript range; kgId is then looked up in knownGene within
 * that range and on that strand, and its txStart/txEnd go into the URL.
 * Aborts via errAbort() unless each lookup finds exactly one row. */
{
char where[128];
struct genePredReader *reader = NULL;
struct genePred *ccdsPred = NULL;
struct genePred *kgPred = NULL;

/* the CCDS genePred supplies the location used to find the known gene */
sqlSafef(where, sizeof(where), "chrom = '%s' and name = '%s'", seqName, ccdsId);
reader = genePredReaderQuery(conn, "ccdsGene", where);
ccdsPred = genePredReaderAll(reader);
genePredReaderFree(&reader);
if (ccdsPred == NULL)
    errAbort("%s not found in ccdsGene table for chrom %s", ccdsId, seqName);
else if (ccdsPred->next != NULL)
    errAbort("multiple %s rows found in ccdsGene table for chrom %s", ccdsId, seqName);

/* the known gene genePred supplies the exact location for the link */
sqlSafef(where, sizeof(where), "name = '%s' and strand = '%s'", kgId,
         ccdsPred->strand);
reader = genePredReaderRangeQuery(conn, "knownGene", seqName,
                                  ccdsPred->txStart, ccdsPred->txEnd, where);
kgPred = genePredReaderAll(reader);
genePredReaderFree(&reader);
if (kgPred == NULL)
    errAbort("%s not found in knownGene table for chrom %s", kgId, seqName);
else if (kgPred->next != NULL)
    errAbort("multiple %s rows found in knownGene table for chrom %s", kgId, seqName);

/* CGI variable names are passed as arguments, each paired with its value */
printf("../cgi-bin/hgGene?%s&%s=%s&%s=%s&%s=%s&%s=%d&%s=%d",
       cartSidUrlString(cart),
       "db", database,
       "hgg_gene", kgId,
       "hgg_chrom", seqName,
       "hgg_start", kgPred->txStart,
       "hgg_end", kgPred->txEnd);

genePredFree(&ccdsPred);
genePredFree(&kgPred);
}

static void ccdsNcbiRows(char *ccdsId, struct ccdsInfo *rsCcds)
/* Output one table row per RefSeq entry in rsCcds.  Each row links the mRNA
 * accession through printEntrezNucleotideUrl() and the protein accession
 * through printEntrezProteinUrl().  The first row also carries the "RefSeq"
 * header cell, whose ROWSPAN is the length of rsCcds.  ccdsId is not used. */
{
struct ccdsInfo *entry = rsCcds;
while (entry != NULL)
    {
    printf("<TR>");
    if (entry == rsCcds)
        {
        /* first row only: label cell spanning the whole group */
        printf("<TH ROWSPAN=%d>RefSeq", slCount(rsCcds));
        }

    /* mRNA column */
    printf("<TD><A HREF=\"");
    printEntrezNucleotideUrl(stdout, entry->mrnaAcc);
    printf("\" TARGET=_blank>%s</A>&nbsp;", entry->mrnaAcc);

    /* protein column */
    printf("<TD><A HREF=\"");
    printEntrezProteinUrl(stdout, entry->protAcc);
    printf("\" TARGET=_blank>%s</A>", entry->protAcc);

    printf("</TR>\n");
    entry = entry->next;
    }
}

static void ccdsHinxtonRows(char *ccdsId, bool isVega, struct ccdsInfo *hinCcds)
/* Output one table row per entry in hinCcds, linking the mRNA accession to
 * the ensembl.org geneview page and the protein accession to the protview
 * page.  When isVega is TRUE the group is labelled "Vega" and "&db=vega" is
 * appended to each link; otherwise it is labelled "Ensembl".  Aborts via
 * errAbort() if ensOrgNameFromScientificName() returns NULL for
 * scientificName; ccdsId is used only in that error message. */
{
char *groupLabel = isVega ? "Vega" : "Ensembl";
char *dbArg = isVega ? "&db=vega" : "";
char *ensGenome = ensOrgNameFromScientificName(scientificName);
struct ccdsInfo *entry = NULL;

if (ensGenome == NULL)
    errAbort("%s: ensOrgNameFromScientificName failed", ccdsId);

for (entry = hinCcds; entry != NULL; entry = entry->next)
    {
    printf("<TR>");
    if (entry == hinCcds)
        {
        /* first row only: label cell spanning the whole group */
        printf("<TH ROWSPAN=%d>%s", slCount(hinCcds), groupLabel);
        }

    /* mRNA column */
    printf("<TD><A HREF=\"http://www.ensembl.org/%s/geneview?transcript=%s%s\""
           " TARGET=_blank>%s</A>&nbsp;",
           ensGenome, entry->mrnaAcc, dbArg, entry->mrnaAcc);

    /* protein column */
    printf("<TD><A HREF=\"http://www.ensembl.org/%s/protview?peptide=%s%s\""
           " TARGET=_blank>%s</A>",
           ensGenome, entry->protAcc, dbArg, entry->protAcc);

    printf("</TR>\n");
    }
}

static void ccdsKnownGenesRows(struct sqlConnection *conn, char *ccdsId)
/* Output one table row per known gene that the ccdsKgMap table maps to
 * ccdsId (as selected by ccdsGetGenes()).  Each row links the gene id to
 * hgGene; the protein column is left blank.  The first row also carries the
 * "UCSC Genes" header cell spanning the group.  Outputs nothing when no
 * gene is mapped. */
{
struct ccdsGeneMap *ccdsKgs = ccdsGetGenes(conn, "ccdsKgMap", ccdsId);
struct ccdsGeneMap *ccdsKg;
for (ccdsKg = ccdsKgs; ccdsKg != NULL; ccdsKg = ccdsKg->next)
    {
    /* NOTE(review): spId is fetched and freed but never printed.  The lookup
     * is kept because kgIdToSpId() is not visible here and may have side
     * effects (e.g. aborting on a missing id) -- confirm before removing. */
    char *spId = kgIdToSpId(conn, ccdsKg->geneId);
    printf("<TR>");
    if (ccdsKg == ccdsKgs)
        printf("<TH ROWSPAN=%d>%s", slCount(ccdsKgs), "UCSC Genes");
    printf("<TD><A HREF=\"");
    printCcdsHgGeneUrl(conn, ccdsId, ccdsKg->geneId);
    printf("\" TARGET=_blank>%s</A>", ccdsKg->geneId);

    printf("<TD>&nbsp;");
    freez(&spId);

    printf("</TR>\n");
    }
/* the list returned by ccdsGetGenes() belongs to this function; don't leak it */
ccdsGeneMapFreeList(&ccdsKgs);
}

static void ccdsMgcRows(struct sqlConnection *conn, char *ccdsId)
/* Output one table row per MGC gene found similar to ccdsId, as built by
 * geneSimilaritiesBuildAll() from the ccdsGene and mgcGenes tables.  Each
 * row links the gene name via printMgcDetailsUrl(); the protein column is
 * left blank.  The first row also carries the "MGC" header cell spanning
 * the group.  Outputs nothing when there are no similarities. */
{
// only possible to get multiple CCDS genePreds in PAR, since we are linking
// to details, not browser, only use the first set.
struct geneSimilarities *geneSims
    = geneSimilaritiesBuildAll(conn, TRUE, ccdsId, "ccdsGene", "mgcGenes");
struct geneSim *mgc;

/* guard against an empty result before looking at the first set; without
 * this the loop below would dereference a NULL list head */
if (geneSims == NULL)
    return;

for (mgc = geneSims->genes; mgc != NULL; mgc = mgc->next)
    {
    printf("<TR>");
    if (mgc == geneSims->genes)
        printf("<TH ROWSPAN=%d>MGC", slCount(geneSims->genes));
    printf("<TD><A HREF=\"");
    printMgcDetailsUrl(mgc->gene->name, mgc->gene->txStart);
    printf("\">%s</A>", mgc->gene->name);
    printf("<TD>&nbsp;</TR>\n");
    }
geneSimilaritiesFreeList(&geneSims);
}

static void writeBasicInfoHtml(struct sqlConnection *conn, char *ccdsId, struct ccdsInfo *rsCcds,
                               struct ccdsInfo *vegaCcds, struct ccdsInfo *ensCcds)
/* Write the two-column table of basic information about a CCDS: the gene
 * symbol and the GenBank description (each only when available), links to
 * the CDS, protein and genomic sequences, and a link to the external CCDS
 * database.  rsCcds must be non-NULL, since its first entry's mRNA accession
 * is used for the description lookup.  vegaCcds and ensCcds are not used. */
{
char *geneSym = NULL;
char *desc = NULL;

printf("<TABLE class=\"hgcCcds\"><TBODY>\n");

/* row: gene symbol */
geneSym = getCcdsGeneSymbol(conn, rsCcds);
if (geneSym != NULL)
    printf("<TR><TH>Gene<TD>%s</TR>\n", geneSym);
freez(&geneSym);

/* row: description */
desc = hGenBankGetDesc(database, rsCcds->mrnaAcc, TRUE);
if (desc != NULL)
    printf("<TR><TH>Description<TD>%s</TR>\n", desc);
freez(&desc);

/* row: CCDS sequence links.  Each hgcAnchorSomewhere() call is followed by
 * the link text and the closing </A> (presumably it emits the opening tag) */
printf("<TR>\n"
       "<TH>Sequences"
       "<TD>");
hgcAnchorSomewhere("htcGeneMrna", ccdsId, "ccdsGene", seqName);
printf("CDS</A>, &nbsp;");
hgcAnchorSomewhere("htcTranslatedPredMRna", ccdsId, "translate", seqName);
printf("protein</A>, &nbsp;");
hgcAnchorSomewhere("htcGeneInGenome", ccdsId, "ccdsGene", seqName);
printf("genomic</A>"
       "</TR>\n");

/* row: external CCDS database */
printf("<TR>\n"
       "<TH>CCDS database<TD> <A HREF=\"");
printCcdsExtUrl(ccdsId);
printf("\" TARGET=_blank>%s</A>"
       "</TR>\n", ccdsId);

printf("</TBODY></TABLE>\n"
       "<BR>\n");
}

static void writeLinksHtml(struct sqlConnection *conn, char *ccdsId, struct ccdsInfo *rsCcds,
                           struct ccdsInfo *vegaCcds, struct ccdsInfo *ensCcds)
/* Write the three-column "Associated Sequences" table of links to other
 * browser apps and external databases.  Row groups appear in this order:
 * UCSC Genes (only if the ccdsKgMap table exists), RefSeq, Vega (only if
 * vegaCcds is non-NULL), Ensembl (only if ensCcds is non-NULL), and MGC
 * (only if the mgcGenes table exists).  A note about sequence differences
 * follows the table. */
{
printf("<H3>Associated Sequences</H3>\n"
       "<TABLE class=\"hgcCcds\">\n"
       "<THEAD>\n"
       "<TR><TH>&nbsp;<TH>mRNA<TH>Protein</TR>\n"
       "</THEAD><TBODY>\n");

if (sqlTableExists(conn, "ccdsKgMap"))
    {
    ccdsKnownGenesRows(conn, ccdsId);
    }
ccdsNcbiRows(ccdsId, rsCcds);
if (vegaCcds != NULL)
    {
    ccdsHinxtonRows(ccdsId, TRUE, vegaCcds);
    }
if (ensCcds != NULL)
    {
    ccdsHinxtonRows(ccdsId, FALSE, ensCcds);
    }
if (sqlTableExists(conn, "mgcGenes"))
    {
    ccdsMgcRows(conn, ccdsId);
    }

printf("</TBODY></TABLE>\n");

printf("<P><EM>Note: mRNA and protein sequences in other gene collections "
       "may differ from the CCDS sequences.</EM><br>\n");
}

static void writePublicNotesHtml(struct sqlConnection *conn, char *ccdsId)
/* Write the "Annotator Notes" table (date and note text, ordered by
 * createDate) for ccdsId.  Writes nothing when the ccdsNotes table does not
 * exist or holds no rows for ccdsId.
 * NOTE(review): the list of notes loaded here is not freed. */
{
struct ccdsNotes *notes = NULL;
struct ccdsNotes *note = NULL;

if (sqlTableExists(conn, "ccdsNotes"))
    {
    notes = sqlQueryObjs(conn, (sqlLoadFunc)ccdsNotesLoad, sqlQueryMulti,
                         "select * from ccdsNotes where ccds = \"%s\" order by createDate", ccdsId);
    }
if (notes == NULL)
    return;

htmlHorizontalLine();
printf("<H3>Annotator Notes</H3>\n"
       "<TABLE class=\"hgcCcds\">\n"
       "<THEAD>\n"
       "<TR><TH>Date<TH>Note</TR>\n"
       "</THEAD><TBODY>\n");
for (note = notes; note != NULL; note = note->next)
    {
    printf("<TR><TD style=\"white-space: nowrap;\">%s<TD>%s</TR>", note->createDate, note->note);
    }
printf("</TBODY></TABLE>\n");
}

static void writeRefSeqSummaryHtml(struct sqlConnection *conn, char *ccdsId, struct ccdsInfo *rsCcds)
/* Write a horizontal line, a "RefSeq summary of <ccdsId>" heading and the
 * summary paragraph, using the summary getCcdsRefSeqSummary() finds for
 * rsCcds.  Writes nothing when no summary is available. */
{
char *summary = getCcdsRefSeqSummary(conn, rsCcds);
if (summary == NULL)
    return;

htmlHorizontalLine();
printf("<H3>RefSeq summary of %s</H3>\n", ccdsId);
printf("<P>%s</P>\n", summary);
freez(&summary);
}

void doCcdsGene(struct trackDb *tdb, char *ccdsId)
/* Process a click on a CCDS gene: load the NCBI, Vega and Ensembl ccdsInfo
 * entries for ccdsId, then write the details page (basic info, associated
 * sequences, annotator notes, RefSeq summary, track description).  Aborts
 * via errAbort() when there is no NCBI entry, or when there is neither a
 * Vega nor an Ensembl entry. */
{
struct sqlConnection *conn = hAllocConn(database);

/* one ccdsInfo list per source database */
struct ccdsInfo *rsCcds = ccdsInfoSelectByCcds(conn, ccdsId, ccdsInfoNcbi);
struct ccdsInfo *vegaCcds = ccdsInfoSelectByCcds(conn, ccdsId, ccdsInfoVega);
struct ccdsInfo *ensCcds = ccdsInfoSelectByCcds(conn, ccdsId, ccdsInfoEnsembl);

/* consistency checks, done before any page output is started */
if (rsCcds == NULL)
    {
    errAbort("database inconsistency: no NCBI ccdsInfo entries found for %s", ccdsId);
    }
if (vegaCcds == NULL && ensCcds == NULL)
    {
    errAbort("database inconsistency: no Hinxton ccdsInfo entries found for %s", ccdsId);
    }

ccdsInfoMRnaSort(&rsCcds);
ccdsInfoMRnaSort(&vegaCcds);
ccdsInfoMRnaSort(&ensCcds);

/* page header and title */
cartWebStart(cart, database, "CCDS Gene");
printf("<H2>Consensus CDS Gene %s</H2>\n", ccdsId);

/* page sections, in display order */
writeBasicInfoHtml(conn, ccdsId, rsCcds, vegaCcds, ensCcds);
writeLinksHtml(conn, ccdsId, rsCcds, vegaCcds, ensCcds);
writePublicNotesHtml(conn, ccdsId);
writeRefSeqSummaryHtml(conn, ccdsId, rsCcds);
htmlHorizontalLine();
printTrackHtml(tdb);

/* cleanup */
ccdsInfoFreeList(&rsCcds);
ccdsInfoFreeList(&vegaCcds);
ccdsInfoFreeList(&ensCcds);
hFreeConn(&conn);
}

