/* kgAliasM - generate alias list table (the mRNA part, there is another protein part) */

/* Copyright (C) 2013 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
#include "common.h"
#include "hCommon.h"
#include "hdb.h"

void usage()
/* Explain usage and exit. */
{
errAbort(
  "kgAliasM - create gene alias (mRNA part) .tab files "
  "usage:\n"
  "   kgAliasM xxxx yyyy\n"
  "            xxxx is genome  database name\n"
  "            yyyy is protein database name \n"
  "example: kgAliasM hg15 proteins0405\n");
}

int main(int argc, char *argv[])
{
struct sqlConnection *conn;

char query2[256];
struct sqlResult *sr2;
char **row2;
    
char *chp0, *chp;
char *kgID;
FILE *o1, *o2;
char cond_str[256];
char *database;
char *proteinDB;
boolean doingAlias, bothDone;

char *answer;
char *symbol, *alias, *aliases;

if (argc != 3) usage();
database  = cloneString(argv[1]);
proteinDB = cloneString(argv[2]);

conn= hAllocConn(database);
o1 = fopen("j.dat", "w");
o2 = fopen("jj.dat", "w");

doingAlias = TRUE;
bothDone   = FALSE;

while (!bothDone)
    {
    if (doingAlias)
	{
    	sqlSafef(query2, sizeof query2, "select symbol, aliases from %s.hgnc;", proteinDB);
	}
    else
	{
        sqlSafef(query2, sizeof query2, "select symbol, prvSymbols from %s.hgnc;", proteinDB);
    	}
    
    sr2 = sqlMustGetResult(conn, query2);
    row2 = sqlNextRow(sr2);
    while (row2 != NULL)
	{
	symbol		= row2[0];
	aliases		= row2[1];

	if ( (symbol  != NULL) && (strlen(symbol) != 0) )
	    {
            sqlSafef(cond_str, sizeof cond_str, "geneSymbol = '%s'", symbol);
            answer = sqlGetField(database, "kgXref", "kgID", cond_str);
	    if (answer != NULL)
		{
		kgID = strdup(answer);
		fprintf(o2, "%s\t%s\n", kgID, symbol);
		}
	    if ( (aliases  != NULL) && (strlen(aliases) != 0) && (answer != NULL) )
		{
		kgID = strdup(answer);
    
		chp0 = aliases; 
	    	while (chp0 != NULL)
		    {
		    while (*chp0 == ' ') chp0++;
		    chp = strstr(chp0, ",");
		    if (chp == NULL)
			{
			alias = strdup(chp0);
			
			/* get rid of quote character in some aliases */
			if (*alias == '"') 
			    {
			    *(alias + strlen(alias) - 1) = '\0';
			    alias++;
			    printf("%s\n", alias);fflush(stdout);
			    }
			chp0 = NULL;
			}
		    else
			{
			*chp = '\0';
			
			/* get rid of quote character in some aliases */
			if (*chp0 == '"') 
			    {
			    *(chp0 + strlen(chp0) - 1) = '\0';
			    chp0++;
			    printf("%s\n", chp0);fflush(stdout);
			    }
			alias = strdup(chp0);
			chp0 = chp+1;
			}
		    if (kgID != NULL)
			{
			fprintf(o1, "%s\t%s\t%s\n", kgID, symbol, alias);
			fprintf(o2, "%s\t%s\n", kgID, alias);
			}
		    }
		}
	    }
	row2 = sqlNextRow(sr2);
	}
    sqlFreeResult(&sr2);

    if (doingAlias) 
	{
	doingAlias = FALSE;
	}
    else
	{
	bothDone = TRUE;
	}
    }
fclose(o1);
fclose(o2);

/* geneAlias.tab has 3 columns, the 2nd is HUGO.symbol 
   and 3rd contains aliases and withdraws */

mustSystem("cat  j.dat|sort|uniq  >geneAlias.tab");

/*  kgAliasM.tab has 2 columns, all entries from HUGO.symbol, HUGO.aliass, 
    and HUGO.withdraws are listed in the 2nd column. */
mustSystem("cat jj.dat|sort|uniq  >kgAliasM.tab");
mustSystem("rm j.dat");
mustSystem("rm jj.dat");
    
return(0);
}
