/* illuminaLookup1M - generate data for Illumina 1M and other SNP arrays track. */
/* use UCSC snpXXX if dbSNP ID is found */
/* use Illumina data if not found */
/* This program is adopted from Heather's illuminaLookup2.c, with substantial simplyfication */

/* Copyright (C) 2013 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */

#include "common.h"
#include "hash.h"
#include "hdb.h"


char *snpDb = NULL;
struct snpSubset 
    {
    char *chrom;
    int start;
    int end;
    char *strand;
    char *observed;
    char *class;
    char *locType;
    };

void usage()
/* Explain usage and exit. */
{
errAbort(
    "illuminaLookup1M - just use rsId/chrom/pos from illumina\n"
    "usage:\n"
    "    illuminaLookup db illuminaPrelimTable snpTable\n");
}

struct hash *storeSnps(char *tableName)
/* store subset data for SNPs in a hash */
/* exclude SNPs that align multiple places */
{
struct hash *ret = NULL;
char query[512];
struct sqlConnection *conn = hAllocConn(snpDb);
struct sqlResult *sr;
char **row;
struct snpSubset *subsetElement = NULL;

verbose(1, "creating SNP hash...\n");
ret = newHash(16);
sqlSafef(query, sizeof(query), 
      "select name, chrom, chromStart, chromEnd, strand, observed, class, locType from %s", 
      tableName);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    AllocVar(subsetElement);
    subsetElement->chrom = cloneString(row[1]);
    subsetElement->start = sqlUnsigned(row[2]);
    subsetElement->end = sqlUnsigned(row[3]);
    subsetElement->strand = cloneString(row[4]);
    subsetElement->observed = cloneString(row[5]);
    subsetElement->class = cloneString(row[6]);
    subsetElement->locType = cloneString(row[7]);
    hashAdd(ret, cloneString(row[0]), subsetElement);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
return ret;
}

void processSnps(struct hash *snpHash, char *illuminaTable)
/* read illuminaTable */
/* lookup details in snpHash */
/* report and skip if SNP missing, class != single, locType != exact */
{
char query[512];
struct sqlConnection *conn = hAllocConn(snpDb);
struct sqlResult *sr;
char **row;
struct hashEl *hel = NULL;
struct snpSubset *subsetElement = NULL;
struct snpSubset *snp = NULL;

char *SNPname, *dbSNPrefStrand, *ILMNrefStrand, *SNPalleles, *GenomeBuild, *Chromosome, *Coordinate;
			    
FILE *output = mustOpen("illuminaLookup.out", "w");
FILE *errors = mustOpen("illuminaLookup.err", "w");

verbose(1, "process SNPs...\n");
sqlSafef(query, sizeof(query), "select * from %s", illuminaTable);

sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    SNPname 		= row[0];
    dbSNPrefStrand 	= row[1];
    ILMNrefStrand	= row[2];
    SNPalleles		= row[3];
    GenomeBuild		= row[4];
    Chromosome		= row[5];
    Coordinate		= row[6];
    
    hel = hashLookup(snpHash, SNPname);
    if (hel == NULL) 
        {
	fprintf(errors, "chr%s\t%d\t%d\t%s\t0\t%s\t%s\n", Chromosome, 
		atoi(Coordinate)-1, atoi(Coordinate), SNPname, ILMNrefStrand, SNPalleles);
	continue;
	}

    /* include all alignments */
    for (hel = hashLookup(snpHash, SNPname); hel != NULL; hel = hashLookupNext(hel))
 	{
    	subsetElement = (struct snpSubset *)hel->val;
    	snp = (struct snpSubset *)hel->val;
    
    	fprintf(output, "%s\t%d\t%d\t%s\t0\t%s\t%s\n", 
    	    	snp->chrom, snp->start, snp->end, SNPname, snp->strand, snp->observed);
   	}
    }

sqlFreeResult(&sr);
hFreeConn(&conn);
carefulClose(&errors);
carefulClose(&output);
}

int main(int argc, char *argv[])
/* load SNPs into hash */
{
char *illuminaTableName = NULL;
char *snpTableName = NULL;

struct hash *snpHash = NULL;

if (argc != 4)
    usage();

snpDb = argv[1];

illuminaTableName = argv[2];
snpTableName = argv[3];

/* process args */
if (!hTableExists(snpDb, illuminaTableName))
    errAbort("no %s table in %s\n", illuminaTableName, snpDb);
if (!hTableExists(snpDb, snpTableName))
    errAbort("no %s table in %s\n", snpTableName, snpDb);
snpHash = storeSnps(snpTableName);
processSnps(snpHash, illuminaTableName);

return 0;
}
