/* hgSpeciesRna - Create fasta file with RNA from one species. */

/* Copyright (C) 2013 The Regents of the University of California 
 * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "fa.h"
#include "jksql.h"
#include "hdb.h"

void usage()
/* Explain usage and exit. */
{
errAbort(
  "hgSpeciesRna - Create fasta file with RNA from one species\n"
  "usage:\n"
  "   hgSpeciesRna database Genus species output.fa\n"
  "options:\n"
  "   -est         - If set will get ESTs rather than mRNAs\n"
  "   -filter=file - only read accessions listed in file\n"
  );
}

static struct optionSpec options[] = {
   {"est", OPTION_BOOLEAN},
   {"filter", OPTION_STRING},
   {NULL, 0},
};

void hgSpeciesRna(char *database, char *genus, char *species, char *outFile)
/* hgSpeciesRna - Create fasta file with RNA from one species. */
{
char query[256];
struct sqlConnection *conn;
struct sqlResult *sr;
char **row;
int taxon;
char *type = "mRNA";
char *filter = NULL;
FILE *f = mustOpen(outFile, "w");
char *line;
struct lineFile *lf = NULL;
struct hash *filterHash = newHash(0);

filter = optionVal("filter", NULL);
if (filter != NULL)
    {
    lf = lineFileOpen(filter, TRUE);
    while (lineFileNext(lf, &line, NULL))
        {
        char *name = nextWord(&line);
        if (name == NULL)
           errAbort("bad line %d of %s", lf->lineIx, lf->fileName);
        hashStoreName(filterHash, name);
        }
    lineFileClose(&lf);
    }
if (optionExists("est"))
    type = "EST";
conn = hAllocConn(database);
sqlSafef(query, sizeof(query),
	"select id from organism where name = '%s %s'", genus, species);
taxon = sqlQuickNum(conn, query);
if (taxon <= 0)
    errAbort("Can't find taxon for %s %s", genus, species);
sqlSafef(query, sizeof(query), 
    "select acc from gbCdnaInfo where organism=%d and type='%s'",
    taxon, type);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct dnaSeq *seq = NULL;
    if ( filter == NULL || hashLookup(filterHash, row[0]) != NULL)
        {
        seq = hGenBankGetMrna(database, row[0], NULL);
        faWriteNext(f, seq->name, seq->dna, seq->size);
        dnaSeqFree(&seq);
        }
    }
carefulClose(&f);
}

int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
if (argc != 5)
    usage();
hgSpeciesRna(argv[1], argv[2], argv[3], argv[4]);
return 0;
}
