/* promoSeqFromCluster - Get promoter regions from cluster. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "portable.h"
#include "dnaseq.h"
#include "fa.h"
#include "jksql.h"
#include "hdb.h"
#include "bed.h"


void usage()
/* Explain usage and exit. */
{
/* This program reads no named options, so the help text lists only the
 * five positional arguments. */
errAbort(
  "promoSeqFromCluster - Get promoter regions from cluster\n"
  "usage:\n"
  "   promoSeqFromCluster database size clusterFile.kgg outDir out.lft\n"
  "clusterFile is the output from Eisen's cluster program in kmeans\n"
  "settings.\n"
  "where:\n"
  "   database - genome database containing the rnaCluster table\n"
  "   size - number of bases to fetch upstream of each cluster\n"
  "   outDir - directory that gets one .fa file per cluster group\n"
  "   out.lft - file that gets one line per promoter with its chromosome position\n"
  );
}

void fetchPromoter(char *database, char *rangeId, int size, FILE *f, FILE *liftFile)
/* Look up every row of the rnaCluster table whose name is rangeId and, for
 * each one, fetch up to size bases next to it: the bases just before
 * chromStart for a + strand row, or just after chromEnd for a - strand row,
 * clipped to stay inside the chromosome.  The bases are written to f as a
 * fasta record named after the row (reverse-complemented for - strand rows),
 * and a tab-separated line of start, name, sequence size, chrom, chrom size
 * and strand is written to liftFile. */
{
struct sqlResult *sr;
char **row;
char query[256];
char *table = "rnaCluster";
int hasBin = hIsBinned(database, table);
struct sqlConnection *conn = hAllocConn(database);

sqlSafef(query, sizeof(query), "select * from %s where name = '%s'", table, rangeId);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* Adding hasBin steps over the first column when the table is binned. */
    struct bed *bed = bedLoadN(row + hasBin, 12);
    int chromSize = hChromSize(database, bed->chrom);
    boolean isRc = (bed->strand[0] == '-');
    struct dnaSeq *seq;
    int start, end;

    if (isRc)
        {
        /* On the minus strand upstream lies past the end of the item. */
        start = bed->chromEnd;
        end = start + size;
        if (end > chromSize)
            end = chromSize;
        }
    else
        {
        /* On the plus strand upstream lies before the start of the item. */
        end = bed->chromStart;
        start = end - size;
        if (start < 0)
            start = 0;
        }
    seq = hChromSeq(database, bed->chrom, start, end);
    if (isRc)
        reverseComplement(seq->dna, seq->size);
    faWriteNext(f, bed->name, seq->dna, seq->size);
    fprintf(liftFile, "%d\t%s\t%d\t%s\t%d\t%s\n",
        start, bed->name, seq->size, bed->chrom, chromSize, bed->strand);
    freeDnaSeq(&seq);
    /* Release the bed loaded for this row; it was previously leaked. */
    bedFree(&bed);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
}

void promoSeqFromCluster(char *database, int size, char *clusterFile, 
	char *outDir, char *outBed)
/* promoSeqFromCluster - Get promoter regions from cluster.
 * Reads clusterFile three words per line, skipping the first (label) line.
 * Word one is the id passed on to fetchPromoter and word three is the group.
 * Each run of consecutive lines sharing a group gets its own fasta file
 * outDir/group.fa, and every promoter also gets a line in the file named by
 * outBed.  Prints one dot to stdout per input line as a progress meter. */
{
char fileName[512];
struct lineFile *lf = lineFileOpen(clusterFile, TRUE);
FILE *f = NULL;
FILE *liftFile = mustOpen(outBed, "w");
char *row[3];
char *id, *group, *lastGroup = NULL;

makeDir(outDir);

lineFileRow(lf, row); /* Skip first row which just has labels. */

while (lineFileRow(lf, row))
    {
    id = row[0];
    group = row[2];
    if (lastGroup == NULL || !sameString(group, lastGroup))
        {
        /* If it's a new group make a new file for it. */
        int len;
        freez(&lastGroup);
        lastGroup = cloneString(group);
        carefulClose(&f);
        len = snprintf(fileName, sizeof(fileName), "%s/%s.fa", outDir, group);
        /* A truncated name would silently send output to the wrong file. */
        if (len < 0 || (size_t)len >= sizeof(fileName))
            errAbort("File name %s/%s.fa is too long", outDir, group);
        f = mustOpen(fileName, "w");
        }
    fetchPromoter(database, id, size, f, liftFile);
    printf(".");
    fflush(stdout);
    }
printf("\n");
lineFileClose(&lf);
carefulClose(&f);
/* Close the lift file and release the group name, both previously leaked. */
carefulClose(&liftFile);
freez(&lastGroup);
}

int main(int argc, char *argv[])
/* Process command line.  Expects five arguments after option parsing; the
 * second (size) must be a plain non-negative decimal integer, otherwise the
 * usage message is shown. */
{
char *s;
int digitCount = 0;

optionHash(&argc, argv);
if (argc != 6)
    usage();
/* Check every character of size, not just the first, so that input such
 * as "100abc" is rejected instead of being read as 100.  The cast to
 * unsigned char keeps isdigit defined for characters with negative values,
 * and the cap of 9 digits keeps the value within the range atoi can
 * return in an int. */
for (s = argv[2]; *s != 0; ++s)
    {
    if (!isdigit((unsigned char)*s) || ++digitCount > 9)
        usage();
    }
if (digitCount == 0)
    usage();
promoSeqFromCluster(argv[1], atoi(argv[2]), argv[3], argv[4], argv[5]);
return 0;
}
