/* Blatz standalone main module.  Handle command-line processing, load sequence 
 * and call alignment routines. */
/* Copyright 2005 Jim Kent.  All rights reserved. */

#include "common.h"
#include "hash.h"
#include "options.h"
#include "errAbort.h"
#include "memalloc.h"
#include "dnautil.h"
#include "dnaseq.h"
#include "chain.h"
#include "fa.h"
#include "dnaLoad.h"
#include "bzp.h"
#include "blatz.h"
#include "dynamic.h" // LX Sep 06 2005

void usage(struct bzp *bzp)
/* Explain usage and exit. */
{
printf("blatz version %d - Align dna across species\n", bzpVersion());
printf("usage:\n");
printf("   blatz target query output\n");
printf("where target and query are either fasta files, nib files, 2bit files\n");
printf("or a text files containing the names of the above files one per line.\n");
printf("It's important that the sequence be repeat masked with repeats in\n");
printf("lower case.\n");
printf("Options: (defaults are shown for numerical parameters)\n");
bzpServerOptionsHelp(bzp);
bzpClientOptionsHelp(bzp);
noWarnAbort();
}

static struct optionSpec options[] = {
   BZP_SERVER_OPTIONS
   BZP_CLIENT_OPTIONS
   {NULL, 0},
};

static void alignAll(struct bzp *bzp, struct blatzIndex *indexList, 
        struct dnaLoad *queryDl, char *outFile)
/* Make up neighorhood index for queryList, and use it to scan
 * targetList.  Put output in outFile */
{
FILE *f = mustOpen(outFile, "w");
struct dnaSeq *query;

// LX BEG
int b, bend, printing; 
FILE *bedfp = NULL;
// See if bed file output of the mask was requested
if (differentString(bzp->dynaBedFileQ, "")) 
   bedfp = mustOpen(bzp->dynaBedFileQ, "w");
// Counts all the query-target hits encountered by the program inside the 
// loops of gapless.c
dynaHits = 0;
// Counts how many target and query positions reached the limit
dynaCountTarget = 0;
dynaCountQuery = 0;
// This is the limit used by the program, currently just bzp->dynaLimit(QT)
// but should be useful for scaling to sequence size
targetHitDLimit = VERY_LARGE_NUMBER; // perhaps unnecessary default
queryHitDLimit = VERY_LARGE_NUMBER; // perhaps unnecessary default
// LX END

while ((query = dnaLoadNext(queryDl)) != NULL)
    {
    double bestScore = 0;
    struct chain *chainList;
    // LX BEG
    if (bzp->dynaLimitQ<VERY_LARGE_NUMBER)
       {
       queryHitDLimit = bzp->dynaLimitQ;
       // allocate zeroed memory for hit counters
       AllocArray(dynaCountQ, query->size);
       }
    // LX END
    if (bzp->unmask || bzp->rna)
        toUpperN(query->dna, query->size);
    if (bzp->rna)
        maskTailPolyA(query->dna, query->size);
    chainList = blatzAlign(bzp, indexList, query);
    if (chainList != NULL) 
    	bestScore = chainList->score;
    else
        {
	if (seqIsLower(query))
	    warn("Sequence %s is all lower case, and thus ignored. Use -unmask "
	         "flag to unmask lower case sequence.", query->name);
	}
    verbose(1, "%s (%d bases) score %2.0f\n", 
            query->name, query->size, bestScore);
    blatzWriteChains(bzp, &chainList, query, 
    	dnaLoadCurStart(queryDl), dnaLoadCurEnd(queryDl),
	dnaLoadCurSize(queryDl), indexList, f);
    // LX BEG
    // This prints the contents of the mask into the .bed file opened above
    if (bedfp != NULL)
       {
       if (bzp->dynaLimitQ<VERY_LARGE_NUMBER)
          {
          printing = 0;
          for (b=0;b<query->size;b++)
              {
              if (dynaCountQ[b] > queryHitDLimit)
                 {
                 if (printing == 0)
                    {
                    printing = 1;
                    fprintf(bedfp,"%s %d ",query->name,b);
                    }
                 }
              if (dynaCountQ[b] <= queryHitDLimit)
                 {
                 if (printing == 1)
                    {
                    printing = 0;
                    bend = b-1;
                    fprintf(bedfp,"%d\n",bend);
                    }
                 }
              }
           }
        else
           {
           fprintf(bedfp,"#No dynamic masking data to print.\n");
           }
        }
    // LX END
    dnaSeqFree(&query);
    }
    // LX BEG
    // Statistics to print about how many hits were dropped (ignored)
    dynaDrops = dynaCountTarget + dynaCountQuery;
    dynaDropsPerc = (float)100*dynaDrops/dynaHits+0.5;
    verbose(2, "%d dynaDrops (%f%%) at T=%d Q=%d \n", 
    	dynaDrops, (double)dynaDropsPerc, targetHitDLimit, queryHitDLimit);
   // Free dynamic memory used for the sequence-length-dependent counter arrays
   freeMem(dynaCountQ);
   if (bedfp != NULL)
      carefulClose(&bedfp);
   freeMem(dynaWordCount);
   // LX END
carefulClose(&f);
}

static void loadAndAlignAll(struct bzp *bzp, 
        char *target, char *query, char *output)
/* blatz - Align genomic dna across species. */
{
struct dnaLoad *queryDl = dnaLoadOpen(query);
struct dnaLoad *targetDl = dnaLoadOpen(target);
struct blatzIndex *indexList = blatzIndexDl(targetDl, bzp->weight, bzp->unmask);
bzpTime("loaded and indexed target DNA");

// LX BEG
if (bzp->dynaWordCoverage > 0)
   {
   dynaNumWords = (pow(4,bzp->weight)); // ?? check with Jim if this is correct
   AllocArray(dynaWordCount,dynaNumWords);
   printf("Allocated word count table of size %d\n",dynaNumWords);
   dynaWordLimit = bzp->dynaWordCoverage; // cheating, should be more like:
   //dynaWordLimit = bzp->dynaWordCoverage*dynaSequenceSize/dynaNumWords;
   printf("Set word limit to  %d\n",dynaWordLimit);
   }
// LX END

verbose(2, "Loaded %d in %s, opened %s\n", slCount(indexList), target,
        query);
alignAll(bzp, indexList, queryDl, output);
}


int main(int argc, char *argv[])
/* Process command line. */
{
struct bzp *bzp = bzpDefault();

/* Do initialization. */
bzpTime(NULL);
dnaUtilOpen();
optionInit(&argc, argv, options);
if (argc != 4)
    usage(bzp);

/* Fill in parameters from command line options. */
bzpSetOptions(bzp);
loadAndAlignAll(bzp, argv[1], argv[2], argv[3]);

/* If you were a turtle you'd be home right now. */
return 0;
}
