/* bzp - blatz parameters.  Input settings structure for aligner.  Routine to
 * set options from command line.  A debugging/profiling utility function. */
/* Copyright 2005 Jim Kent.  All rights reserved. */

#include "common.h"
#include "hash.h"
#include "options.h"
#include "axt.h"
#include "gapCalc.h"
#include "portable.h"
#include "bzp.h"
#include "blatz.h"
#include "dynamic.h" // LX

/* Inclusive bounds on the seed weight; bzpSetOptions() rejects a -weight
 * value outside this range and the help text reports it to the user. */
static int minWeight = 6;
static int maxWeight = 15;

struct bzp *bzpDefault()
/* Allocate a new bzp and load it with the built-in default settings.
 * Returns the freshly allocated structure. */
{
struct bzp *defaults;
AllocVar(defaults);
/* NOTE(review): bestScoreOnly is never assigned here; it relies on whatever
 * AllocVar leaves in the structure (presumably zeroed memory - confirm). */

/* Seeding and top-level search behaviour. */
defaults->weight = 11;
defaults->rna = 0;
defaults->minScore = 2000;
defaults->multiHits = 1;
defaults->transition = 1;

/* Score thresholds and size limits for the chaining/extension stages. */
defaults->minGapless = 1600;
defaults->minChain = 2000;
defaults->maxDrop = 1500;
defaults->maxExtend = 1500;
defaults->maxBandGap = 100;
defaults->expandWindow = 10000;
defaults->minExpand = 3000;

/* Substitution scores and gap cost calculators. */
defaults->ss = axtScoreSchemeDefault();
defaults->cheapGap = gapCalcCheap();
defaults->gapCalc = gapCalcDefault();

/* Masking, output format and chain exploration limit. */
defaults->unmask = FALSE;
defaults->out = "chain";
defaults->mafQ = "";
defaults->mafT = "";
defaults->maxChainsToExplore = 2000;

/* Dynamic masking settings (LX, Sep 02 2005 / Sep 06 2005). */
defaults->dynaLimitT = VERY_LARGE_NUMBER;
defaults->dynaLimitQ = VERY_LARGE_NUMBER;
defaults->dynaBedFileQ = "";
defaults->dynaWordCoverage = 0;

return defaults;
}

void bzpServerOptionsHelp(struct bzp *bzp)
/* Print the help text for the server-side alignment options to stdout.
 * The -weight line shows the weight currently stored in bzp together
 * with the permitted minWeight..maxWeight range. */
{
/* Emitted one output line per call; the text is unchanged. */
printf("  -weight=%d - Set number of significant bases in seeds.  Allowed range\n",
       bzp->weight);
printf("               is %d to %d.  Smaller seeds are slower but more sensitive\n",
       minWeight, maxWeight);
fputs("               This controls sensitivity at lowest level\n", stdout);
}

void bzpClientOptionsHelp(struct bzp *bzp)
/* Print the help text for the client-side alignment options to stdout.
 * Each option that takes a value is shown with the value currently held
 * in the corresponding field of bzp, so the caller controls which values
 * appear in the usage message.  Flag options (-rna, -bestScoreOnly,
 * -unmask) and the file options (-matrix, -gapCost) are listed without
 * a value. */
{
printf("  -rna - If set will treat query as mRNA and look for introns in gaps\n");  
/* NOTE(review): "score of to output" below reads like a typo in the help
 * text; left untouched here because it is runtime output. */
printf("  -minScore=%d - Minimum score of to output after final chaining.  Each \n"
       "                 matching base contributes roughly 100 to the score.  This has\n"
       "                 little effect on the speed, but higher minScores will weed out\n"
       "                 weaker alignments.  Controls sensitivity at highest level.\n"
       , bzp->minScore);
printf("  -bestScoreOnly - If set only output highest scoring chain for a given query\n");
printf("  -multiHits=%d - If nonzero takes multiple hits on diagonal to trigger\n"
       "                  gapless extension (MSP). Greatly speeds up searches\n"
       "                  of larger databases at a modest cost in sensitivity\n", 
               bzp->multiHits);
printf("  -transition=%d - If nonzero search single base transition mutations in\n"
       "                   seed. This moderately increases sensitivity at the\n"
       "                   expense of tripling the large database search time\n"
       , bzp->transition);
printf("  -minGapless=%d - Minimum score of maximal gapless alignment (MSP) to \n"
       "                  trigger first level of chaining.\n"
       , bzp->minGapless);
printf("  -minChain=%d - Minimum score of  first level chain to trigger \n"
       "                 banded Smith-Waterman extension.\n", bzp->minChain);
printf("  -maxDrop=%d - Maximum amount score is allowed to drop before terminating\n"
       "                banded extension\n",  bzp->maxDrop);
printf("  -maxExtend=%d - Maximum number of bases to add in banded extension.\n"
       , bzp->maxExtend);
printf("  -maxBandGap=%d - Maximum gap size allowed in banded extension phase\n"
       , bzp->maxBandGap);
printf("  -minExpand=%d - Minimum score for chain to try expansion by doing\n"
       "                  local alignment with a smaller seed.\n"
       , bzp->minExpand);
printf("  -expandWindow=%d - Maximum size of window between blocks of chains\n"
       "                    and before and after first block in which to look\n"
       "                    for alignments using seeds of smaller weight.\n"
       , bzp->expandWindow);
printf("  -maxChainsToExplore=%d - Maximum number of first level chains to explore further\n"
       , bzp->maxChainsToExplore);
printf("  -matrix=fileName - Read scoring matrix from file.\n");
printf("  -gapCost=fileName - Read gap scoring scheme from file.\n");
/* Verbosity is not a bzp field; the current global level is shown instead. */
printf("  -verbose=%d - Print progress info. 0=silent, 1=default, 2=wordy\n", 
        verboseLevel());
printf("  -unmask - Don't treat lower case sequence as masked\n");
printf("  -out=%s - Output in given format.  Options are chain, axt, maf, psl.\n",
        bzp->out);
printf("                For maf there are -mafT=%s and -mafQ=%s options to control\n"
       "                the sequence prefixes in maf files\n", bzp->mafT, bzp->mafQ);
// LX BEG
/* Dynamic masking options.  NOTE(review): dynaLimitT/dynaLimitQ are printed
 * with %d; confirm those fields are plain ints given that their default is
 * VERY_LARGE_NUMBER. */
printf("  -dynaLimitT=%d For dynamic masking. This option controls\n"
       "                the number of hits in target positions before hits get ignored\n", bzp->dynaLimitT);
printf("  -dynaLimitQ=%d For dynamic masking. This option controls\n"
       "                the number of hits in query before hits get ignored\n", bzp->dynaLimitQ);
printf("  -dynaBedFileQ=%s Report the dynamic mask. This option controls\n"
       "                the creation of a bed file containing the mask\n", bzp->dynaBedFileQ);
printf("  -dynaWordCoverage=%d Control the number of times a word must occur\n"
       "                to cause dynamic masking of that word\n", bzp->dynaWordCoverage);
// LX END
}

void bzpSetOptions(struct bzp *bzp)
/* Override the settings in bzp from the command line options.
 * Every optionInt/optionVal call passes the field's current value as its
 * fallback.  The flag fields (rna, bestScoreOnly, unmask) are set purely
 * from whether the flag is present.  The gap calculator comes from the
 * -gapCost file when given, otherwise from gapCalcRnaDna() when -rna is
 * set, otherwise it is left as is.  Calls errAbort if the resulting seed
 * weight is outside minWeight..maxWeight. */
{
bzp->weight = optionInt("weight", bzp->weight);
bzp->rna = optionExists("rna");
bzp->minScore = optionInt("minScore", bzp->minScore);
bzp->bestScoreOnly = optionExists("bestScoreOnly");
bzp->multiHits = optionInt("multiHits", bzp->multiHits);
bzp->transition = optionInt("transition", bzp->transition);
bzp->minGapless = optionInt("minGapless", bzp->minGapless);
bzp->minChain = optionInt("minChain", bzp->minChain);
bzp->maxDrop = optionInt("maxDrop", bzp->maxDrop);
bzp->maxExtend = optionInt("maxExtend", bzp->maxExtend);
bzp->maxBandGap = optionInt("maxBandGap", bzp->maxBandGap);
bzp->expandWindow = optionInt("expandWindow", bzp->expandWindow);
bzp->minExpand = optionInt("minExpand", bzp->minExpand);
/* -maxChainsToExplore is listed by bzpClientOptionsHelp, so it must be read
 * here too; previously the option was silently ignored.
 * NOTE(review): if the program validates its arguments against an optionSpec
 * table, that table needs a maxChainsToExplore entry as well - confirm. */
bzp->maxChainsToExplore = optionInt("maxChainsToExplore", bzp->maxChainsToExplore);
if (optionExists("matrix"))
    bzp->ss = axtScoreSchemeRead(optionVal("matrix", NULL));
/* An explicit gap cost file takes precedence over the -rna gap scheme. */
if (optionExists("gapCost"))
    bzp->gapCalc = gapCalcFromFile(optionVal("gapCost", NULL));
else if (bzp->rna)
    bzp->gapCalc = gapCalcRnaDna();
bzp->unmask = optionExists("unmask");
bzp->out = optionVal("out", bzp->out);
bzp->mafT = optionVal("mafT", bzp->mafT);
bzp->mafQ = optionVal("mafQ", bzp->mafQ);
// LX BEG
bzp->dynaLimitT = optionInt("dynaLimitT", bzp->dynaLimitT);
bzp->dynaLimitQ = optionInt("dynaLimitQ", bzp->dynaLimitQ);
bzp->dynaBedFileQ = optionVal("dynaBedFileQ", bzp->dynaBedFileQ);
bzp->dynaWordCoverage = optionInt("dynaWordCoverage", bzp->dynaWordCoverage);
// LX END

/* Do some checking */
if (bzp->weight < minWeight || bzp->weight > maxWeight)
    errAbort("weight must be between %d and %d", minWeight, maxWeight);
}

int bzpVersion()
/* Return the bzp version number, which is currently 1. */
{
const int version = 1;
return version;
}


/* Master switch for bzpTime(): when FALSE, bzpTime() does nothing at all,
 * whatever the verbosity level. */
boolean bzpTimeOn = TRUE;

void bzpTime(char *label, ...)
/* Print the time elapsed since the previous call, as measured by
 * clock1000(), followed by a message built printf-style from label and
 * the arguments after it.  Output goes to stdout.  A NULL label prints
 * nothing and only resets the reference time.  The whole routine is a
 * no-op unless bzpTimeOn is set and verboseLevel() is above 1. */
{
static long lastTime = 0;
long now;

if (!bzpTimeOn || verboseLevel() <= 1)
    return;

now = clock1000();
if (label != NULL)
    {
    va_list args;
    va_start(args, label);
    fprintf(stdout, "%ld: ", now - lastTime);
    vfprintf(stdout, label, args);
    fputc('\n', stdout);
    va_end(args);
    }
lastTime = now;
}
