/*
 * Copyright 2012, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * aligner_driver.h
 *
 * REDUNDANT SEED HITS
 *
 * We say that two seed hits are redundant if they trigger identical
 * seed-extend dynamic programming problems.  Put another way, they both lie on
 * the same diagonal of the overall read/reference dynamic programming matrix.
 * Detecting redundant seed hits is simple when the seed hits are ungapped.  We
 * do this after offset resolution but before the offset is converted to genome
 * coordinates (see uses of the seenDiags1_/seenDiags2_ fields for examples).
 *
 * REDUNDANT ALIGNMENTS
 *
 * In an unpaired context, we say that two alignments are redundant if they
 * share any cells in the global DP table.  Roughly speaking, this is like
 * saying that two alignments are redundant if any read character aligns to the
 * same reference character (same reference sequence, same strand, same offset)
 * in both alignments.
 *
 * In a paired-end context, we say that two paired-end alignments are redundant
 * if the mate #1s are redundant and the mate #2s are redundant.
 *
 * How do we enforce this?  In the unpaired context, this is relatively simple:
 * the cells from each alignment are checked against a set containing all cells
 * from all previous alignments.  Given a new alignment, for each cell in the
 * new alignment we check whether it is in the set.  If there is any overlap,
 * the new alignment is rejected as redundant.  Otherwise, the new alignment is
 * accepted and its cells are added to the set.
 *
 * Enforcement in a paired context is a little trickier.  Consider the
 * following approaches:
 *
 * 1. Skip anchors that are redundant with any previous anchor or opposite
 *    alignment.  This is sufficient to ensure no two concordant alignments
 *    found are redundant.
 *
 * 2. Same as scheme 1, but with a "transitive closure" scheme for finding all
 *    concordant pairs in the vicinity of an anchor.  Consider the AB/AC
 *    scenario from the previous paragraph.  If B is the anchor alignment, we
 *    will find AB but not AC.  But under this scheme, once we find AB we then
 *    let B be a new anchor and immediately look for its opposites.  Likewise,
 *    if we find any opposite, we make them anchors and continue searching.  We
 *    don't stop searching until every opposite is used as an anchor.
 *
 * 3. Skip anchors that are redundant with any previous anchor alignment (but
 *    allow anchors that are redundant with previous opposite alignments).
 *    This isn't sufficient to avoid redundant concordant alignments.  To avoid
 *    redundant concordants, we need an additional procedure that checks each
 *    new concordant alignment one-by-one against a list of previous concordant
 *    alignments to see if it is redundant.
 *
 * We take approach 1.
 */

#ifndef ALIGNER_DRIVER_H_
#define ALIGNER_DRIVER_H_

#include "aligner_seed2.h"
#include "simple_func.h"
#include "aln_sink.h"

/**
 * Concrete subclass of DescentRootSelector.  Puts a root every 'ival' chars,
 * where 'ival' is determined by user-specified parameters.  A root is filtered
 * out if the end of the read is less than 'landing' positions away, in the
 * direction of the search.
 */
class AlignerDriverRootSelector : public DescentRootSelector {

public:

	AlignerDriverRootSelector(
		double consExp,
		const SimpleFunc& rootIval,
		size_t landing)
	{
		consExp_ = consExp;
		rootIval_ = rootIval;
		landing_ = landing;
	}
	
	virtual ~AlignerDriverRootSelector() { }

	virtual void select(
		const Read& q,                 // read that we're selecting roots for
		const Read* qo,                // opposite mate, if applicable
		bool nofw,                     // don't add roots for fw read
		bool norc,                     // don't add roots for rc read
		EList<DescentConfig>& confs,   // put DescentConfigs here
		EList<DescentRoot>& roots);    // put DescentRoot here

protected:

	double consExp_;
	SimpleFunc rootIval_;
	size_t landing_;
};

/**
 * Return values from extendSeeds and extendSeedsPaired.
 */
enum {
	// Candidates were examined exhaustively
	ALDRIVER_EXHAUSTED_CANDIDATES = 1,
	// The policy does not need us to look any further
	ALDRIVER_POLICY_FULFILLED,
	// We stopped because we ran up against a limit on how much work we should
	// do for one set of seed ranges, e.g. the limit on number of consecutive
	// unproductive DP extensions
	ALDRIVER_EXCEEDED_LIMIT
};

/**
 * This class is the glue between a DescentDriver and the dynamic programming
 * implementations in Bowtie 2.  The DescentDriver is used to find some very
 * high-scoring alignments, but is additionally used to rank partial alignments
 * so that they can be extended using dynamic programming.
 */
template <typename index_t>
class AlignerDriver {

public:

	AlignerDriver(
		double consExp,
		const SimpleFunc& rootIval,
		size_t landing,
		bool veryVerbose,
		const SimpleFunc& totsz,
		const SimpleFunc& totfmops) :
		sel_(consExp, rootIval, landing),
		alsel_(),
		dr1_(veryVerbose),
		dr2_(veryVerbose)
	{
		totsz_ = totsz;
		totfmops_ = totfmops;
	}
	
	/**
	 * Initialize driver with respect to a new read or pair.
	 */
	void initRead(
		const Read& q1,
		bool nofw,
		bool norc,
		TAlScore minsc,
		TAlScore maxpen,
		const Read* q2)
	{
		dr1_.initRead(q1, nofw, norc, minsc, maxpen, q2, &sel_);
		red1_.init(q1.length());
		paired_ = false;
		if(q2 != NULL) {
			dr2_.initRead(*q2, nofw, norc, minsc, maxpen, &q1, &sel_);
			red2_.init(q2->length());
			paired_ = true;
		} else {
			dr2_.reset();
		}
		size_t totsz = totsz_.f<size_t>(q1.length());
		size_t totfmops = totfmops_.f<size_t>(q1.length());
		stop_.init(
			totsz,
			0,
			true,
			totfmops);
	}
	
	/**
	 * Start the driver.  The driver will begin by conducting a best-first,
	 * index-assisted search through the space of possible full and partial
	 * alignments.  This search may be followed up with a dynamic programming
	 * extension step, taking a prioritized set of partial SA ranges found
	 * during the search and extending each with DP.  The process might also be
	 * iterated, with the search being occasioanally halted so that DPs can be
	 * tried, then restarted, etc.
	 */
	int go(
		const Scoring& sc,
		const GFM<index_t>& gfmFw,
		const GFM<index_t>& gfmBw,
		const BitPairReference& ref,
		DescentMetrics& met,
		WalkMetrics& wlm,
		PerReadMetrics& prm,
		RandomSource& rnd,
           AlnSinkWrap<index_t>& sink);
	
	/**
	 * Reset state of all DescentDrivers.
	 */
	void reset() {
		dr1_.reset();
		dr2_.reset();
		red1_.reset();
		red2_.reset();
	}

protected:

	AlignerDriverRootSelector sel_;   // selects where roots should go
	DescentAlignmentSelector<index_t> alsel_;  // one selector can deal with >1 drivers
	DescentDriver<index_t> dr1_;               // driver for mate 1/unpaired reads
	DescentDriver<index_t> dr2_;               // driver for paired-end reads
	DescentStoppingConditions stop_;  // when to pause index-assisted BFS
	bool paired_;                     // current read is paired?

	SimpleFunc totsz_;      // memory limit on best-first search data
	SimpleFunc totfmops_;   // max # FM ops for best-first search

	// For detecting redundant alignments
	RedundantAlns  red1_;   // database of cells used for mate 1 alignments
	RedundantAlns  red2_;   // database of cells used for mate 2 alignments

	// For AlnRes::matchesRef
	ASSERT_ONLY(SStringExpandable<char> raw_refbuf_);
	ASSERT_ONLY(SStringExpandable<uint32_t> raw_destU32_);
	ASSERT_ONLY(EList<bool> raw_matches_);
	ASSERT_ONLY(BTDnaString tmp_rf_);
	ASSERT_ONLY(BTDnaString tmp_rdseq_);
	ASSERT_ONLY(BTString tmp_qseq_);
	ASSERT_ONLY(EList<index_t> tmp_reflens_);
	ASSERT_ONLY(EList<index_t> tmp_refoffs_);
};

#endif /* defined(ALIGNER_DRIVER_H_) */
