/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef ALIGNER_SEED_POLICY_H_
#define ALIGNER_SEED_POLICY_H_

#include "scoring.h"
#include "simple_func.h"

#define DEFAULT_SEEDMMS 0
#define DEFAULT_SEEDLEN 22

#define DEFAULT_IVAL SIMPLE_FUNC_SQRT
#define DEFAULT_IVAL_A 1.15f
#define DEFAULT_IVAL_B 0.0f

#define DEFAULT_UNGAPPED_HITS 6

/**
 * Encapsulates the set of all parameters that affect what the
 * SeedAligner does with reads.
 */
class SeedAlignmentPolicy {

public:

	/**
	 * Parse alignment policy when provided in this format:
	 * <lab>=<val>;<lab>=<val>;<lab>=<val>...
	 *
	 * And label=value possibilities are:
	 *
	 * Bonus for a match
	 * -----------------
	 *
	 * MA=xx (default: MA=0, or MA=2 if --local is set)
	 *
	 *    xx = Each position where equal read and reference characters match up
	 *         in the alignment contriubtes this amount to the total score.
	 *
	 * Penalty for a mismatch
	 * ----------------------
	 *
	 * MMP={Cxx|Q|RQ} (default: MMP=C6)
	 *
	 *   Cxx = Each mismatch costs xx.  If MMP=Cxx is specified, quality
	 *         values are ignored when assessing penalities for mismatches.
	 *   Q   = Each mismatch incurs a penalty equal to the mismatched base's
	 *         value.
	 *   R   = Each mismatch incurs a penalty equal to the mismatched base's
	 *         rounded quality value.  Qualities are rounded off to the
	 *         nearest 10, and qualities greater than 30 are rounded to 30.
	 *
	 * Penalty for position with N (in either read or reference)
	 * ---------------------------------------------------------
	 *
	 * NP={Cxx|Q|RQ} (default: NP=C1)
	 *
	 *   Cxx = Each alignment position with an N in either the read or the
	 *         reference costs xx.  If NP=Cxx is specified, quality values are
	 *         ignored when assessing penalities for Ns.
	 *   Q   = Each alignment position with an N in either the read or the
	 *         reference incurs a penalty equal to the read base's quality
	 *         value.
	 *   R   = Each alignment position with an N in either the read or the
	 *         reference incurs a penalty equal to the read base's rounded
	 *         quality value.  Qualities are rounded off to the nearest 10,
	 *         and qualities greater than 30 are rounded to 30.
	 *
	 * Penalty for a read gap
	 * ----------------------
	 *
	 * RDG=xx,yy (default: RDG=5,3)
	 *
	 *   xx    = Read gap open penalty.
	 *   yy    = Read gap extension penalty.
	 *
	 * Total cost incurred by a read gap = xx + (yy * gap length)
	 *
	 * Penalty for a reference gap
	 * ---------------------------
	 *
	 * RFG=xx,yy (default: RFG=5,3)
	 *
	 *   xx    = Reference gap open penalty.
	 *   yy    = Reference gap extension penalty.
	 *
	 * Total cost incurred by a reference gap = xx + (yy * gap length)
	 *
	 * Minimum score for valid alignment
	 * ---------------------------------
	 *
	 * MIN=xx,yy (defaults: MIN=-0.6,-0.6, or MIN=0.0,0.66 if --local is set)
	 *
	 *   xx,yy = For a read of length N, the total score must be at least
	 *           xx + (read length * yy) for the alignment to be valid.  The
	 *           total score is the sum of all negative penalties (from
	 *           mismatches and gaps) and all positive bonuses.  The minimum
	 *           can be negative (and is by default in global alignment mode).
	 *
	 * N ceiling
	 * ---------
	 *
	 * NCEIL=xx,yy (default: NCEIL=0.0,0.15)
	 *
	 *   xx,yy = For a read of length N, the number of alignment
	 *           positions with an N in either the read or the
	 *           reference cannot exceed
	 *           ceiling = xx + (read length * yy).  If the ceiling is
	 *           exceeded, the alignment is considered invalid.
	 *
	 * Seeds
	 * -----
	 *
	 * SEED=mm,len,ival (default: SEED=0,22)
	 *
	 *   mm   = Maximum number of mismatches allowed within a seed.
	 *          Must be >= 0 and <= 2.  Note that 2-mismatch mode is
	 *          not fully sensitive; i.e. some 2-mismatch seed
	 *          alignments may be missed.
	 *   len  = Length of seed.
	 *   ival = Interval between seeds.  If not specified, seed
	 *          interval is determined by IVAL.
	 *
	 * Seed interval
	 * -------------
	 *
	 * IVAL={L|S|C},xx,yy (default: IVAL=S,1.0,0.0)
	 *
	 *   L  = let interval between seeds be a linear function of the
	 *        read length.  xx and yy are the constant and linear
	 *        coefficients respectively.  In other words, the interval
	 *        equals a * len + b, where len is the read length.
	 *        Intervals less than 1 are rounded up to 1.
	 *   S  = let interval between seeds be a function of the sqaure
	 *        root of the  read length.  xx and yy are the
	 *        coefficients.  In other words, the interval equals
	 *        a * sqrt(len) + b, where len is the read length.
	 *        Intervals less than 1 are rounded up to 1.
	 *   C  = Like S but uses cube root of length instead of square
	 *        root.
	 *
	 * Example 1:
	 *
	 *  SEED=1,10,5 and read sequence is TGCTATCGTACGATCGTAC:
	 *
	 *  The following seeds are extracted from the forward
	 *  representation of the read and aligned to the reference
	 *  allowing up to 1 mismatch:
	 *
	 *  Read:    TGCTATCGTACGATCGTACA
	 *
	 *  Seed 1+: TGCTATCGTA
	 *  Seed 2+:      TCGTACGATC
	 *  Seed 3+:           CGATCGTACA
	 *
	 *  ...and the following are extracted from the reverse-complement
	 *  representation of the read and align to the reference allowing
	 *  up to 1 mismatch:
	 *
	 *  Seed 1-: TACGATAGCA
	 *  Seed 2-:      GATCGTACGA
	 *  Seed 3-:           TGTACGATCG
	 *
	 * Example 2:
	 *
	 *  SEED=1,20,20 and read sequence is TGCTATCGTACGATC.  The seed
	 *  length is 20 but the read is only 15 characters long.  In this
	 *  case, Bowtie2 automatically shrinks the seed length to be equal
	 *  to the read length.
	 *
	 *  Read:    TGCTATCGTACGATC
	 *
	 *  Seed 1+: TGCTATCGTACGATC
	 *  Seed 1-: GATCGTACGATAGCA
	 *
	 * Example 3:
	 *
	 *  SEED=1,10,10 and read sequence is TGCTATCGTACGATC.  Only one seed
	 *  fits on the read; a second seed would overhang the end of the read
	 *  by 5 positions.  In this case, Bowtie2 extracts one seed.
	 *
	 *  Read:    TGCTATCGTACGATC
	 *
	 *  Seed 1+: TGCTATCGTA
	 *  Seed 1-: TACGATAGCA
	 */
	static void parseString(
                            const       std::string& s,
                            bool        local,
                            bool        noisyHpolymer,
                            bool        ignoreQuals,
                            int&        bonusMatchType,
                            int&        bonusMatch,
                            int&        penMmcType,
                            int&        penMmcMax,
                            int&        penMmcMin,
                            int&        penScMax,
                            int&        penScMin,
                            int&        penNType,
                            int&        penN,
                            int&        penRdExConst,
                            int&        penRfExConst,
                            int&        penRdExLinear,
                            int&        penRfExLinear,
                            SimpleFunc& costMin,
                            SimpleFunc& nCeil,
                            bool&       nCatPair,
                            int&        multiseedMms,
                            int&        multiseedLen,
                            SimpleFunc& multiseedIval,
                            size_t&     failStreak,
                            size_t&     seedRounds,
                            SimpleFunc* penCanIntronLen = NULL,
                            SimpleFunc* penNoncanIntronLen = NULL);
};

#endif /*ndef ALIGNER_SEED_POLICY_H_*/
