// Copyright 2011 Martin C. Frith

// This class calculates a Position-Specific-Scoring-Matrix from
// a sequence with quality data.

// The sequence "letters" are actually small integers.  The first
// numNormalLetters letters are considered to be "normal" (e.g. ACGT
// for DNA).  The remaining letters are "abnormal" (e.g. ambiguous,
// delimiter).

// There must be numNormalLetters quality codes per letter (e.g. from
// PRB format).

// Quality codes are transformed to quality scores like this:
// q  =  qualityCode - qualityOffset

// Quality scores are transformed to error probabilities like this:
// e(q)  =  1 / (1 + 10^(q/10))

// The score matrix Sij is adjusted by the quality data like this:
// Rij    =  exp(lambda * Sij)
// P      =  1 - e
// R'ijq  =  sum(k)[ Pk * Rik ]
// S'ijq  =  nearestInt[ ln(R'ijq) / lambda ]

// The numNormalLetters-th letter is considered to be a delimiter.
// When the letter in this sequence is a delimiter, or when the letter
// in the other sequence is abnormal, the quality data is ignored:
// S'ijq = Sij.

// Some letters may be considered to be "masked" versions of other
// letters (e.g. indicated by lowercase in the original sequence).  If
// masking is not applied, masked letters get the same scores as their
// unmasked versions.  If masking is applied: score <- min(score, 0).

#ifndef QUALITY_PSSM_MAKER_HH
#define QUALITY_PSSM_MAKER_HH

#include "ScoreMatrixRow.hh"

namespace cbrc {

typedef unsigned char uchar;

class QualityPssmMaker {
 public:
  void init(const ScoreMatrixRow *scoreMatrix,
            int numNormalLetters,  // 4 for DNA
            double lambda,  // scale factor for scoreMatrix
            bool isMatchMismatchMatrix,
            int matchScore,
            int mismatchScore,  // score not cost!
	    bool isPhred,  // typically false
            int qualityOffset,  // typically 64
            const uchar* toUnmasked);  // maps letters to unmasked letters

  void make(const uchar *sequenceBeg,
            const uchar *sequenceEnd,
            const uchar *qualityBeg,
            int *pssmBeg,
            bool isApplyMasking) const;

  const double *qualToProbRight() const { return qualityToProbCorrect; }

 private:
  static const int qualityCapacity = 128;

  const ScoreMatrixRow *scoreMatrix;
  int numNormalLetters;
  double lambda;
  bool isMatchMismatchMatrix;
  const uchar* toUnmasked;

  double qualityToProbCorrect[qualityCapacity];
  int qualityToMatchScore[qualityCapacity];
  double expMatrix[scoreMatrixRowSize][scoreMatrixRowSize];
};

}

#endif
