/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 * This file is edited by Yun (Leo) Zhang for HISAT-3N.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef READ_H_
#define READ_H_

#include <stdint.h>
#include <sys/time.h>
#include "ds.h"
#include "sstring.h"
#include "filebuf.h"
#include "util.h"


/**
 * the threeN_cycle
 */
/*enum {
    threeN_CT_FW = 0,
    threeN_CT_RC,
    threeN_GA_FW,
    threeN_GA_RC
};*/

enum {
    threeN_type1conversion_FW = 0,
    threeN_type1conversion_RC,
    threeN_type2conversion_FW,
    threeN_type2conversion_RC
};

enum rna_strandness_format {
    RNA_STRANDNESS_UNKNOWN = 0,
    RNA_STRANDNESS_F,
    RNA_STRANDNESS_R,
    RNA_STRANDNESS_FR,
    RNA_STRANDNESS_RF
};

typedef uint64_t TReadId;
typedef size_t TReadOff;
typedef int64_t TAlScore;

extern bool threeN;

class HitSet;

/**
 * A buffer for keeping all relevant information about a single read.
 */
struct Read {

	Read() { reset(); }
	
	Read(const char *nm, const char *seq, const char *ql) { init(nm, seq, ql); }

	void reset() {
		rdid = 0;
		endid = 0;
		alts = 0;
		trimmed5 = trimmed3 = 0;
		readOrigBuf.clear();
		patFw.clear();
		patFw_3N.clear();
		patRc.clear();
		qual.clear();
		patFwRev.clear();
		patRcRev.clear();
		qualRev.clear();
		name.clear();
		originalFw.clear();
		originalRc.clear();
		for(int j = 0; j < 3; j++) {
			altPatFw[j].clear();
			altPatFwRev[j].clear();
			altPatRc[j].clear();
			altPatRcRev[j].clear();
			altQual[j].clear();
			altQualRev[j].clear();
		}
		color = fuzzy = false;
		primer = '?';
		trimc = '?';
		filter = '?';
		seed = 0;
		ns_ = 0;
		threeN_cycle = 0;
        oppositeConversion_3N = false;
	}

	/**
	 * Finish initializing a new read.
	 */
	void finalize() {
		for(size_t i = 0; i < patFw.length(); i++) {
			if((int)patFw[i] > 3) {
				ns_++;
			}
		}
		constructRevComps();
		constructReverses();
	}

    /**
     * change patFw sequence based on current threeN_cycle and newMappingCycle.
     *
     * There are two types of changes:
     * type1conversion: hs3N_convertedFrom to hs3N_convertedTo
     * type2conversion: hs3N_convertedFromComplement to hs3N_convertedToComplement
     *
     * The initial threeN_cycle is 0. There are 4 cycle: 0, 1, 2, 3;
     *
     *             mate 1,                              mate2
     * initial:    threeN_type1conversion_FW(0),        threeN_type1conversion_FW(0),
     *             ---------------                             type1->type2       change conversion type
     * 1st cycle:  threeN_type1conversion_FW(0),        threeN_type2conversion_RC(3 = 3-0),
     * 2nd cycle:  threeN_type1conversion_RC(1),        threeN_type2conversion_FW(2 = 3-1),
     *                    type1c->type2                        type2->type1       change conversion type
     * 3rd cycle:  threeN_type2conversion_FW(2),        threeN_type1conversion_RC(1 = 3-2),
     * 4rd cycle:  threeN_type2conversion_RC(3),        threeN_type1conversion_FW(0 = 3-3),
     */
    void changePlan3N(int newMappingCycle) {
	    if (name.length() == 0) return;
	    if ((threeN_cycle == threeN_type1conversion_FW && newMappingCycle == threeN_type2conversion_RC) ||
	        (threeN_cycle == threeN_type1conversion_RC && newMappingCycle == threeN_type2conversion_FW) ||
	        (threeN_cycle == threeN_type2conversion_FW && newMappingCycle == threeN_type1conversion_RC)) {
            ns_ = 0;
            swap(patFw, patFw_3N);
            finalize();
	    }
        threeN_cycle = newMappingCycle;
        oppositeConversion_3N = false;
	}

	/**
	 * Simple init function, used for testing.
	 */
	void init(
		const char *nm,
		const char *seq,
		const char *ql)
	{
		reset();
		patFw.installChars(seq);
		qual.install(ql);
		for(size_t i = 0; i < patFw.length(); i++) {
			if((int)patFw[i] > 3) {
				ns_++;
			}
		}
		constructRevComps();
		constructReverses();
		if(nm != NULL) name.install(nm);
	}

	/// Return true iff the read (pair) is empty
	bool empty() const {
		return patFw.empty();
	}

	/// Return length of the read in the buffer
	size_t length() const {
		return patFw.length();
	}
	
	/**
	 * Return the number of Ns in the read.
	 */
	size_t ns() const {
		return ns_;
	}

	/**
	 * Construct reverse complement of the pattern and the fuzzy
	 * alternative patters.  If read is in colorspace, just reverse
	 * them.
	 */
	void constructRevComps() {
		if(color) {
			patRc.installReverse(patFw);
			for(int j = 0; j < alts; j++) {
				altPatRc[j].installReverse(altPatFw[j]);
			}
            if (threeN) originalRc.installReverse(originalFw);
		} else {
			patRc.installReverseComp(patFw);
			for(int j = 0; j < alts; j++) {
				altPatRc[j].installReverseComp(altPatFw[j]);
			}
            if (threeN) originalRc.installReverseComp(originalFw);
		}
	}

	/**
	 * Given patFw, patRc, and qual, construct the *Rev versions in
	 * place.  Assumes constructRevComps() was called previously.
	 */
	void constructReverses() {
		patFwRev.installReverse(patFw);
		patRcRev.installReverse(patRc);
		qualRev.installReverse(qual);
		for(int j = 0; j < alts; j++) {
			altPatFwRev[j].installReverse(altPatFw[j]);
			altPatRcRev[j].installReverse(altPatRc[j]);
			altQualRev[j].installReverse(altQual[j]);
		}
	}

	/**
	 * Append a "/1" or "/2" string onto the end of the name buf if
	 * it's not already there.
	 */
	void fixMateName(int i) {
		assert(i == 1 || i == 2);
		size_t namelen = name.length();
		bool append = false;
		if(namelen < 2) {
			// Name is too short to possibly have /1 or /2 on the end
			append = true;
		} else {
			if(i == 1) {
				// append = true iff mate name does not already end in /1
				append =
					name[namelen-2] != '/' ||
					name[namelen-1] != '1';
			} else {
				// append = true iff mate name does not already end in /2
				append =
					name[namelen-2] != '/' ||
					name[namelen-1] != '2';
			}
		}
		if(append) {
			name.append('/');
			name.append("012"[i]);
		}
	}

	/**
	 * Dump basic information about this read to the given ostream.
	 */
	void dump(std::ostream& os) const {
		using namespace std;
		os << name << ' ';
		if(color) {
			os << patFw.toZBufXForm("0123.");
		} else {
			os << patFw;
		}
		os << ' ';
		// Print out the fuzzy alternative sequences
		for(int j = 0; j < 3; j++) {
			bool started = false;
			if(!altQual[j].empty()) {
				for(size_t i = 0; i < length(); i++) {
					if(altQual[j][i] != '!') {
						started = true;
					}
					if(started) {
						if(altQual[j][i] == '!') {
							os << '-';
						} else {
							if(color) {
								os << "0123."[(int)altPatFw[j][i]];
							} else {
								os << altPatFw[j][i];
							}
						}
					}
				}
			}
			cout << " ";
		}
		os << qual.toZBuf() << " ";
		// Print out the fuzzy alternative quality strings
		for(int j = 0; j < 3; j++) {
			bool started = false;
			if(!altQual[j].empty()) {
				for(size_t i = 0; i < length(); i++) {
					if(altQual[j][i] != '!') {
						started = true;
					}
					if(started) {
						os << altQual[j][i];
					}
				}
			}
			if(j == 2) {
				os << endl;
			} else {
				os << " ";
			}
		}
	}
	
	/**
	 * Check whether two reads are the same in the sense that they will
	 * lead to us finding the same set of alignments.
	 */
	static bool same(
		const BTDnaString& seq1,
		const BTString&    qual1,
		const BTDnaString& seq2,
		const BTString&    qual2,
		bool qualitiesMatter)
	{
		if(seq1.length() != seq2.length()) {
			return false;
		}
		for(size_t i = 0; i < seq1.length(); i++) {
			if(seq1[i] != seq2[i]) return false;
		}
		if(qualitiesMatter) {
			if(qual1.length() != qual2.length()) {
				return false;
			}
			for(size_t i = 0; i < qual1.length(); i++) {
				if(qual1[i] != qual2[i]) return false;
			}
		}
		return true;
	}

	/**
	 * Get the nucleotide and quality value at the given offset from 5' end.
	 * If 'fw' is false, get the reverse complement.
	 */
	std::pair<int, int> get(TReadOff off5p, bool fw) const {
		assert_lt(off5p, length());
		int c = (int)patFw[off5p];
        int q = qual[off5p];
        assert_geq(q, 33);
		return make_pair((!fw && c < 4) ? (c ^ 3) : c, q - 33);
	}
	
	/**
	 * Get the nucleotide at the given offset from 5' end.
	 * If 'fw' is false, get the reverse complement.
	 */
	int getc(TReadOff off5p, bool fw) const {
		assert_lt(off5p, length());
		int c = (int)patFw[off5p];
		return (!fw && c < 4) ? (c ^ 3) : c;
	}
	
	/**
	 * Get the quality value at the given offset from 5' end.
	 */
	int getq(TReadOff off5p) const {
		assert_lt(off5p, length());
        int q = qual[off5p];
        assert_geq(q, 33);
		return q-33;
	}

#ifndef NDEBUG
	/**
	 * Check that read info is internally consistent.
	 */
	bool repOk() const {
		if(patFw.empty()) return true;
		assert_eq(qual.length(), patFw.length());
		return true;
	}
#endif

	BTDnaString patFw;            // forward-strand sequence
    BTDnaString patFw_3N;
    BTDnaString patRc;            // reverse-complement sequence
    BTDnaString patRc1;
	BTString    qual;             // quality values
    BTDnaString originalFw;       // the forward-strand sequence from read (without editing)
    BTDnaString originalRc;       // the reverse-complement sequence from read (without editing)

	BTDnaString altPatFw[3];
	BTDnaString altPatRc[3];
	BTString    altQual[3];

	BTDnaString patFwRev;
	BTDnaString patRcRev;
	BTString    qualRev;

	BTDnaString altPatFwRev[3];
	BTDnaString altPatRcRev[3];
	BTString    altQualRev[3];

	// For remembering the exact input text used to define a read
	SStringExpandable<char> readOrigBuf;

	BTString name;      // read name
	TReadId  rdid;      // 0-based id based on pair's offset in read file(s)
	TReadId  endid;     // 0-based id based on pair's offset in read file(s)
	                    // and which mate ("end") this is
	int      mate;      // 0 = single-end, 1 = mate1, 2 = mate2
	uint32_t seed;      // random seed
	size_t   ns_;       // # Ns
	int      alts;      // number of alternatives
	bool     fuzzy;     // whether to employ fuzziness
	bool     color;     // whether read is in color space
	char     primer;    // primer base, for csfasta files
	char     trimc;     // trimmed color, for csfasta files
	char     filter;    // if read format permits filter char, set it here
	int      trimmed5;  // amount actually trimmed off 5' end
	int      trimmed3;  // amount actually trimmed off 3' end
	HitSet  *hitset;    // holds previously-found hits; for chaining
	// for HISAT-3N
	int threeN_cycle;
	bool oppositeConversion_3N;
};

/**
 * A string of FmStringOps represent a string of tasks performed by the
 * best-first alignment search.  We model the search as a series of FM ops
 * interspersed with reported alignments.
 */
struct FmStringOp {
	bool alignment;  // true -> found an alignment
	TAlScore pen;    // penalty of the FM op or alignment
	size_t n;        // number of FM ops (only relevant for non-alignment)
};

/**
 * A string that summarizes the progress of an FM-index-assistet best-first
 * search.  Useful for trying to figure out what the aligner is spending its
 * time doing for a given read.
 */
struct FmString {

	/**
	 * Add one or more FM index ops to the op string
	 */
	void add(bool alignment, TAlScore pen, size_t nops) {
		if(ops.empty() || ops.back().pen != pen) {
			ops.expand();
			ops.back().alignment = alignment;
			ops.back().pen = pen;
			ops.back().n = 0;
		}
		ops.back().n++;
	}
	
	/**
	 * Reset FmString to uninitialized state.
	 */
	void reset() {
		pen = std::numeric_limits<TAlScore>::max();
		ops.clear();
	}

	/**
	 * Print a :Z optional field where certain characters (whitespace, colon
	 * and percent) are escaped using % escapes.
	 */
	void print(BTString& o, char *buf) const {
		for(size_t i = 0; i < ops.size(); i++) {
			if(i > 0) {
				o.append(';');
			}
			if(ops[i].alignment) {
				o.append("A,");
				itoa10(ops[i].pen, buf);
				o.append(buf);
			} else {
				o.append("F,");
				itoa10(ops[i].pen, buf); o.append(buf);
				o.append(',');
				itoa10(ops[i].n, buf); o.append(buf);
			}
		}
	}

	TAlScore pen;          // current penalty
	EList<FmStringOp> ops; // op string
};

/**
 * Key per-read metrics.  These are used for thresholds, allowing us to bail
 * for unproductive reads.  They also the basis of what's printed when the user
 * specifies --read-times.
 */
struct PerReadMetrics {

	PerReadMetrics() { reset(); }

	void reset() {
		nExIters =
		nExDps   = nExDpSuccs   = nExDpFails   =
		nMateDps = nMateDpSuccs = nMateDpFails =
		nExUgs   = nExUgSuccs   = nExUgFails   =
		nMateUgs = nMateUgSuccs = nMateUgFails =
		nExEes   = nExEeSuccs   = nExEeFails   =
		nRedundants =
		nEeFmops = nSdFmops = nExFmops =
		nDpFail = nDpFailStreak = nDpLastSucc =
		nUgFail = nUgFailStreak = nUgLastSucc =
		nEeFail = nEeFailStreak = nEeLastSucc =
		nFilt = 0;
		nFtabs = 0;
		nRedSkip = 0;
		nRedFail = 0;
		nRedIns = 0;
		doFmString = false;
		nSeedRanges = nSeedElts = 0;
		nSeedRangesFw = nSeedEltsFw = 0;
		nSeedRangesRc = nSeedEltsRc = 0;
		seedMedian = seedMean = 0;
		bestLtMinscMate1 =
		bestLtMinscMate2 = std::numeric_limits<TAlScore>::min();
		fmString.reset();
	}

	struct timeval  tv_beg; // timer start to measure how long alignment takes
	struct timezone tz_beg; // timer start to measure how long alignment takes

	uint64_t nExIters;      // iterations of seed hit extend loop

	uint64_t nExDps;        // # extend DPs run on this read
	uint64_t nExDpSuccs;    // # extend DPs run on this read
	uint64_t nExDpFails;    // # extend DPs run on this read
	
	uint64_t nExUgs;        // # extend ungapped alignments run on this read
	uint64_t nExUgSuccs;    // # extend ungapped alignments run on this read
	uint64_t nExUgFails;    // # extend ungapped alignments run on this read

	uint64_t nExEes;        // # extend ungapped alignments run on this read
	uint64_t nExEeSuccs;    // # extend ungapped alignments run on this read
	uint64_t nExEeFails;    // # extend ungapped alignments run on this read

	uint64_t nMateDps;      // # mate DPs run on this read
	uint64_t nMateDpSuccs;  // # mate DPs run on this read
	uint64_t nMateDpFails;  // # mate DPs run on this read
	
	uint64_t nMateUgs;      // # mate ungapped alignments run on this read
	uint64_t nMateUgSuccs;  // # mate ungapped alignments run on this read
	uint64_t nMateUgFails;  // # mate ungapped alignments run on this read

	uint64_t nRedundants;   // # redundant seed hits
	
	uint64_t nSeedRanges;   // # BW ranges found for seeds
	uint64_t nSeedElts;     // # BW elements found for seeds

	uint64_t nSeedRangesFw; // # BW ranges found for seeds from fw read
	uint64_t nSeedEltsFw;   // # BW elements found for seeds from fw read

	uint64_t nSeedRangesRc; // # BW ranges found for seeds from fw read
	uint64_t nSeedEltsRc;   // # BW elements found for seeds from fw read
	
	uint64_t seedMedian;    // median seed hit count
	uint64_t seedMean;      // rounded mean seed hit count
	
	uint64_t nEeFmops;      // FM Index ops for end-to-end alignment
	uint64_t nSdFmops;      // FM Index ops used to align seeds
	uint64_t nExFmops;      // FM Index ops used to resolve offsets
	
	uint64_t nFtabs;        // # ftab lookups
	uint64_t nRedSkip;      // # times redundant path was detected and aborted
	uint64_t nRedFail;      // # times a path was deemed non-redundant
	uint64_t nRedIns;       // # times a path was added to redundancy list
	
	uint64_t nDpFail;       // number of dp failures in a row up until now
	uint64_t nDpFailStreak; // longest streak of dp failures
	uint64_t nDpLastSucc;   // index of last dp attempt that succeeded
	
	uint64_t nUgFail;       // number of ungap failures in a row up until now
	uint64_t nUgFailStreak; // longest streak of ungap failures
	uint64_t nUgLastSucc;   // index of last ungap attempt that succeeded

	uint64_t nEeFail;       // number of ungap failures in a row up until now
	uint64_t nEeFailStreak; // longest streak of ungap failures
	uint64_t nEeLastSucc;   // index of last ungap attempt that succeeded
	
	uint64_t nFilt;         // # mates filtered
	
	TAlScore bestLtMinscMate1; // best invalid score observed for mate 1
	TAlScore bestLtMinscMate2; // best invalid score observed for mate 2
	
	// For collecting information to go into an FM string
	bool doFmString;
	FmString fmString;
};

#endif /*READ_H_*/
