/*
 * Copyright 2015, Daehwan Kim <infphilo@gmail.com>
 *
 * This file is part of HISAT 2.
 *
 * HISAT 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HISAT 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HISAT 2.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef GFM_H_
#define GFM_H_

#include <stdint.h>
#include <string.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <memory>
#include <fcntl.h>
#include <math.h>
#include <errno.h>
#include <set>
#include <stdexcept>
#include <sys/stat.h>
#ifdef BOWTIE_MM
#include <sys/mman.h>
#include <sys/shm.h>
#endif
#include "shmem.h"
#include "alphabet.h"
#include "assert_helpers.h"
#include "bitpack.h"
#include "blockwise_sa.h"
#include "endian_swap.h"
#include "word_io.h"
#include "random_source.h"
#include "ref_read.h"
#include "threading.h"
#include "str_util.h"
#include "mm.h"
#include "timer.h"
#include "reference.h"
#include "search_globals.h"
#include "ds.h"
#include "random_source.h"
#include "mem_ids.h"
#include "btypes.h"
#include "tokenize.h"
#include "repeat.h"
#include "repeat_kmer.h"

#ifdef POPCNT_CAPABILITY
#include "processor_support.h"
#endif

#include "gbwt_graph.h"

using namespace std;

// From ccnt_lut.cpp, automatically generated by gen_lookup_tables.pl
extern uint8_t cCntLUT_4[4][4][256];
extern uint8_t cCntLUT_4_rev[4][4][256];
extern uint8_t cCntBIT[8][256];

extern bool threeN;

static const uint64_t c_table[4] = {
    0xffffffffffffffff,
    0xaaaaaaaaaaaaaaaa,
    0x5555555555555555,
    0x0000000000000000
};

#ifndef VMSG_NL
#define VMSG_NL(...) \
if(this->verbose()) { \
	stringstream tmp; \
	tmp << __VA_ARGS__ << endl; \
	this->verbose(tmp.str()); \
}
#endif

#ifndef VMSG
#define VMSG(...) \
if(this->verbose()) { \
	stringstream tmp; \
	tmp << __VA_ARGS__; \
	this->verbose(tmp.str()); \
}
#endif

/**
 * Flags describing type of Ebwt.
 */
enum GFM_FLAGS {
	GFM_ENTIRE_REV = 4 // true -> reverse Ebwt is the whole
                       // concatenated string reversed, rather than
                       // each stretch reversed
};

/**
 * Extended Burrows-Wheeler transform header.  This together with the
 * actual data arrays and other text-specific parameters defined in
 * class Ebwt constitute the entire Ebwt.
 */
template <typename index_t = uint32_t>
class GFMParams {

public:
	GFMParams() { }

	GFMParams(
		index_t len,
        index_t gbwtLen,
        index_t numNodes,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
        index_t eftabLen,
		bool entireReverse)
	{
		init(len, gbwtLen, numNodes, lineRate, offRate, ftabChars, eftabLen, entireReverse);
	}

	GFMParams(const GFMParams& gh) {
		init(gh._len, gh._gbwtLen, gh._numNodes, gh._lineRate, gh._offRate,
		     gh._ftabChars, gh._eftabLen, gh._entireReverse);
	}

	void init(
              index_t len,
              index_t gbwtLen,
              index_t numNodes,
              int32_t lineRate,
              int32_t offRate,
              int32_t ftabChars,
              index_t eftabLen,
              bool entireReverse)
	{
		_entireReverse = entireReverse;
        _linearFM = (len + 1 == gbwtLen || gbwtLen == 0);
		_len = len;
        _gbwtLen = (gbwtLen == 0 ? len + 1 : gbwtLen);
        _numNodes = (numNodes == 0 ? len + 1 : numNodes);
        if(_linearFM) {
            _sz = (len+3)/4;
            _gbwtSz = _gbwtLen/4 + 1;
        } else {
            _sz = (len+1)/2;
            _gbwtSz = _gbwtLen/2 + 1;
        }
		_lineRate = lineRate;
		_origOffRate = offRate;
		_offRate = offRate;
		_offMask = std::numeric_limits<index_t>::max() << _offRate;
		_ftabChars = ftabChars;
		_eftabLen = eftabLen;
		_eftabSz = _eftabLen*sizeof(index_t);
		_ftabLen = (1 << (_ftabChars*2))+1;
		_ftabSz = _ftabLen*sizeof(index_t);
		_offsLen = (_numNodes + (1 << _offRate) - 1) >> _offRate;
		_offsSz = _offsLen*sizeof(index_t);
		_lineSz = 1 << _lineRate;
		_sideSz = _lineSz * 1 /* lines per side */;
        if(_linearFM) {
            _sideGbwtSz = _sideSz - (sizeof(index_t) * 4);
            _sideGbwtLen = _sideGbwtSz << 2;
        } else {
            _sideGbwtSz = _sideSz - (sizeof(index_t) * 6);
            _sideGbwtLen = _sideGbwtSz << 1;
        }
		_numSides = (_gbwtSz+(_sideGbwtSz)-1)/(_sideGbwtSz);
		_numLines = _numSides * 1 /* lines per side */;
		_gbwtTotLen = _numSides * _sideSz;
		_gbwtTotSz = _gbwtTotLen;
		assert(repOk());
	}

	index_t len() const           { return _len; }
	index_t lenNucs() const       { return _len; }
    index_t gbwtLen() const       { return _gbwtLen; }
	index_t sz() const            { return _sz; }
	index_t gbwtSz() const        { return _gbwtSz; }
	int32_t lineRate() const      { return _lineRate; }
	int32_t origOffRate() const   { return _origOffRate; }
	int32_t offRate() const       { return _offRate; }
	index_t offMask() const       { return _offMask; }
	int32_t ftabChars() const     { return _ftabChars; }
	index_t eftabLen() const      { return _eftabLen; }
	index_t eftabSz() const       { return _eftabSz; }
	index_t ftabLen() const       { return _ftabLen; }
	index_t ftabSz() const        { return _ftabSz; }
	index_t offsLen() const       { return _offsLen; }
	index_t offsSz() const        { return _offsSz; }
	index_t lineSz() const        { return _lineSz; }
	index_t sideSz() const        { return _sideSz; }
	index_t sideGbtSz() const     { return _sideGbwtSz; }
	index_t sideGbwtLen() const   { return _sideGbwtLen; }
	index_t numSides() const      { return _numSides; }
	index_t numLines() const      { return _numLines; }
	index_t gbwtTotLen() const    { return _gbwtTotLen; }
	index_t gbwtTotSz() const     { return _gbwtTotSz; }
	bool entireReverse() const    { return _entireReverse; }
    bool linearFM() const            { return _linearFM; }
    index_t numNodes() const      { return _numNodes; }

	/**
	 * Set a new suffix-array sampling rate, which involves updating
	 * rate, mask, sample length, and sample size.
	 */
	void setOffRate(int __offRate) {
		_offRate = __offRate;
		_offMask = std::numeric_limits<index_t>::max() << _offRate;
		_offsLen = (_gbwtLen + (1 << _offRate) - 1) >> _offRate;
		_offsSz = _offsLen * sizeof(index_t);
	}

#ifndef NDEBUG
	/// Check that this EbwtParams is internally consistent
	bool repOk() const {
		// assert_gt(_len, 0);
		assert_gt(_lineRate, 3);
		assert_geq(_offRate, 0);
		assert_leq(_ftabChars, 16);
		assert_geq(_ftabChars, 1);
        assert_lt(_lineRate, 32);
		assert_lt(_ftabChars, 32);
		assert_eq(0, _gbwtTotSz % _lineSz);
		return true;
	}
#endif

	/**
	 * Pretty-print the header contents to the given output stream.
	 */
	void print(ostream& out) const {
		out << "Headers:" << endl
		    << "    len: "          << _len << endl
		    << "    gbwtLen: "      << _gbwtLen << endl
            << "    nodes: "        << _numNodes << endl
		    << "    sz: "           << _sz << endl
		    << "    gbwtSz: "       << _gbwtSz << endl
		    << "    lineRate: "     << _lineRate << endl
		    << "    offRate: "      << _offRate << endl
		    << "    offMask: 0x"    << hex << _offMask << dec << endl
		    << "    ftabChars: "    << _ftabChars << endl
		    << "    eftabLen: "     << _eftabLen << endl
		    << "    eftabSz: "      << _eftabSz << endl
		    << "    ftabLen: "      << _ftabLen << endl
		    << "    ftabSz: "       << _ftabSz << endl
		    << "    offsLen: "      << _offsLen << endl
		    << "    offsSz: "       << _offsSz << endl
		    << "    lineSz: "       << _lineSz << endl
		    << "    sideSz: "       << _sideSz << endl
		    << "    sideGbwtSz: "   << _sideGbwtSz << endl
		    << "    sideGbwtLen: "  << _sideGbwtLen << endl
		    << "    numSides: "     << _numSides << endl
		    << "    numLines: "     << _numLines << endl
		    << "    gbwtTotLen: "   << _gbwtTotLen << endl
		    << "    gbwtTotSz: "    << _gbwtTotSz << endl
		    << "    reverse: "      << _entireReverse << endl
            << "    linearFM: "     << (_linearFM ? "Yes" : "No") << endl;
	}

	index_t  _len;
	index_t  _gbwtLen;
	index_t  _sz;
	index_t  _gbwtSz;
	int32_t  _lineRate;
	int32_t  _origOffRate;
	int32_t  _offRate;
	index_t  _offMask;
	int32_t  _ftabChars;
	index_t  _eftabLen;
	index_t  _eftabSz;
	index_t  _ftabLen;
	index_t  _ftabSz;
	index_t  _offsLen;
	index_t  _offsSz;
	index_t  _lineSz;
	index_t  _sideSz;
	index_t  _sideGbwtSz;
	index_t  _sideGbwtLen;
	index_t  _numSides;
	index_t  _numLines;
	index_t  _gbwtTotLen;
	index_t  _gbwtTotSz;
	bool     _entireReverse;
    bool     _linearFM;
    index_t  _numNodes;
};

/**
 * Exception to throw when a file-realted error occurs.
 */
class GFMFileOpenException : public std::runtime_error {
public:
	GFMFileOpenException(const std::string& msg = "") :
		std::runtime_error(msg) { }
};

/**
 * Calculate size of file with given name.
 */
static inline int64_t fileSize(const char* name) {
	std::ifstream f;
	f.open(name, std::ios_base::binary | std::ios_base::in);
	if (!f.good() || f.eof() || !f.is_open()) { return 0; }
	f.seekg(0, std::ios_base::beg);
	std::ifstream::pos_type begin_pos = f.tellg();
	f.seekg(0, std::ios_base::end);
	return static_cast<int64_t>(f.tellg() - begin_pos);
}

/**
 * Encapsulates a location in the gbwt text in terms of the side it
 * occurs in and its offset within the side.
 */
template <typename index_t = uint32_t>
struct SideLocus {
	SideLocus() :
	_sideByteOff(0),
	_sideNum(0),
	_charOff(0),
	_by(-1),
	_bp(-1) { }

	/**
	 * Construct from row and other relevant information about the Ebwt.
	 */
	SideLocus(index_t row, const GFMParams<index_t>& ep, const uint8_t* ebwt) {
		initFromRow(row, ep, ebwt);
	}

	/**
	 * Init two SideLocus objects from a top/bot pair, using the result
	 * from one call to initFromRow to possibly avoid a second call.
	 */
	static void initFromTopBot(
		index_t top,
		index_t bot,
		const GFMParams<index_t>& gp,
		const uint8_t* gfm,
		SideLocus& ltop,
		SideLocus& lbot)
	{
		const index_t sideGbwtLen = gp._sideGbwtLen;
		assert_gt(bot, top);
		ltop.initFromRow(top, gp, gfm);
		index_t spread = bot - top;
		// Many cache misses on the following lines
		if(ltop._charOff + spread < sideGbwtLen) {
			lbot._charOff = ltop._charOff + spread;
			lbot._sideNum = ltop._sideNum;
			lbot._sideByteOff = ltop._sideByteOff;
			lbot._by = lbot._charOff >> 2;
			assert_lt(lbot._by, (int)gp._sideGbwtSz);
			lbot._bp = lbot._charOff & 0x3;
		} else {
			lbot.initFromRow(bot, gp, gfm);
		}
	}

	/**
	 * Calculate SideLocus based on a row and other relevant
	 * information about the shape of the Ebwt.
	 */
	void initFromRow(
                     index_t row,
                     const GFMParams<index_t>& gp,
                     const uint8_t* gfm) {
		const index_t sideSz      = gp._sideSz;
		// Side length is hard-coded for now; this allows the compiler
		// to do clever things to accelerate / and %.
		_sideNum                  = row / gp._sideGbwtLen;
		assert_lt(_sideNum, gp._numSides);
		_charOff                  = row % gp._sideGbwtLen;
		_sideByteOff              = _sideNum * sideSz;
		assert_leq(row, gp._gbwtLen);
		assert_leq(_sideByteOff + sideSz, gp._gbwtTotSz);
		// Tons of cache misses on the next line
		_by = _charOff >> 2; // byte within side
		assert_lt(_by, (int)gp._sideGbwtSz);
		_bp = _charOff & 0x3;  // bit-pair within byte
	}
    
    /**
     * Init two SideLocus objects from a top/bot pair, using the result
     * from one call to initFromRow to possibly avoid a second call.
     */
    static void initFromTopBot_bit(
                                   index_t top,
                                   index_t bot,
                                   const GFMParams<index_t>& gp,
                                   const uint8_t* gfm,
                                   SideLocus& ltop,
                                   SideLocus& lbot)
    {
        const index_t sideGbwtLen = gp._sideGbwtLen;
        // assert_gt(bot, top);
        ltop.initFromRow_bit(top, gp, gfm);
        index_t spread = bot - top;
        // Many cache misses on the following lines
        if(ltop._charOff + spread < sideGbwtLen) {
            lbot._charOff = ltop._charOff + spread;
            lbot._sideNum = ltop._sideNum;
            lbot._sideByteOff = ltop._sideByteOff;
            lbot._by = lbot._charOff >> 3;
            assert_lt(lbot._by, (int)gp._sideGbwtSz);
            lbot._bp = lbot._charOff & 0x7;
        } else {
            lbot.initFromRow_bit(bot, gp, gfm);
        }
    }
    
    /**
     * Calculate SideLocus based on a row and other relevant
     * information about the shape of the Ebwt.
     */
    void initFromRow_bit(
                         index_t row,
                         const GFMParams<index_t>& gp,
                         const uint8_t* gfm) {
        const index_t sideSz      = gp._sideSz;
        // Side length is hard-coded for now; this allows the compiler
        // to do clever things to accelerate / and %.
        _sideNum                  = row / gp._sideGbwtLen;
        assert_lt(_sideNum, gp._numSides);
        _charOff                  = row % gp._sideGbwtLen;
        _sideByteOff              = _sideNum * sideSz;
        assert_lt(row, gp._gbwtLen);
        assert_leq(_sideByteOff + sideSz, gp._gbwtTotSz);
        // Tons of cache misses on the next line
        _by = _charOff >> 3; // byte within side
        assert_lt(_by, (int)gp._sideGbwtSz);
        _bp = _charOff & 0x7;  // bit-pair within byte
    }
	
	/**
	 * Transform this SideLocus to refer to the next side (i.e. the one
	 * corresponding to the next side downstream).  Set all cursors to
	 * point to the beginning of the side.
	 */
	void nextSide(const GFMParams<index_t>& gp) {
		assert(valid());
		_sideByteOff += gp.sideSz();
		_sideNum++;
		_by = _bp = _charOff = 0;
		assert(valid());
	}

	/**
	 * Return true iff this is an initialized SideLocus
	 */
	bool valid() const {
		if(_bp != -1) {
			return true;
		}
		return false;
	}
	
	/**
	 * Convert locus to BW row it corresponds to.
	 */
    index_t toBWRow(const GFMParams<index_t>& gp) const;
	
#ifndef NDEBUG
	/**
	 * Check that SideLocus is internally consistent and consistent
	 * with the (provided) EbwtParams.
	 */
	bool repOk(const GFMParams<index_t>& gp) const {
		ASSERT_ONLY(index_t row = toBWRow(gp));
        assert_leq(row, gp._gbwtLen);
		assert_range(-1, 3, _bp);
		assert_range(0, (int)gp._sideGbwtSz, _by);
		return true;
	}
#endif

	/// Make this look like an invalid SideLocus
	void invalidate() {
		_bp = -1;
	}

	/**
	 * Return a read-only pointer to the beginning of the top side.
	 */
	const uint8_t *side(const uint8_t* gbwt) const {
		return gbwt + _sideByteOff;
	}
    
    /**
	 * Return a read-only pointer to the beginning of the top side.
	 */
	const uint8_t *next_side(const GFMParams<index_t>& gp, const uint8_t* gbwt) const {
        if(_sideByteOff + gp._sideSz < gp._ebwtTotSz) {
            return gbwt + _sideByteOff + gp._sideSz;
        } else {
            return NULL;
        }
	}
    
	index_t _sideByteOff; // offset of top side within ebwt[]
	index_t _sideNum;     // index of side
	index_t _charOff;     // character offset within side
	int32_t _by;          // byte within side (not adjusted for bw sides)
	int32_t _bp;          // bitpair within byte (not adjusted for bw sides)
};

/**
 * Convert locus to BW row it corresponds to.
 */
template <typename index_t>
inline index_t SideLocus<index_t>::toBWRow(const GFMParams<index_t>& gp) const {
    return _sideNum * (gp._sideGbwtSz << (gp.linearFM() ? 2 : 1)) + _charOff;
}

#ifdef POPCNT_CAPABILITY   // wrapping of "struct"
struct USE_POPCNT_GENERIC {
#endif
    // Use this standard bit-bashing population count
    inline static int pop64(uint64_t x) {
        // Lots of cache misses on following lines (>10K)
        x = x - ((x >> 1) & 0x5555555555555555llu);
        x = (x & 0x3333333333333333llu) + ((x >> 2) & 0x3333333333333333llu);
        x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0Fllu;
        x = x + (x >> 8);
        x = x + (x >> 16);
        x = x + (x >> 32);
        return (int)(x & 0x3Fllu);
    }
#ifdef POPCNT_CAPABILITY  // wrapping a "struct"
};
#endif

#ifdef POPCNT_CAPABILITY
struct USE_POPCNT_INSTRUCTION {
    inline static int pop64(uint64_t x) {
        int64_t count;
#ifdef USING_MSC_COMPILER
		count = __popcnt64(x);
#else
        asm ("popcntq %[x],%[count]\n": [count] "=&r" (count): [x] "r" (x));
#endif
        return (int)count;
    }
};
#endif

/**
 * Tricky-bit-bashing bitpair counting for given two-bit value (0-3)
 * within a 64-bit argument.
 */
#ifdef POPCNT_CAPABILITY
template<typename Operation>
#endif
inline static int countInU64(int c, uint64_t dw) {
    uint64_t c0 = c_table[c];
    uint64_t x0 = dw ^ c0;
    uint64_t x1 = (x0 >> 1);
    uint64_t x2 = x1 & (0x5555555555555555);
    uint64_t x3 = x0 & x2;
#ifdef POPCNT_CAPABILITY
    uint64_t tmp = Operation().pop64(x3);
#else
    uint64_t tmp = pop64(x3);
#endif
    return (int) tmp;
}

#ifdef POPCNT_CAPABILITY   // wrapping of "struct"
struct USE_POPCNT_GENERIC_BITS {
	// Use this standard bit-bashing population count
	inline static uint64_t pop64(uint64_t x) {
#else
// Use this standard bit-bashing population count
	inline static uint64_t pop6464(uint64_t x) {
#endif
		x -= (x >> 1) & 0x5555555555555555ULL;
        x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
        x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
        return int((x * 0x0101010101010101ULL) >> 56);
    }
#ifdef POPCNT_CAPABILITY  // wrapping a "struct"
};
#endif

/**
 * Tricky-bit-bashing bitpair counting for given two-bit value (0-3)
 * within a 64-bit argument.
 */
#ifdef POPCNT_CAPABILITY
template<typename Operation>
#endif
inline static int countInU64_bits(uint64_t dw) {
#ifdef POPCNT_CAPABILITY
    uint64_t tmp = Operation().pop64(dw);
#else
    uint64_t tmp = pop6464(dw);
#endif
    return (int) tmp;
}

// Forward declarations for Ebwt class
class GFMSearchParams;

/**
 * Extended Burrows-Wheeler transform data.
 *
 * An Ebwt may be transferred to and from RAM with calls to
 * evictFromMemory() and loadIntoMemory().  By default, a newly-created
 * Ebwt is not loaded into memory; if the user would like to use a
 * newly-created Ebwt to answer queries, they must first call
 * loadIntoMemory().
 */
template <class index_t = uint32_t>
class GFM {
public:
	#define GFM_INITS \
	    _toBigEndian(currentlyBigEndian()), \
	    _overrideOffRate(overrideOffRate), \
	    _verbose(verbose), \
	    _passMemExc(passMemExc), \
	    _sanity(sanityCheck), \
	    fw_(fw), \
	    _in1(NULL), \
	    _in2(NULL), \
	    _nPat(0), \
	    _nFrag(0), \
	    _plen(EBWT_CAT), \
	    _rstarts(EBWT_CAT), \
	    _fchr(EBWT_CAT), \
	    _ftab(EBWT_CAT), \
	    _eftab(EBWT_CAT), \
	    _offs(EBWT_CAT), \
	    _gfm(EBWT_CAT), \
	    _useMm(false), \
	    useShmem_(false), \
	    _refnames(EBWT_CAT), \
        mmFile1_(NULL), \
	    mmFile2_(NULL), \
        _nthreads(1)

        GFM() {}
	/// Construct a GFM from the given input file
	GFM(const string& in,
        ALTDB<index_t>* altdb,
        RepeatDB<index_t>* repeatdb,
        EList<size_t>* readLens,
        int needEntireReverse,
        bool fw,
        int32_t overrideOffRate, // = -1,
        int32_t offRatePlus, // = -1,
        bool useMm, // = false,
        bool useShmem, // = false,
        bool mmSweep, // = false,
        bool loadNames, // = false,
        bool loadSASamp, // = true,
        bool loadFtab, // = true,
        bool loadRstarts, // = true,
        bool loadSpliceSites, // = true,
        bool verbose, // = false,
        bool startVerbose, // = false,
        bool passMemExc, // = false,
        bool sanityCheck, // = false)
        bool useHaplotype, // = false
        bool skipLoading = false) :
        GFM_INITS
	{
		assert(!useMm || !useShmem);
        
#ifdef POPCNT_CAPABILITY
        ProcessorSupport ps;
        _usePOPCNTinstruction = ps.POPCNTenabled();
#endif
        
		packed_ = false;
		_useMm = useMm;
		useShmem_ = useShmem;
		_in1Str = in + ".1." + gfm_ext;
		_in2Str = in + ".2." + gfm_ext;

        if(skipLoading) return;
		
        if(repeatdb == NULL) {
            readIntoMemory(
                           fw ? -1 : needEntireReverse, // need REF_READ_REVERSE
                           loadSASamp,  // load the SA sample portion?
                           loadFtab,    // load the ftab & eftab?
                           loadRstarts, // load the rstarts array?
                           true,        // stop after loading the header portion?
                           &_gh,        // params
                           mmSweep,     // mmSweep
                           loadNames,   // loadNames
                           startVerbose); // startVerbose
            // If the offRate has been overridden, reflect that in the
            // _eh._offRate field
            if(offRatePlus > 0 && _overrideOffRate == -1) {
                _overrideOffRate = _gh._offRate + offRatePlus;
            }
            if(_overrideOffRate > _gh._offRate) {
                _gh.setOffRate(_overrideOffRate);
                assert_eq(_overrideOffRate, _gh._offRate);
            }
        }

        // Read ALTs
        EList<ALT<index_t> >& alts = altdb->alts();
        EList<Haplotype<index_t> >& haplotypes = altdb->haplotypes();
        EList<string>& altnames = altdb->altnames();
        alts.clear(); altnames.clear();
        string in7Str = in + ".7." + gfm_ext;
        string in8Str = in + ".8." + gfm_ext;
        
        // open alts
        if(verbose || startVerbose) cerr << "Opening \"" << in7Str.c_str() << "\"" << endl;
        ifstream in7(in7Str.c_str(), ios::binary);
        if(!in7.good()) {
            cerr << "Could not open index file " << in7Str.c_str() << endl;
        }
        
        EList<index_t> to_alti;
        index_t to_alti_far = 0;
        readI32(in7, this->toBe());
        index_t numAlts = readIndex<index_t>(in7, this->toBe());


        // open altnames
        if(verbose || startVerbose) cerr << "Opening \"" << in8Str.c_str() << "\"" << endl;
        ifstream in8(in8Str.c_str(), ios::binary);
        if(!in8.good()) {
            cerr << "Could not open index file " << in8Str.c_str() << endl;
        }

        readI32(in8, this->toBe());
        index_t numAltnames = readIndex<index_t>(in8, this->toBe());

        assert_eq(numAlts, numAltnames);

        if(numAlts > 0) {
            alts.resizeExact(numAlts); alts.clear();
            to_alti.resizeExact(numAlts); to_alti.clear();
            while(!in7.eof() && !in8.eof()) {
                alts.expand();
                alts.back().read(in7, this->toBe());
                to_alti.push_back(to_alti_far);
                to_alti_far++;

                altnames.expand();
                in8 >> altnames.back();

                if(!loadSpliceSites) {
                    if(alts.back().splicesite()) {
                        alts.pop_back();
                        assert_gt(numAlts, 0);
                        altnames.pop_back();
                        assert_gt(numAltnames, 0);
                        numAlts--;
                        numAltnames--;
                        to_alti.back() = std::numeric_limits<index_t>::max();
                        to_alti_far--;
                    }
                }
                if(alts.size() == numAlts) break;
            }
        }
        assert_eq(alts.size(), numAlts);
        assert_eq(to_alti_far, numAlts);
        assert_eq(alts.size(), altnames.size());
        // Check if it hits the end of file, and this routine is needed for backward compatibility
        if(in7.peek() != std::ifstream::traits_type::eof()) {
            index_t numHaplotypes = readIndex<index_t>(in7, this->toBe());
            if(numHaplotypes > 0) {
                haplotypes.resizeExact(numHaplotypes);
                haplotypes.clear();
                while(!in7.eof()) {
                    haplotypes.expand();
                    haplotypes.back().read(in7, this->toBe());
                    Haplotype<index_t>& ht = haplotypes.back();
                    for(index_t h = 0; h < ht.alts.size(); h++) {
                        ht.alts[h] = to_alti[ht.alts[h]];
                    }
                    if(haplotypes.size() == numHaplotypes) break;
                }
            }
            if(!useHaplotype) {
                haplotypes.nullify();
            }
        }

        // Read repeats
        _repeat = false;
        if(repeatdb != NULL) {
            _repeat = true;

			// Number of repeat groups in the index
            index_t numRepeatIndex = readIndex<index_t>(in7, this->toBe());
            assert_gt(numRepeatIndex, 0);
            EList<pair<index_t, index_t> > repeatLens; repeatLens.resizeExact(numRepeatIndex);

            for(size_t k = 0; k < numRepeatIndex; k++) {
                repeatLens[k].first = readIndex<index_t>(in7, this->toBe());
                repeatLens[k].second = readIndex<index_t>(in7, this->toBe());
            }

			if (readLens != NULL && !readLens->empty()) {
				// Load subset of repeat groups.
				size_t k = 0;
				size_t k2 = 0;

				_repeatIncluded.resizeExact(numRepeatIndex);
				_repeatIncluded.fillZero();

				while(k < numRepeatIndex && k2 < readLens->size()) {
					if (repeatLens[k].first >= (*readLens)[k2]) {
						_repeatIncluded[k] = true;
						k2++;
					} else {
						k++;
					}
				}

				// at least last repeat group is included
				_repeatIncluded[numRepeatIndex - 1] = true;

				_repeatLens.clear();
				for(size_t i = 0; i < numRepeatIndex; i++) {
					if (_repeatIncluded[i]) {
						_repeatLens.push_back(repeatLens[i]);
					}
				}
			} else {
				// Load all repeat groups
				_repeatLens = repeatLens;
				_repeatIncluded.resizeExact(numRepeatIndex);
				_repeatIncluded.fill(true);
			}

            repeatdb->read(in7, this->toBe(), _repeatIncluded);
            index_t numKmertables = readIndex<index_t>(in7, this->toBe());
            EList<streampos> filePos; filePos.resizeExact(numKmertables);
            for(size_t k = 0; k < numKmertables; k++) {
                filePos[k] = readIndex<uint64_t>(in7, this->toBe());
            }
            for(size_t k = 0; k < numKmertables; k++) {
                if(!_repeatIncluded[k])
                    continue;
                if(k > 0) {
                    in7.seekg(filePos[k-1]);
                }
                _repeat_kmertables.expand();
                _repeat_kmertables.back().read(in7, this->toBe());
            }
            in7.seekg(filePos.back());
        }
        
        in7.close();
        in8.close();
        
        // Sort SNPs and Splice Sites based on positions
        index_t nalts = (index_t)alts.size();
        for(index_t s = 0; s < nalts; s++) {
            ALT<index_t> alt = alts[s];
            if(alt.snp()) altdb->setSNPs(true);
            if(alt.exon()) altdb->setExons(true);
            if(alt.splicesite()) {
                altdb->setSpliceSites(true);
                alts.push_back(alt);
                alts.back().left = alt.right;
                alts.back().right = alt.left;
                altnames.push_back("ssr");
            } else if(alt.deletion()) {
                alts.push_back(alt);
                alts.back().pos = alt.pos + alt.len - 1;
                alts.back().reversed = true;
                string altname = altnames[s];
                altnames.push_back(altname);
            }
        }
        if(alts.size() > 1 && alts.size() > nalts) {
            assert_eq(alts.size(), altnames.size());
            EList<pair<ALT<index_t>, index_t> > buf; buf.resize(alts.size());
            EList<string> buf2; buf2.resize(alts.size());
            for(size_t i = 0; i < alts.size(); i++) {
                buf[i].first = alts[i];
                buf[i].second = (index_t)i;
                buf2[i] = altnames[i];
            }
            buf.sort();
            for(size_t i = 0; i < alts.size(); i++) {
                alts[i] = buf[i].first;
                altnames[i] = buf2[buf[i].second];
                if(buf[i].second < numAlts) {
                    to_alti[buf[i].second] = i;
                }
            }
        }
        
        if(useHaplotype) {
            EList<index_t>& haplotype_maxrights = altdb->haplotype_maxrights();
            haplotype_maxrights.resizeExact(haplotypes.size());
            for(index_t h = 0; h < haplotypes.size(); h++) {
                Haplotype<index_t>& ht = haplotypes[h];
                for(index_t h2 = 0; h2 < ht.alts.size(); h2++) {
                    ht.alts[h2] = to_alti[ht.alts[h2]];
                }
                if(h == 0) {
                    haplotype_maxrights[h] = ht.right;
                } else {
                    haplotype_maxrights[h] = std::max<index_t>(haplotype_maxrights[h - 1], ht.right);
                }
            }
        }
        
        assert(repeatdb != NULL || repOk());
	}
	
	/// Construct an Ebwt from the given header parameters and string
	/// vector, optionally using a blockwise suffix sorter with the
	/// given 'bmax' and 'dcv' parameters.  The string vector is
	/// ultimately joined and the joined string is passed to buildToDisk().
	GFM(
		 bool packed,
		 int needEntireReverse,
		 int32_t lineRate,
		 int32_t offRate,
		 int32_t ftabChars,
		 const string& file,   // base filename for GFM files
		 bool fw,
		 int dcv,
		 EList<RefRecord>& szs,
		 index_t sztot,
		 const RefReadInParams& refparams,
		 uint32_t seed,
		 int32_t overrideOffRate = -1,
		 bool verbose = false,
		 bool passMemExc = false,
		 bool sanityCheck = false) :
	GFM_INITS,
	_gh(
		joinedLen(szs),
        0,
        0,
		lineRate,
		offRate,
		ftabChars,
        0,
		refparams.reverse == REF_READ_REVERSE)
	{
#ifdef POPCNT_CAPABILITY
        ProcessorSupport ps;
        _usePOPCNTinstruction = ps.POPCNTenabled();
#endif
		packed_ = packed;
	}

	/// Construct an Ebwt from the given header parameters and string
	/// vector, optionally using a blockwise suffix sorter with the
	/// given 'bmax' and 'dcv' parameters.  The string vector is
	/// ultimately joined and the joined string is passed to buildToDisk().
	template<typename TStr>
	GFM(
		TStr& s,
		bool packed,
		int needEntireReverse,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
        int nthreads,
        const string& snpfile,
        const string& htfile,
        const string& ssfile,
        const string& exonfile,
        const string& svfile,
        const string& repeatfile,
		const string& outfile,   // base filename for GFM files
		bool fw,
		bool useBlockwise,
		index_t bmax,
		index_t bmaxSqrtMult,
		index_t bmaxDivN,
		int dcv,
		EList<FileBuf*>& is,
		EList<RefRecord>& szs,
		index_t sztot,
		const RefReadInParams& refparams,
        EList<RefRecord>* parent_szs,
        EList<string>* parent_refnames,
		uint32_t seed,
		int32_t overrideOffRate = -1,
		bool verbose = false,
		bool passMemExc = false,
		bool sanityCheck = false) :
		GFM_INITS,
		_gh(
			joinedLen(szs),
            0,
            0,
			lineRate,
			offRate,
			ftabChars,
            0,
			refparams.reverse == REF_READ_REVERSE)
	{
        assert_gt(nthreads, 0);
        _nthreads = nthreads;
#ifdef POPCNT_CAPABILITY
        ProcessorSupport ps;
        _usePOPCNTinstruction = ps.POPCNTenabled();
#endif
		_in1Str = outfile + ".1." + gfm_ext;
		_in2Str = outfile + ".2." + gfm_ext;
		packed_ = packed;
		// Open output files
		ofstream fout1(_in1Str.c_str(), ios::binary);
		if(!fout1.good()) {
			cerr << "Could not open index file for writing: \"" << _in1Str.c_str() << "\"" << endl
			     << "Please make sure the directory exists and that permissions allow writing by" << endl
			     << "HISAT2." << endl;
			throw 1;
		}
		ofstream fout2(_in2Str.c_str(), ios::binary);
		if(!fout2.good()) {
			cerr << "Could not open index file for writing: \"" << _in2Str.c_str() << "\"" << endl
			     << "Please make sure the directory exists and that permissions allow writing by" << endl
			     << "HISAT2." << endl;
			throw 1;
		}

		// Build
		initFromVector<TStr>(
							 s,
                             snpfile,
                             htfile,
                             ssfile,
                             exonfile,
                             svfile,
                             repeatfile,
							 is,
							 szs,
							 sztot,
							 refparams,
							 fout1,
							 fout2,
                             outfile,
							 useBlockwise,
							 bmax,
							 bmaxSqrtMult,
							 bmaxDivN,
							 dcv,
                             parent_szs,
                             parent_refnames,
							 seed,
							 verbose);
		// Close output files
		fout1.flush();
		int64_t tellpSz1 = (int64_t)fout1.tellp();
		VMSG_NL("Wrote " << fout1.tellp() << " bytes to primary GFM file: " << _in1Str.c_str());
		fout1.close();
		bool err = false;
		if(tellpSz1 > fileSize(_in1Str.c_str())) {
			err = true;
			cerr << "Index is corrupt: File size for " << _in1Str.c_str() << " should have been " << tellpSz1
			     << " but is actually " << fileSize(_in1Str.c_str()) << "." << endl;
		}
		fout2.flush();
		int64_t tellpSz2 = (int64_t)fout2.tellp();
		VMSG_NL("Wrote " << fout2.tellp() << " bytes to secondary GFM file: " << _in2Str.c_str());
		fout2.close();
		if(tellpSz2 > fileSize(_in2Str.c_str())) {
			err = true;
			cerr << "Index is corrupt: File size for " << _in2Str.c_str() << " should have been " << tellpSz2
			     << " but is actually " << fileSize(_in2Str.c_str()) << "." << endl;
		}
		if(err) {
			cerr << "Please check if there is a problem with the disk or if disk is full." << endl;
			throw 1;
		}
		// Reopen as input streams
		VMSG_NL("Re-opening _in1 and _in2 as input streams");
		if(_sanity) {
			VMSG_NL("Sanity-checking Bt2");
			assert(!isInMemory());
			readIntoMemory(
				fw ? -1 : needEntireReverse, // 1 -> need the reverse to be reverse-of-concat
				true,                        // load SA sample (_offs[])?
				true,                        // load ftab (_ftab[] & _eftab[])?
				true,                        // load r-starts (_rstarts[])?
				false,                       // just load header?
				NULL,                        // Params object to fill
				false,                       // mm sweep?
				true,                        // load names?
				false);                      // verbose startup?
			// sanityCheckAll(refparams.reverse);
			evictFromMemory();
			assert(!isInMemory());
		}
		VMSG_NL("Returning from GFM constructor");
	}
	
	/**
	 * Static constructor for a pair of forward/reverse indexes for the
	 * given reference string.
	 */
	template<typename TStr>
	static pair<GFM*, GFM*>
	fromString(
		const char* str,
		bool packed,
		int reverse,
		bool bigEndian,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
		const string& file,
		bool useBlockwise,
		index_t bmax,
		index_t bmaxSqrtMult,
		index_t bmaxDivN,
		int dcv,
		uint32_t seed,
		bool verbose,
		bool autoMem,
		bool sanity)
	{
		EList<std::string> strs(EBWT_CAT);
		strs.push_back(std::string(str));
		return fromStrings<TStr>(
			strs,
			packed,
			reverse,
			bigEndian,
			lineRate,
			offRate,
			ftabChars,
			file,
			useBlockwise,
			bmax,
			bmaxSqrtMult,
			bmaxDivN,
			dcv,
			seed,
			verbose,
			autoMem,
			sanity);
	}
	
	/**
	 * Static constructor for a pair of forward/reverse indexes for the
	 * given list of reference strings.
	 */
	template<typename TStr>
	static pair<GFM*, GFM*>
	fromStrings(
		const EList<std::string>& strs,
		bool packed,
		int reverse,
		bool bigEndian,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
		const string& file,
		bool useBlockwise,
		index_t bmax,
		index_t bmaxSqrtMult,
		index_t bmaxDivN,
		int dcv,
		uint32_t seed,
		bool verbose,
		bool autoMem,
		bool sanity)
	{
        assert(!strs.empty());
		EList<FileBuf*> is(EBWT_CAT);
		RefReadInParams refparams(false /* color */, REF_READ_FORWARD, false, false);
		// Adapt sequence strings to stringstreams open for input
		auto_ptr<stringstream> ss(new stringstream());
		for(index_t i = 0; i < strs.size(); i++) {
			(*ss) << ">" << i << endl << strs[i] << endl;
		}
		auto_ptr<FileBuf> fb(new FileBuf(ss.get()));
		assert(!fb->eof());
		assert(fb->get() == '>');
		ASSERT_ONLY(fb->reset());
		assert(!fb->eof());
		is.push_back(fb.get());
		// Vector for the ordered list of "records" comprising the input
		// sequences.  A record represents a stretch of unambiguous
		// characters in one of the input sequences.
		EList<RefRecord> szs(EBWT_CAT);
		std::pair<index_t, index_t> sztot;
		sztot = BitPairReference::szsFromFasta(is,
                                               file,
                                               bigEndian,
                                               refparams,
                                               szs,
                                               sanity);
		// Construct Ebwt from input strings and parameters
		GFM<index_t> *gfmFw = new GFM<index_t>(
                                               TStr(),
                                               packed,
                                               -1,           // fw
                                               lineRate,
                                               offRate,      // suffix-array sampling rate
                                               ftabChars,    // number of chars in initial arrow-pair calc
                                               file,         // basename for .?.ebwt files
                                               true,         // fw?
                                               useBlockwise, // useBlockwise
                                               bmax,         // block size for blockwise SA builder
                                               bmaxSqrtMult, // block size as multiplier of sqrt(len)
                                               bmaxDivN,     // block size as divisor of len
                                               dcv,          // difference-cover period
                                               is,           // list of input streams
                                               szs,          // list of reference sizes
                                               sztot.first,  // total size of all unambiguous ref chars
                                               refparams,    // reference read-in parameters
                                               seed,         // pseudo-random number generator seed
                                               -1,           // override offRate
                                               verbose,      // be talkative
                                               autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
                                               sanity);      // verify results and internal consistency
		refparams.reverse = reverse;
		szs.clear();
		sztot = BitPairReference::szsFromFasta(is,
                                               file,
                                               bigEndian,
                                               refparams,
                                               szs,
                                               sanity);
		// Construct Ebwt from input strings and parameters
		GFM<index_t> *gfmBw = new GFM<index_t>(
                                               TStr(),
                                               packed,
                                               reverse == REF_READ_REVERSE,
                                               lineRate,
                                               offRate,      // suffix-array sampling rate
                                               ftabChars,    // number of chars in initial arrow-pair calc
                                               file + ".rev",// basename for .?.ebwt files
                                               false,        // fw?
                                               useBlockwise, // useBlockwise
                                               bmax,         // block size for blockwise SA builder
                                               bmaxSqrtMult, // block size as multiplier of sqrt(len)
                                               bmaxDivN,     // block size as divisor of len
                                               dcv,          // difference-cover period
                                               is,           // list of input streams
                                               szs,          // list of reference sizes
                                               sztot.first,  // total size of all unambiguous ref chars
                                               refparams,    // reference read-in parameters
                                               seed,         // pseudo-random number generator seed
                                               -1,           // override offRate
                                               verbose,      // be talkative
                                               autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
                                               sanity);      // verify results and internal consistency
        return make_pair(gfmFw, gfmBw);
	}
	
	/// Return true iff the Ebwt is packed
	bool isPacked() { return packed_; }

	/**
	 * Write the rstarts array given the szs array for the reference.
	 */
	void szsToDisk(const EList<RefRecord>& szs, ostream& os, int reverse);

    bool checkPosToSzs(const EList<RefRecord>& szs, index_t start_idx, index_t pos)
    {
        assert(szs[start_idx].first);
        for(index_t i = start_idx; i < szs.size(); i++) {
            if((i != start_idx) && (szs[i].first)) {
                // span to next chr
                return false;
            }

            if(pos < szs[i].off) {
                return false;
            } else {
                pos -= szs[i].off;
                if(pos < szs[i].len) {
                    return true;
                }
                pos -= szs[i].len;
            }
        }
        assert(false);
        return false;
    }

	/**
	 * Helper for the constructors above.  Takes a vector of text
	 * strings and joins them into a single string with a call to
	 * joinToDisk, which does a join (with padding) and writes some of
	 * the resulting data directly to disk rather than keep it in
	 * memory.  It then constructs a suffix-array producer (what kind
	 * depends on 'useBlockwise') for the resulting sequence.  The
	 * suffix-array producer can then be used to obtain chunks of the
	 * joined string's suffix array.
	 */
	template <typename TStr>
	void initFromVector(TStr& s,
                        const string& snpfile,
                        const string& htfile,
                        const string& ssfile,
                        const string& exonfile,
                        const string& svfile,
                        const string& repeatfile,
						EList<FileBuf*>& is,
	                    EList<RefRecord>& szs,
	                    index_t sztot,
	                    const RefReadInParams& refparams,
	                    ofstream& out1,
	                    ofstream& out2,
                        const string& outfile,
	                    bool useBlockwise,
	                    index_t bmax,
	                    index_t bmaxSqrtMult,
	                    index_t bmaxDivN,
	                    int dcv,
                        EList<RefRecord>* parent_szs,
                        EList<string>* parent_refnames,
	                    uint32_t seed,
						bool verbose)
	{
		// Compose text strings into single string
		VMSG_NL("Calculating joined length");
		index_t jlen;
		jlen = joinedLen(szs);
        _repeat = (parent_szs != NULL);
		assert_geq(jlen, sztot);
		VMSG_NL("Writing header");
		writeFromMemory(true, out1, out2);
		try {
			VMSG_NL("Reserving space for joined string");
			s.resize(jlen);
			VMSG_NL("Joining reference sequences");
			if(refparams.reverse == REF_READ_REVERSE) {
				{
					Timer timer(cerr, "  Time to join reference sequences: ", _verbose);
					joinToDisk(is, szs, sztot, refparams, s, out1, out2);
				} {
					Timer timer(cerr, "  Time to reverse reference sequence: ", _verbose);
					EList<RefRecord> tmp(EBWT_CAT);
					s.reverse();
					reverseRefRecords(szs, tmp, false, verbose);
					szsToDisk(tmp, out1, refparams.reverse);
				}
			} else {
				Timer timer(cerr, "  Time to join reference sequences: ", _verbose);
				joinToDisk(is, szs, sztot, refparams, s, out1, out2);
				szsToDisk(szs, out1, refparams.reverse);
			}
            
            {
                Timer timer(cerr, "  Time to read SNPs and splice sites: ", _verbose);
                _alts.clear();
                _altnames.clear();
                EList<pair<index_t, index_t> > chr_szs;
                index_t tmp_len = 0;
                for(index_t i = 0; i < szs.size(); i++) {
                    if(szs[i].first) {
                        chr_szs.expand();
                        chr_szs.back().first = tmp_len;
                        chr_szs.back().second = i;
                    }
                    tmp_len += (index_t)szs[i].len;
                }
                
                // Write SNPs into 7.ht2 and 8.ht2
                string file7 = outfile + ".7." + gfm_ext;
                string file8 = outfile + ".8." + gfm_ext;
                
                // Open output stream for the '.7.gfm_ext' file which will
                // hold SNPs (except IDs).
                ofstream fout7(file7.c_str(), ios::binary);
                if(!fout7.good()) {
                    cerr << "Could not open index file for writing: \"" << file7.c_str() << "\"" << endl
                    << "Please make sure the directory exists and that permissions allow writing by" << endl
                    << "HISAT2." << endl;
                    throw 1;
                }
                // Open output stream for the '.8.gfm_ext' file which will
                // hold SNP IDs.
                ofstream fout8(file8.c_str(), ios::binary);
                if(!fout8.good()) {
                    cerr << "Could not open index file for writing: \"" << file8.c_str() << "\"" << endl
                    << "Please make sure the directory exists and that permissions allow writing by" << endl
                    << "HISAT2." << endl;
                    throw 1;
                }
                writeIndex<int32_t>(fout7, 1, this->toBe()); // endianness sentinel
                writeIndex<int32_t>(fout8, 1, this->toBe()); // endianness sentinel

                for(index_t i = 0; i < _refnames.size(); i++) {
                    _refnames_nospace.push_back("");
                    for(index_t j = 0; j < _refnames[i].size(); j++) {
                        char c = _refnames[i][j];
                        if(c == ' ') break;
                        _refnames_nospace.back().push_back(c);
                    }
                }
                
                map<string, index_t> snpID2num;
                if(snpfile != "") {
                    ifstream snp_file(snpfile.c_str(), ios::in);
                    if(!snp_file.is_open()) {
                        cerr << "Error: could not open " << snpfile.c_str() << endl;
                        throw 1;
                    }
                    
                    while(!snp_file.eof()) {
                        // rs73387790	single	22:20000001-21000000	145	A
                        string snp_id;
                        snp_file >> snp_id;
                        if(snp_id.empty() || snp_id[0] == '#') {
                            string line;
                            getline(snp_file, line);
                            continue;
                        }
                        string type, chr;
                        index_t genome_pos;
                        char snp_ch = '\0';
                        string ins_seq;
                        index_t del_len = 0;
                        snp_file >> type >> chr >> genome_pos;
                        if(type == "single") {
                            snp_file >> snp_ch;
                        } else if(type == "deletion") {
                            snp_file >> del_len;
                        } else if(type == "insertion") {
                            snp_file >> ins_seq;
                        }
                        index_t chr_idx = 0;
                        for(; chr_idx < _refnames_nospace.size(); chr_idx++) {
                            if(chr == _refnames_nospace[chr_idx])
                                break;
                        }
                        if(chr_idx >= _refnames_nospace.size()) {
                            continue;
                        }
                        assert_eq(chr_szs.size(), _refnames_nospace.size());
                        assert_lt(chr_idx, chr_szs.size());
                        pair<index_t, index_t> tmp_pair = chr_szs[chr_idx];
                        const index_t sofar_len = tmp_pair.first;
                        const index_t szs_idx = tmp_pair.second;
                        bool involve_Ns = false;
                        index_t pos = genome_pos;
                        index_t add_pos = 0;
                        assert(szs[szs_idx].first);
                        for(index_t i = szs_idx; i < szs.size(); i++) {
                            if(i != szs_idx && szs[i].first) {
                                break;
                            }
                            if(pos < szs[i].off) {
                                involve_Ns = true;
                                break;
                            } else {
                                pos -= szs[i].off;
                                if(pos == 0) {
                                   if(type == "deletion" || type == "insertion") {
                                       involve_Ns = true;
                                       break;
                                   }
                                }
                                if(pos < szs[i].len) {
                                    break;
                                } else {
                                    pos -= szs[i].len;
                                    add_pos += szs[i].len;
                                }
                            }
                        }
                        
                        if(involve_Ns) {
                            continue;
                        }
                        pos = sofar_len + add_pos + pos;
                        if(chr_idx + 1 < chr_szs.size()) {
                            if(pos >= chr_szs[chr_idx + 1].first) {
                                continue;
                            }
                        } else {
                            if(pos >= jlen){
                                continue;
                            }
                        }
                        
                        _alts.expand();
                        ALT<index_t>& snp = _alts.back();
                        snp.pos = pos;
                        if(type == "single") {
                            snp.type = ALT_SNP_SGL;
                            snp_ch = toupper(snp_ch);
                            if(snp_ch != 'A' && snp_ch != 'C' && snp_ch != 'G' && snp_ch != 'T') {
                                _alts.pop_back();
                                continue;
                            }
                            uint64_t bp = asc2dna[(int)snp_ch];
                            assert_lt(bp, 4);
                            if((int)bp == s[pos]) {
                                if (!threeN) {
                                    cerr << "Warning: single type should have a different base than " << "ACGTN"[(int)s[pos]]
                                         << " (" << snp_id << ") at " << genome_pos << " on " << chr << endl;
                                }
                                _alts.pop_back();
                                continue;
                                // throw 1;
                            }
                            snp.len = 1;
                            snp.seq = bp;
                        } else if(type == "deletion") {
                            snp.type = ALT_SNP_DEL;
                            snp.len = del_len;
                            snp.seq = 0;
                            snp.reversed = false;
                        } else if(type == "insertion") {
                            snp.type = ALT_SNP_INS;
                            snp.len = (index_t)ins_seq.size();
                            if(snp.len > sizeof(snp.seq) * 4) {
                                _alts.pop_back();
                                continue;
                            }
                            snp.seq = 0;
                            bool failed = false;
                            for(size_t i = 0; i < ins_seq.size(); i++) {
                                char ch = toupper(ins_seq[i]);
                                if(ch != 'A' && ch != 'C' && ch != 'G' && ch != 'T') {
                                    failed = true;
                                    break;
                                }
                                uint64_t bp = asc2dna[(int)ch];
                                assert_lt(bp, 4);
                                snp.seq = (snp.seq << 2) | bp;
                            }
                            if(failed) {
                                _alts.pop_back();
                                continue;
                            }
                        } else {
                            cerr << "Error: unknown snp type " << type << endl;
                            throw 1;
                        }
                        _altnames.push_back(snp_id);
                        assert_eq(_alts.size(), _altnames.size());
                        snpID2num[snp_id] = (index_t)_alts.size() - 1;
                    }
                    snp_file.close();
                    assert_eq(_alts.size(), _altnames.size());
                }
                
                _haplotypes.clear();
                if(_alts.size() > 0 && htfile != "") {
                    ifstream ht_file(htfile.c_str(), ios::in);
                    if(!ht_file.is_open()) {
                        cerr << "Error: could not open "<< htfile.c_str() << endl;
                        throw 1;
                    }
                    
                    while(!ht_file.eof()) {
                        // ht66    A*01:01:01:01   371     533     66,69,72,75,76,77,84,88,90,92,95
                        string ht_id;
                        ht_file >> ht_id;
                        if(ht_id.empty() || ht_id[0] == '#') {
                            string line;
                            getline(ht_file, line);
                            continue;
                        }
                        string chr, alt_list;
                        index_t left, right;  // inclusive [left, right]
                        ht_file >> chr >> left >> right >> alt_list;
                        assert_leq(left, right);
                        index_t chr_idx = 0;
                        for(; chr_idx < _refnames_nospace.size(); chr_idx++) {
                            if(chr == _refnames_nospace[chr_idx])
                                break;
                        }
                        if(chr_idx >= _refnames_nospace.size()) {
                            continue;
                        }
                        assert_eq(chr_szs.size(), _refnames_nospace.size());
                        assert_lt(chr_idx, chr_szs.size());
                        pair<index_t, index_t> tmp_pair = chr_szs[chr_idx];
                        const index_t sofar_len = tmp_pair.first;
                        const index_t szs_idx = tmp_pair.second;
                        bool inside_Ns = false;
                        index_t add_pos = 0;
                        assert(szs[szs_idx].first);
                        for(index_t i = szs_idx; i < szs.size(); i++) {
                            if(i != szs_idx && szs[i].first) break;
                            if(left < szs[i].off) {
                                inside_Ns = true;
                                break;
                            } else {
                                left -= szs[i].off;
                                right -= szs[i].off;
                                if(left < szs[i].len) {
                                    if(right >= szs[i].len) {
                                        inside_Ns = true;
                                    }
                                    break;
                                } else {
                                    left -= szs[i].len;
                                    right -= szs[i].len;
                                    add_pos += szs[i].len;
                                }
                            }
                        }
                        if(inside_Ns) {
                            continue;
                        }
                        left = sofar_len + add_pos + left;
                        right = sofar_len + add_pos + right;
                        if(chr_idx + 1 < chr_szs.size()) {
                            if(right >= chr_szs[chr_idx + 1].first) {
                                continue;
                            }
                        } else {
                            if(right >= jlen) {
                                continue;
                            }
                        }
                        _haplotypes.expand();
                        _haplotypes.back().left = left;
                        _haplotypes.back().right = right;
                        EList<string> alts;
                        tokenize(alt_list, ",", alts);
                        assert_gt(alts.size(), 0);
                        _haplotypes.back().alts.clear();
                        for(size_t i = 0; i < alts.size(); i++) {
                            const string& alt = alts[i];
                            if(snpID2num.find(alt) != snpID2num.end()) {
                                _haplotypes.back().alts.push_back(snpID2num[alt]);
                            }
                        }
                        if(_haplotypes.back().alts.size() <= 0) {
                            _haplotypes.pop_back();
                        }                        
                    }
                    _haplotypes.sort();
                    ht_file.close();
                } else {
                    for(index_t a = 0; a < _alts.size(); a++) {
                        const ALT<index_t>& alt = _alts[a];
                        if(!alt.snp()) continue;
                        _haplotypes.expand();
                        _haplotypes.back().left = alt.pos;
                        if(alt.deletion()) {
                            _haplotypes.back().right = alt.pos + alt.len - 1;
                        } else {
                            _haplotypes.back().right = alt.pos;
                        }
                        _haplotypes.back().alts.clear();
                        _haplotypes.back().alts.push_back(a);
                    }
                }
                
                if(ssfile != "") {
                    ifstream ss_file(ssfile.c_str(), ios::in);
                    if(!ss_file.is_open()) {
                        cerr << "Error: could not open " << ssfile.c_str() << endl;
                        throw 1;
                    }
                    map<uint64_t, uint64_t> ss_seq;
                    while(!ss_file.eof()) {
                        // 22	16062315	16062810	+
                        string chr;
                        ss_file >> chr;
                        if(chr.empty() || chr[0] == '#') {
                            string line;
                            getline(ss_file, line);
                            continue;
                        }
                        index_t left, right;
                        char strand;
                        ss_file >> left >> right >> strand;
                        // Convert exonic position to intronic position
                        left += 1; right -= 1;
                        if(left >= right) continue;
                        index_t chr_idx = 0;
                        for(; chr_idx < _refnames_nospace.size(); chr_idx++) {
                            if(chr == _refnames_nospace[chr_idx])
                                break;
                        }
                        if(chr_idx >= _refnames_nospace.size()) continue;
                        assert_eq(chr_szs.size(), _refnames_nospace.size());
                        assert_lt(chr_idx, chr_szs.size());
                        pair<index_t, index_t> tmp_pair = chr_szs[chr_idx];
                        const index_t sofar_len = tmp_pair.first;
                        const index_t szs_idx = tmp_pair.second;

                        // check whether ambiguous base is in exon's last and first base
                        if(!checkPosToSzs(szs, szs_idx, left - 1)
                                || !checkPosToSzs(szs, szs_idx, right + 1)) {
                            //cerr << "Skip ss. " << chr << ", " << left - 1 << ", " << right + 1 << endl;
                            continue;
                        }

                        bool inside_Ns = false;
                        index_t add_pos = 0;
                        assert(szs[szs_idx].first);
                        for(index_t i = szs_idx; i < szs.size(); i++) {
                            if(i != szs_idx && szs[i].first) break;
                            if(left < szs[i].off) {
                                inside_Ns = true;
                                break;
                            } else {
                                left -= szs[i].off;
                                right -= szs[i].off;
                                if(left < szs[i].len) {
                                    if(right >= szs[i].len) {
                                        inside_Ns = true;
                                    }
                                    break;
                                } else {
                                    left -= szs[i].len;
                                    right -= szs[i].len;
                                    add_pos += szs[i].len;
                                }
                            }
                        }
                        if(inside_Ns) continue;
                        left = sofar_len + add_pos + left;
                        right = sofar_len + add_pos + right;
                        if(chr_idx + 1 < chr_szs.size()) {
                            if(right >= chr_szs[chr_idx + 1].first) continue;
                        } else {
                            if(right >= jlen) continue;
                        }
                        
                        // Avoid splice sites in repetitive sequences
                        // Otherwise, it will likely explode due to an exponential number of combinations
                        index_t seqlen = 16; assert_leq(seqlen, 16);
                        if(left >= seqlen && right + 1 + seqlen <= s.length()) {
                            uint64_t seq = 0;
                            for(index_t si = left - seqlen; si < left; si++) {
                                seq = seq << 2 | s[si];
                            }
                            for(index_t si = right + 1; si < right + 1 + seqlen; si++) {
                                seq = seq << 2 | s[si];
                            }
                            if(_alts.size() > 0) {
                                if(_alts.back().left == left &&
                                   _alts.back().right == right) continue;
                            }
                            if(ss_seq.find(seq) == ss_seq.end()) ss_seq[seq] = 1;
                            else                                 ss_seq[seq]++;
                        }
                        
                        _alts.expand();
                        ALT<index_t>& alt = _alts.back();
                        alt.type = ALT_SPLICESITE;
                        alt.left = left;
                        alt.right = right;
                        alt.fw = (strand == '+' ? true : false);
                        alt.excluded = false;
                        _altnames.push_back("ss");
                    }
                    ss_file.close();
                    assert_eq(_alts.size(), _altnames.size());
                    
                    for(size_t i = 0; i < _alts.size(); i++) {
                        ALT<index_t>& alt = _alts[i];
                        if(!alt.splicesite()) continue;
                        index_t seqlen = 16; assert_leq(seqlen, 16);
                        if(alt.left >= seqlen && alt.right + 1 + seqlen <= s.length()) {
                            uint64_t seq = 0;
                            for(index_t si = alt.left - seqlen; si < alt.left; si++) {
                                seq = seq << 2 | s[si];
                            }
                            for(index_t si = alt.right + 1; si < alt.right + 1 + seqlen; si++) {
                                seq = seq << 2 | s[si];
                            }
                            assert(ss_seq.find(seq) != ss_seq.end());
                            alt.excluded = ss_seq[seq] > 1;
                        }
                    }
                }
                
                if(exonfile != "") {
                    ifstream exon_file(exonfile.c_str(), ios::in);
                    if(!exon_file.is_open()) {
                        cerr << "Error: could not open " << ssfile.c_str() << endl;
                        throw 1;
                    }
                    while(!exon_file.eof()) {
                        // 22	16062156	16062315	+
                        string chr;
                        exon_file >> chr;
                        if(chr.empty() || chr[0] == '#') {
                            string line;
                            getline(exon_file, line);
                            continue;
                        }
                        index_t left, right;
                        char strand;
                        exon_file >> left >> right >> strand;
                        // Convert exonic position to intronic position
                        left += 1; right -= 1;
                        if(left >= right) continue;
                        index_t chr_idx = 0;
                        for(; chr_idx < _refnames_nospace.size(); chr_idx++) {
                            if(chr == _refnames_nospace[chr_idx])
                                break;
                        }
                        if(chr_idx >= _refnames_nospace.size()) continue;
                        assert_eq(chr_szs.size(), _refnames_nospace.size());
                        assert_lt(chr_idx, chr_szs.size());
                        pair<index_t, index_t> tmp_pair = chr_szs[chr_idx];
                        const index_t sofar_len = tmp_pair.first;
                        const index_t szs_idx = tmp_pair.second;
                        bool inside_Ns = false;
                        index_t add_pos = 0;
                        assert(szs[szs_idx].first);
                        for(index_t i = szs_idx; i < szs.size(); i++) {
                            if(i != szs_idx && szs[i].first) break;
                            if(left < szs[i].off) {
                                inside_Ns = true;
                                break;
                            } else {
                                left -= szs[i].off;
                                right -= szs[i].off;
                                if(left < szs[i].len) {
                                    if(right >= szs[i].len) {
                                        inside_Ns = true;
                                    }
                                    break;
                                } else {
                                    left -= szs[i].len;
                                    right -= szs[i].len;
                                    add_pos += szs[i].len;
                                }
                            }
                        }
                        if(inside_Ns) continue;
                        left = sofar_len + add_pos + left;
                        right = sofar_len + add_pos + right;
                        if(chr_idx + 1 < chr_szs.size()) {
                            if(right >= chr_szs[chr_idx + 1].first) continue;
                        } else {
                            if(right >= jlen) continue;
                        }
                        
                        _alts.expand();
                        ALT<index_t>& alt = _alts.back();
                        alt.type = ALT_EXON;
                        alt.left = left;
                        alt.right = right;
                        alt.fw = (strand == '+' ? true : false);
                        _altnames.push_back("exon");
                    }
                    exon_file.close();
                }
                
                // Todo - implement structural variations
                if(svfile != "") {
                    cerr << "Warning: SV option is not implemented " << svfile.c_str() << endl;
                }
                
                // Sort SNPs and Splice Sites based on positions
                if(_alts.size() > 1) {
                    assert_eq(_alts.size(), _altnames.size());
                    EList<pair<ALT<index_t>, index_t> > buf; buf.resize(_alts.size());
                    EList<string> buf2; buf2.resize(_alts.size());
                    for(size_t i = 0; i < _alts.size(); i++) {
                        buf[i].first = _alts[i];
                        buf[i].second = (index_t)i;
                        buf2[i] = _altnames[i];
                    }
                    buf.sort();
                    for(size_t i = 0; i < _alts.size(); i++) {
                        _alts[i] = buf[i].first;
                        _altnames[i] = buf2[buf[i].second];
                    }
                    
                    EList<index_t> buf3; buf3.resize(_alts.size());
                    for(size_t i = 0; i < buf3.size(); i++) {
                        index_t before = buf[i].second;
                        assert_lt(before, buf3.size());
                        buf3[before] = (index_t)i;
                    }
                    for(size_t h = 0; h < _haplotypes.size(); h++) {
                        EList<index_t, 1>& alts = _haplotypes[h].alts;
                        for(size_t a = 0; a < alts.size(); a++) {
                            index_t before = alts[a];
                            assert_lt(before, buf3.size());
                            alts[a] = buf3[before];
                        }
                    }
#ifndef NDEBUG
                    for(size_t i = 0; i < _alts.size(); i++) {
                        if(i + 1 < _alts.size()) {
                            assert(_alts[i] < _alts[i+1]);
                        }
                        const ALT<index_t>& alt = _alts[i];
                        if(alt.snp()) {
                            assert(_altnames[i] != "");
                        } else if(alt.splicesite()) {
                            assert(_altnames[i] == "ss");
                        } else if(alt.exon()) {
                            assert(_altnames[i] == "exon");
                        } else {
                            assert(false);
                        }
                    }
#endif
                }
                
                writeIndex<index_t>(fout7, (index_t)_alts.size(), this->toBe());
                writeIndex<index_t>(fout8, (index_t)_alts.size(), this->toBe());
                for(index_t i = 0; i < _alts.size(); i++) {
                    _alts[i].write(fout7, this->toBe());
                    fout8 << _altnames[i] << endl;
                }
                
                writeIndex<index_t>(fout7, (index_t)_haplotypes.size(), this->toBe());
                for(index_t i = 0; i < _haplotypes.size(); i++) {
                    _haplotypes[i].write(fout7, this->toBe());
                }
                
                EList<Repeat<index_t> >& repeats = _repeatdb.repeats();
                if(_repeat) {
                    ifstream repeat_file(repeatfile.c_str(), ios::in);
                    if(!repeat_file.is_open()) {
                        cerr << "Error: could not open " << ssfile.c_str() << endl;
                        throw 1;
                    }
                    if(parent_szs == NULL) {
                        throw 1;
                    }
                    if(parent_refnames == NULL) {
                        throw 1;
                    }

                    EList<pair<index_t, index_t> > parent_chr_szs;
                    index_t tmp_len = 0;
                    for(index_t i = 0; i < parent_szs->size(); i++) {
                        if((*parent_szs)[i].first) {
                            parent_chr_szs.expand();
                            parent_chr_szs.back().first = tmp_len;
                            parent_chr_szs.back().second = i;
                        }
                        tmp_len += (index_t)(*parent_szs)[i].len;
                    }
                    index_t parent_jlen = joinedLen(*parent_szs);

                    string prev_repName = "";
                    while(!repeat_file.eof()) {
                        // >rep1*0    rep    0    100    470    0
                        // 20_rep:26622650:+ 20_rep:26628088:+ 20_rep:26632508:+ 20_rep:26635636:+
                        // 20_rep:26669936:+ 20_rep:26672654:+ 20_rep:26675373:+ 20_rep:26678095:+
                        string repName, repAlleleName;
                        repeat_file >> repAlleleName;
                        if(repAlleleName.empty()) // Reached the end of file
                            break;
                        if(repAlleleName[0] != '>') {
                            cerr << "Error: the file format is not correct" << endl;
                            throw 1;
                        }
                        repAlleleName = repAlleleName.substr(1); // Remove '>'
                        index_t alleleID = 0;
                        size_t star_pos = repAlleleName.find('*');
                        if(star_pos >= repAlleleName.length()) {
                            repName = repAlleleName;
                        } else {
                            repName = repAlleleName.substr(0, star_pos);
                            string strID = repAlleleName.substr(star_pos + 1);
                            istringstream(strID) >> alleleID;
                        }

                        string refRepName;
                        index_t repPos, repLen;
                        repeat_file >> refRepName >> repPos >> repLen;
                        index_t rep_idx = 0;
                        for(; rep_idx < _refnames_nospace.size(); rep_idx++) {
                            if(refRepName == _refnames_nospace[rep_idx])
                                break;
                        }
                        if(rep_idx >= _refnames_nospace.size()) {
                            cerr << "Error: " << refRepName << " is not found in " << endl;
                            throw 1;
                        }

                        if(repeats.size() == 0 ||
                           repeats.back().repID != rep_idx ||
                           repeats.back().repName != repName) {
                            if(repeats.size() > 0) {
                                repeats.back().positions.sort();
                            }
                            repeats.expand();
                            repeats.back().init(repName, rep_idx, repPos, repLen);
                        }

                        // update repPos and repLen
                        if(repPos < repeats.back().repPos) {
                            repeats.back().repLen += (repeats.back().repPos - repPos);
                            repeats.back().repPos = repPos;
                        }
                        if(repPos + repLen > repeats.back().repPos + repeats.back().repLen) {
                            repeats.back().repLen = repPos + repLen - repeats.back().repPos;
                        }

                        size_t baseOff = 0;
                        if(repeats.size() > 1 && repeats[repeats.size() - 2].repID == rep_idx) {
                            baseOff = repeats[repeats.size() - 2].repPos + repeats[repeats.size() - 2].repLen;
                        }

                        index_t numCoords, numAlts;
                        repeat_file >> numCoords >> numAlts;
                        EList<index_t> snpIDs;
                        EList<string> snpStrIDs;
                        if(numAlts > 0) {
                            string snpStrID;
                            repeat_file >> snpStrID;
                            tokenize(snpStrID, ",", snpStrIDs);
                            if(snpStrIDs.size() != numAlts) {
                                assert(false);
                                cerr << "Error: the number of SNPs (" << snpIDs.size() << ", " << snpStrID << ") does not equal to " << numAlts << endl;
                                throw 1;
                            }
                            for(index_t i = 0; i < snpStrIDs.size(); i++) {
                                if(snpID2num.find(snpStrIDs[i]) == snpID2num.end()) {
                                    cerr << "Error: " << snpStrIDs[i] << " is not found" << endl;
                                    throw 1;
                                }
                                index_t numID = snpID2num[snpStrIDs[i]];
                                snpIDs.push_back(numID);
                            }
                        }

                        EList<RepeatCoord<index_t> >& positions = repeats.back().positions;
                        size_t sofar_numCoords = positions.size();
                        while(positions.size() - sofar_numCoords < numCoords) {
                            string chr_pos;
                            repeat_file >> chr_pos;
                            size_t colon_pos = chr_pos.find(':');
                            if(colon_pos + 1 >= chr_pos.length()) {
                                cerr << "Error: : is not found in " << chr_pos << endl;
                                throw 1;
                            }
                            string chr = chr_pos.substr(0, colon_pos);
                            string strPos = chr_pos.substr(colon_pos + 1, chr_pos.length() - colon_pos - 3);
                            bool repfw = (chr_pos[chr_pos.length() - 1] == '+');
                            index_t pos = 0;
                            istringstream(strPos) >> pos;
                            index_t chr_idx = 0;
                            for(; chr_idx < parent_refnames->size(); chr_idx++) {
                                if(chr == (*parent_refnames)[chr_idx])
                                    break;
                            }
                            if(chr_idx >= parent_refnames->size()) {
                                cerr << "Error: " << chr << " is not found in " << endl;
                                throw 1;
                            }
                            assert_eq(parent_chr_szs.size(), parent_refnames->size());
                            assert_lt(chr_idx, parent_chr_szs.size());

                            positions.expand();
                            positions.back().tid = chr_idx;
                            positions.back().toff = pos;
                            positions.back().fw = repfw;
                            positions.back().alleleID = alleleID;

                            pair<index_t, index_t> tmp_pair = parent_chr_szs[chr_idx];
                            const index_t sofar_len = tmp_pair.first;
                            const index_t szs_idx = tmp_pair.second;
                            bool involve_Ns = false;
                            index_t add_pos = 0;
                            assert((*parent_szs)[szs_idx].first);
                            for(index_t i = szs_idx; i < parent_szs->size(); i++) {
                                if(i != szs_idx && (*parent_szs)[i].first) {
                                    break;
                                }
                                if(pos < (*parent_szs)[i].off) {
                                    involve_Ns = true;
                                    break;
                                } else {
                                    pos -= (*parent_szs)[i].off;
                                    if(pos < (*parent_szs)[i].len) {
                                        break;
                                    } else {
                                        pos -= (*parent_szs)[i].len;
                                        add_pos += (*parent_szs)[i].len;
                                    }
                                }
                            }
                            if(involve_Ns) {
                                assert(false);
                                throw 1;
                            }
                            pos = sofar_len + add_pos + pos;
                            if(chr_idx + 1 < parent_chr_szs.size()) {
                                if(pos >= parent_chr_szs[chr_idx + 1].first) {
                                    assert(false);
                                    throw 1;
                                }
                            } else {
                                if(pos >= parent_jlen){
                                    assert(false);
                                    throw 1;
                                }
                            }

                            positions.back().joinedOff = pos;
                        }
                        repeats.back().alleles.expand();
                        assert_geq(repPos, baseOff);
                        repeats.back().alleles.back().init(repPos - baseOff, repLen);

                    }
                    if(repeats.size() > 0) {
                        repeats.back().positions.sort();
                    }
                    repeat_file.close();

                    index_t total_repeat_len = 0;
                    for(size_t r = 0; r + 1 < repeats.size(); r++) {
                        if(repeats[r].repID != repeats[r+1].repID) {
                            index_t repeat_len = repeats[r].repPos + repeats[r].repLen;
                            total_repeat_len += repeat_len;
                        }
                    }
                    index_t repeat_len = repeats.back().repPos + repeats.back().repLen;
                    total_repeat_len += repeat_len;
                    if(total_repeat_len != s.length()) {
                        cerr << "Error: repeat length (" << repeats.back().repPos + repeats.back().repLen;
                        cerr << ") does not match sequence length (" << s.length() << ")" << endl;
                        throw 1;
                    }

                    _repeatLens.resizeExact(szs.size());
                    for(size_t i = 0; i < _repeatLens.size(); i++) {
                        _repeatLens[i].first = numeric_limits<index_t>::max();
                        _repeatLens[i].second = 0;
                    }
                    for(size_t i = 0; i < repeats.size(); i++) {
                        index_t id = repeats[i].repID;
                        index_t len = repeats[i].repLen;
                        assert_lt(id, _repeatLens.size());
                        if(_repeatLens[id].first > len) {
                            _repeatLens[id].first = len;
                        }
                        if(_repeatLens[id].second < len) {
                            _repeatLens[id].second = len;
                        }
                    }

                    writeIndex<index_t>(fout7, _repeatLens.size(), this->toBe());
                    for(size_t i = 0; i < _repeatLens.size(); i++) {
                        writeIndex<index_t>(fout7, _repeatLens[i].first, this->toBe());
                        writeIndex<index_t>(fout7, _repeatLens[i].second, this->toBe());
                    }
                    _repeatdb.write(fout7, this->toBe());
                    writeIndex<index_t>(fout7, chr_szs.size(), this->toBe()); // number of repeat indexes
                    EList<string> seqs;
                    EList<streampos> tableFilePos;
                    streampos filepos = fout7.tellp();
                    for(size_t i = 0; i < chr_szs.size(); i++) {
                        writeIndex<uint64_t>(fout7, 0, this->toBe());
                    }
                    for(size_t i = 0; i < repeats.size(); i++) {
                        const Repeat<index_t>& repeat = repeats[i];
                        assert_lt(repeat.repID, chr_szs.size());
                        index_t template_len = 0;
                        if(repeat.repID + 1 < chr_szs.size()) {
                            template_len = chr_szs[repeat.repID + 1].first - chr_szs[repeat.repID].first;
                        } else {
                            template_len = s.length() - chr_szs[repeat.repID].first;
                        }
                        assert_leq(repeat.repPos + repeat.repLen, template_len);
                        index_t pos = chr_szs[repeat.repID].first + repeat.repPos;
                        assert_leq(pos + repeat.repLen, s.length());
                        seqs.expand();
                        seqs.back().clear();
                        for(index_t j = 0; j < repeat.repLen; j++) {
                            int c = s[pos + j];
                            assert_range(0, 3, c);
                            seqs.back().push_back("ACGT"[c]);
                        }

                        if(i + 1 == repeats.size() || repeats[i].repID != repeats[i+1].repID) {
                            const size_t w = RB_Minimizer<string>::default_w, k = RB_Minimizer<string>::default_k;
                            RB_KmerTable kmer_table;
                            kmer_table.build(seqs, w, k);
                            kmer_table.write(fout7, this->toBe());
                            seqs.clear();
                            tableFilePos.push_back(fout7.tellp());
                        }
                    }
                    assert_eq(tableFilePos.size(), chr_szs.size());
                    streampos origpos = fout7.tellp();
                    fout7.seekp(filepos);
                    for(size_t i = 0; i < tableFilePos.size(); i++) {
                        writeIndex<uint64_t>(fout7, tableFilePos[i], this->toBe());
                    }
                    fout7.seekp(origpos);
                }

                fout7.close();
                fout8.close();
            }
			// Joined reference sequence now in 's'
		} catch(bad_alloc& e) {
			// If we throw an allocation exception in the try block,
			// that means that the joined version of the reference
			// string itself is too larger to fit in memory.  The only
			// alternatives are to tell the user to give us more memory
			// or to try again with a packed representation of the
			// reference (if we haven't tried that already).
			cerr << "Could not allocate space for a joined string of " << jlen << " elements." << endl;
			if(!isPacked() && _passMemExc) {
				// Pass the exception up so that we can retry using a
				// packed string representation
				throw e;
			}
			// There's no point passing this exception on.  The fact
			// that we couldn't allocate the joined string means that
			// --bmax is irrelevant - the user should re-run with
			// ebwt-build-packed
			if(isPacked()) {
				cerr << "Please try running bowtie-build on a computer with more memory." << endl;
			} else {
				cerr << "Please try running bowtie-build in packed mode (-p/--packed) or in automatic" << endl
				     << "mode (-a/--auto), or try again on a computer with more memory." << endl;
			}
			if(sizeof(void*) == 4) {
				cerr << "If this computer has more than 4 GB of memory, try using a 64-bit executable;" << endl
				     << "this executable is 32-bit." << endl;
			}
			throw 1;
		}

		// Succesfully obtained joined reference string
		assert_geq(s.length(), jlen);
		if(bmax != (index_t)OFF_MASK) {
			// VMSG_NL("bmax according to bmax setting: " << bmax);
		}
		else if(bmaxSqrtMult != (index_t)OFF_MASK) {
			bmax *= bmaxSqrtMult;
			// VMSG_NL("bmax according to bmaxSqrtMult setting: " << bmax);
		}
		else if(bmaxDivN != (index_t)OFF_MASK) {
			bmax = max<uint32_t>(jlen / (bmaxDivN * _nthreads), 1);
			// VMSG_NL("bmax according to bmaxDivN setting: " << bmax);
		}
		else {
			bmax = (uint32_t)sqrt(s.length());
			// VMSG_NL("bmax defaulted to: " << bmax);
		}

		int iter = 0;
		bool first = true;
		streampos out1pos = out1.tellp();
		streampos out2pos = out2.tellp();

        if(!_repeat) {
            // Look for bmax/dcv parameters that work.
            while(true) {
                if(!first && bmax < 40 && _passMemExc) {
                    cerr << "Could not find approrpiate bmax/dcv settings for building this index." << endl;
                    if(!isPacked()) {
                        // Throw an exception exception so that we can
                        // retry using a packed string representation
                        throw bad_alloc();
                    } else {
                        cerr << "Already tried a packed string representation." << endl;
                    }
                    cerr << "Please try indexing this reference on a computer with more memory." << endl;
                    if(sizeof(void*) == 4) {
                        cerr << "If this computer has more than 4 GB of memory, try using a 64-bit executable;" << endl
                        << "this executable is 32-bit." << endl;
                    }
                    throw 1;
                }
                if(!first) {
                    out1.seekp(out1pos);
                    out2.seekp(out2pos);
                }
                if(dcv > 4096) dcv = 4096;
                if((iter % 6) == 5 && dcv < 4096 && dcv != 0) {
                    dcv <<= 1; // double difference-cover period
                } else {
                    bmax -= (bmax >> 2); // reduce by 25%
                }
                iter++;
                try {
                    if(_alts.empty()) {
                        VMSG("Using parameters --bmax " << bmax);
                        if(dcv == 0) {
                            VMSG_NL(" and *no difference cover*");
                        } else {
                            VMSG_NL(" --dcv " << dcv);
                        }
                        {
                            VMSG_NL("  Doing ahead-of-time memory usage test");
                            // Make a quick-and-dirty attempt to force a bad_alloc iff
                            // we would have thrown one eventually as part of
                            // constructing the DifferenceCoverSample
                            dcv <<= 1;
                            index_t sz = (index_t)DifferenceCoverSample<TStr>::simulateAllocs(s, dcv >> 1);
                            if(_nthreads > 1) sz *= (_nthreads + 1);
                            AutoArray<uint8_t> tmp(sz, EBWT_CAT);
                            dcv >>= 1;
                            // Likewise with the KarkkainenBlockwiseSA
                            sz = (index_t)KarkkainenBlockwiseSA<TStr>::simulateAllocs(s, bmax);
                            AutoArray<uint8_t> tmp2(sz, EBWT_CAT);
                            // Now throw in the 'ftab' and 'isaSample' structures
                            // that we'll eventually allocate in buildToDisk
                            AutoArray<index_t> ftab(_gh._ftabLen * 2, EBWT_CAT);
                            AutoArray<uint8_t> side(_gh._sideSz, EBWT_CAT);
                            // Grab another 20 MB out of caution
                            AutoArray<uint32_t> extra(20*1024*1024, EBWT_CAT);
                            // If we made it here without throwing bad_alloc, then we
                            // passed the memory-usage stress test
                            VMSG("  Passed!  Constructing with these parameters: --bmax " << bmax << " --dcv " << dcv);
                            if(isPacked()) {
                                VMSG(" --packed");
                            }
                            VMSG_NL("");
                        }
                        VMSG_NL("Constructing suffix-array element generator");
                        KarkkainenBlockwiseSA<TStr> bsa(s, bmax, _nthreads, dcv, seed, _sanity, _passMemExc, _verbose, outfile);
                        assert(bsa.suffixItrIsReset());
                        assert_eq(bsa.size(), s.length()+1);
                        VMSG_NL("Converting suffix-array elements to index image");
                        buildToDisk(bsa, s, out1, out2);
                    } else {
                        RefGraph<index_t>* graph = new RefGraph<index_t>(
                                                                         s,
                                                                         szs,
                                                                         _alts,
                                                                         _haplotypes,
                                                                         outfile,
                                                                         _nthreads,
                                                                         verbose);
                        PathGraph<index_t>* pg = new PathGraph<index_t>(
                                                                        *graph,
                                                                        outfile,
                                                                        std::numeric_limits<index_t>::max(),
                                                                        _nthreads,
                                                                        verbose);

                        if(verbose) { cerr << "Generating edges... " << endl; }
                        if(!pg->generateEdges(*graph)) { return; }
                        // Re-initialize GFM parameters to reflect real number of edges (gbwt string)
                        _gh.init(
                                 _gh.len(),
                                 pg->getNumEdges(),
                                 pg->getNumNodes(),
                                 _gh.lineRate(),
                                 _gh.offRate(),
                                 _gh.ftabChars(),
                                 0,
                                 _gh.entireReverse());
                        buildToDisk(*pg, s, out1, out2);
                        delete pg; pg = NULL;
                        delete graph; graph = NULL;
                    }
                    out1.flush(); out2.flush();
                    if(out1.fail() || out2.fail()) {
                        cerr << "An error occurred writing the index to disk.  Please check if the disk is full." << endl;
                        throw 1;
                    }
                    break;
                } catch(bad_alloc& e) {
                    if(_passMemExc) {
                        VMSG_NL("  Ran out of memory; automatically trying more memory-economical parameters.");
                    } else {
                        cerr << "Out of memory while constructing suffix array.  Please try using a smaller" << endl
                        << "number of blocks by specifying a smaller --bmax or a larger --bmaxdivn" << endl;
                        throw 1;
                    }
                }
                first = false;
            }
            assert(repOk());

            // Now write reference sequence names on the end
            assert_eq(this->_refnames.size(), this->_nPat);
            for(index_t i = 0; i < this->_refnames.size(); i++) {
                out1 << this->_refnames[i].c_str() << endl;
            }
            out1 << '\0';
            out1.flush(); out2.flush();
            if(out1.fail() || out2.fail()) {
                cerr << "An error occurred writing the index to disk.  Please check if the disk is full." << endl;
                throw 1;
            }
        }
		VMSG_NL("Returning from initFromVector");
	}
	
	/**
	 * Return the length that the joined string of the given string
	 * list will have.  Note that this is indifferent to how the text
	 * fragments correspond to input sequences - it just cares about
	 * the lengths of the fragments.
	 */
	index_t joinedLen(EList<RefRecord>& szs) {
		index_t ret = 0;
		for(unsigned int i = 0; i < szs.size(); i++) {
			ret += (index_t)szs[i].len;
		}
		return ret;
	}

	/// Destruct an Ebwt
	~GFM() {
		_fchr.reset();
		_ftab.reset();
		_eftab.reset();
		_plen.reset();
		_rstarts.reset();
		_offs.reset();
		_gfm.reset();
		if(offs() != NULL && useShmem_) {
			FREE_SHARED(offs());
		}
		if(gfm() != NULL && useShmem_) {
			FREE_SHARED(gfm());
		}
		if (_in1 != NULL) fclose(_in1);
		if (_in2 != NULL) fclose(_in2);
	}

	/// Accessors
	inline const GFMParams<index_t>& gh() const     { return _gh; }
    index_t    numZOffs() const     { return _zOffs.size(); }
    index_t    zOff(index_t i) const         { assert_lt(i, _zOffs.size()); return _zOffs[i]; }
    index_t    zGbwtByteOff(index_t i) const { assert_lt(i, _zGbwtByteOffs.size()); return _zGbwtByteOffs[i]; }
    int        zGbwtBpOff(index_t i) const   { assert_lt(i, _zGbwtBpOffs.size()); return _zGbwtBpOffs[i]; }
	index_t    nPat() const        { return _nPat; }
	index_t    nFrag() const       { return _nFrag; }
	inline index_t*   fchr()              { return _fchr.get(); }
	inline index_t*   ftab()              { return _ftab.get(); }
	inline index_t*   eftab()             { return _eftab.get(); }
    inline index_t*   offs()              { return _offs.get(); }
	inline index_t*   plen()              { return _plen.get(); }
	inline index_t*   rstarts()           { return _rstarts.get(); }
	inline uint8_t*   gfm()               { return _gfm.get(); }
	inline const index_t* fchr() const    { return _fchr.get(); }
	inline const index_t* ftab() const    { return _ftab.get(); }
	inline const index_t* eftab() const   { return _eftab.get(); }
    inline const index_t* offs() const    { return _offs.get(); }
	inline const index_t* plen() const    { return _plen.get(); }
	inline const index_t* rstarts() const { return _rstarts.get(); }
	inline const uint8_t* gfm() const     { return _gfm.get(); }
    inline const EList<ALT<index_t> >& alts() const  { return _alts; }
    inline const EList<string>& altnames() const     { return _altnames; }
	bool        toBe() const         { return _toBigEndian; }
	bool        verbose() const      { return _verbose; }
	bool        sanityCheck() const  { return _sanity; }
	EList<string>& refnames()        { return _refnames; }
    bool        fw() const           { return fw_; }
    bool        repeat() const       { return _repeat; }
    const EList<uint8_t>& getRepeatIncluded() const { return _repeatIncluded; }

#ifdef POPCNT_CAPABILITY
    bool _usePOPCNTinstruction;
#endif

	/**
	 * Returns true iff the index contains the given string (exactly).  The
	 * given string must contain only unambiguous characters.  TODO:
	 * support skipping of ambiguous characters.
	 */
	bool contains(
		const BTDnaString& str,
		index_t *top = NULL,
		index_t *bot = NULL) const;

	/**
	 * Returns true iff the index contains the given string (exactly).  The
	 * given string must contain only unambiguous characters.  TODO:
	 * support skipping of ambiguous characters.
	 */
	bool contains(
		const char *str,
		index_t *top = NULL,
		index_t *bot = NULL) const
	{
		return contains(BTDnaString(str, true), top, bot);
	}
	
	/// Return true iff the Ebwt is currently in memory
	bool isInMemory() const {
		if(gfm() != NULL) {
			// Note: We might have skipped loading _offs, _ftab,
			// _eftab, and _rstarts depending on whether this is the
			// reverse index and what algorithm is being used.
			assert(_gh.repOk());
			//assert(_ftab != NULL);
			//assert(_eftab != NULL);
			assert(fchr() != NULL);
			//assert(_offs != NULL);
			//assert(_rstarts != NULL);
			// assert_neq(_zGbwtByteOff, INDEX_MAX);
			// assert_neq(_zGbwtBpOff, -1);
			return true;
		} else {
			assert(ftab() == NULL);
			assert(eftab() == NULL);
			assert(fchr() == NULL);
			assert(offs() == NULL);
			assert(rstarts() == NULL);
            assert_eq(_zOffs.size(), 0);
            assert_eq(_zGbwtByteOffs.size(), 0);
            assert_eq(_zGbwtBpOffs.size(), 0);
			return false;
		}
	}

	/// Return true iff the Ebwt is currently stored on disk
	bool isEvicted() const {
		return !isInMemory();
	}

	/**
	 * Load this Ebwt into memory by reading it in from the _in1 and
	 * _in2 streams.
	 */
	void loadIntoMemory(
		int needEntireReverse,
		bool loadSASamp,
		bool loadFtab,
		bool loadRstarts,
		bool loadNames,
		bool verbose)
	{
		readIntoMemory(
			needEntireReverse, // require reverse index to be concatenated reference reversed
			loadSASamp,  // load the SA sample portion?
			loadFtab,    // load the ftab (_ftab[] and _eftab[])?
			loadRstarts, // load the r-starts (_rstarts[])?
			false,       // stop after loading the header portion?
			NULL,        // params
			false,       // mmSweep
			loadNames,   // loadNames
			verbose);    // startVerbose
	}

	/**
	 * Frees memory associated with the Ebwt.
	 */
	void evictFromMemory() {
		assert(isInMemory());
		_fchr.free();
		_ftab.free();
		_eftab.free();
		_rstarts.free();
		_offs.free(); // might not be under control of APtrWrap
		_gfm.free(); // might not be under control of APtrWrap
		// Keep plen; it's small and the client may want to seq it
		// even when the others are evicted.
		//_plen  = NULL;
        _zOffs.clear();
        _zGbwtByteOffs.clear();
        _zGbwtBpOffs.clear();
	}

	/**
	 * Turn a substring of 'seq' starting at offset 'off' and having
	 * length equal to the index's 'ftabChars' into an int that can be
	 * used to index into the ftab array.
	 */
	index_t ftabSeqToInt(
		const BTDnaString& seq,
		index_t off,
		bool rev) const
	{
		int fc = _gh._ftabChars;
		index_t lo = off, hi = lo + fc;
		assert_leq(hi, seq.length());
		index_t ftabOff = 0;
		for(int i = 0; i < fc; i++) {
			bool fwex = fw();
			if(rev) fwex = !fwex;
			// We add characters to the ftabOff in the order they would
			// have been consumed in a normal search.  For BWT, this
			// means right-to-left order; for BWT' it's left-to-right.
			int c = (fwex ? seq[lo + i] : seq[hi - i - 1]);
			if(c > 3) {
				return std::numeric_limits<index_t>::max();
			}
			assert_range(0, 3, c);
			ftabOff <<= 2;
			ftabOff |= c;
		}
		return ftabOff;
	}
	
	/**
	 * Non-static facade for static function ftabHi.
	 */
	index_t ftabHi(index_t i) const {
		return GFM<index_t>::ftabHi(
			ftab(),
			eftab(),
            _gh.linearFM() ? _gh._len : _gh._gbwtLen,
			_gh._ftabLen,
		    _gh._eftabLen,
			i);
	}

	/**
	 * Get "high interpretation" of ftab entry at index i.  The high
	 * interpretation of a regular ftab entry is just the entry
	 * itself.  The high interpretation of an extended entry is the
	 * second correpsonding ui32 in the eftab.
	 *
	 * It's a static member because it's convenient to ask this
	 * question before the Ebwt is fully initialized.
	 */
	static index_t ftabHi(
		const index_t *ftab,
		const index_t *eftab,
		index_t gbwtLen,
		index_t ftabLen,
		index_t eftabLen,
		index_t i)
	{
		assert_lt(i, ftabLen);
		if(ftab[i] <= gbwtLen) {
			return ftab[i];
		} else {
			index_t efIdx = ftab[i] ^ (index_t)INDEX_MAX;
			assert_lt(efIdx*2+1, eftabLen);
			return eftab[efIdx*2+1];
		}
	}

	/**
	 * Non-static facade for static function ftabLo.
	 */
	index_t ftabLo(index_t i) const {
		return GFM<index_t>::ftabLo(
			ftab(),
			eftab(),
			_gh.linearFM() ? _gh._len : _gh._gbwtLen,
			_gh._ftabLen,
		    _gh._eftabLen,
			i);
	}
	
	/**
	 * Get low bound of ftab range.
	 */
	index_t ftabLo(const BTDnaString& seq, index_t off) const {
		return ftabLo(ftabSeqToInt(seq, off, false));
	}

	/**
	 * Get high bound of ftab range.
	 */
	index_t ftabHi(const BTDnaString& seq, index_t off) const {
		return ftabHi(ftabSeqToInt(seq, off, false));
	}
	
	/**
	 * Extract characters from seq starting at offset 'off' and going either
	 * forward or backward, depending on 'rev'.  Order matters when compiling
	 * the integer that gets looked up in the ftab.  Each successive character
	 * is ORed into the least significant bit-pair, and characters are
	 * integrated in the direction of the search.
	 */
	bool
	ftabLoHi(
		const BTDnaString& seq, // sequence to extract from
		index_t off,             // offset into seq to begin extracting
		bool rev,               // reverse while extracting
		index_t& top,
		index_t& bot) const
	{
		index_t fi = ftabSeqToInt(seq, off, rev);
		if(fi == std::numeric_limits<index_t>::max()) {
			return false;
		}
		top = ftabHi(fi);
		bot = ftabLo(fi+1);
		assert_geq(bot, top);
		return true;
	}
	
	/**
	 * Get "low interpretation" of ftab entry at index i.  The low
	 * interpretation of a regular ftab entry is just the entry
	 * itself.  The low interpretation of an extended entry is the
	 * first correpsonding ui32 in the eftab.
	 *
	 * It's a static member because it's convenient to ask this
	 * question before the Ebwt is fully initialized.
	 */
	static index_t ftabLo(
		const index_t *ftab,
		const index_t *eftab,
		index_t gbwtLen,
		index_t ftabLen,
		index_t eftabLen,
		index_t i)
	{
		assert_lt(i, ftabLen);
		if(ftab[i] <= gbwtLen) {
			return ftab[i];
		} else {
			index_t efIdx = ftab[i] ^ (index_t)INDEX_MAX;
			assert_lt(efIdx*2+1, eftabLen);
			return eftab[efIdx*2];
		}
	}

	/**
	 * Try to resolve the reference offset of the BW element 'elt'.  If
	 * it can be resolved immediately, return the reference offset.  If
	 * it cannot be resolved immediately, return 0xffffffff.
	 */
	index_t tryOffset(index_t elt, index_t node) const {
		assert(offs() != NULL);
        for(index_t i = 0; i < _zOffs.size(); i++) {
            if(elt == _zOffs[i]) return 0;
        }
		if((node & _gh._offMask) == node) {
			index_t nodeOff = node >> _gh._offRate;
			assert_lt(nodeOff, _gh._offsLen);
			index_t off = offs()[nodeOff];
			return off;
		} else {
			// Try looking at zoff
			return (index_t)INDEX_MAX;
		}
	}

	/**
	 * Try to resolve the reference offset of the BW element 'elt' such
	 * that the offset returned is at the right-hand side of the
	 * forward reference substring involved in the hit.
	 */
	index_t tryOffset(
		index_t elt,
		bool fw,
		index_t hitlen) const
	{
		index_t off = tryOffset(elt);
		if(off != (index_t)INDEX_MAX && !fw) {
			assert_lt(off, _gh._len);
			off = _gh._len - off - 1;
			assert_geq(off, hitlen-1);
			off -= (hitlen-1);
			assert_lt(off, _gh._len);
		}
		return off;
	}

	/**
	 * Walk 'steps' steps to the left and return the row arrived at.
	 */
	index_t walkLeft(index_t row, index_t steps) const;

	/**
	 * Resolve the reference offset of the BW element 'elt'.
	 */
	index_t getOffset(index_t row, index_t node = 0) const;

	/**
	 * Resolve the reference offset of the BW element 'elt' such that
	 * the offset returned is at the right-hand side of the forward
	 * reference substring involved in the hit.
	 */
	index_t getOffset(
		index_t elt,
		bool fw,
		index_t hitlen) const;

	/**
	 * When using read() to create an Ebwt, we have to set a couple of
	 * additional fields in the Ebwt object that aren't part of the
	 * parameter list and are not stored explicitly in the file.  Right
	 * now, this just involves initializing _zEbwtByteOff and
	 * _zEbwtBpOff from _zOff.
	 */
	void postReadInit(const GFMParams<index_t>& gh) {
        _zGbwtByteOffs.resizeExact(_zOffs.size());
        _zGbwtBpOffs.resizeExact(_zOffs.size());
        for(index_t i = 0; i < _zOffs.size(); i++) {
            index_t sideNum     = _zOffs[i] / gh._sideGbwtLen;
            index_t sideCharOff = _zOffs[i] % gh._sideGbwtLen;
            index_t sideByteOff = sideNum * gh._sideSz;
            _zGbwtByteOffs[i] = sideCharOff >> 2;
            assert_lt(_zGbwtByteOffs[i], gh._sideGbwtSz);
            _zGbwtBpOffs[i] = sideCharOff & 3;
            assert_lt(_zGbwtBpOffs[i], 4);
            _zGbwtByteOffs[i] += sideByteOff;
        }
        assert(repOk(gh)); // Ebwt should be fully initialized now
	}
    
	/**
	 * Given basename of an Ebwt index, read and return its flag.
	 */
	static int32_t readVersionFlags(const string& instr, int& major, int& minor, string& extra_version);
    
    static void readProgramVersion(int& major_version, int& minor_version, string& extra_version) {
        char extra[256] = {0,};
        int second_version;
        sscanf(HISAT2_VERSION, "%d.%d.%d-%s",
               &second_version,
               &major_version,
               &minor_version,
               extra);
        extra_version = extra;
    }
    
    static void readIndexVersion(int index_version, int& major_version, int& minor_version, string& extra_version) {
        major_version = (index_version >> 16) & 0xff;
        minor_version = (index_version >> 8) & 0xff;
        if((index_version & 0xff) == 1) {
            extra_version = "alpha";
        } else if((index_version & 0xff) == 2) {
            extra_version = "beta";
        } else {
            extra_version = "";
        }
    }

    static int getIndexVersion() {
        int major_version = 0, minor_version = 0;
        string extra_version;
        readProgramVersion(major_version, minor_version, extra_version);
        int version = 2; // HISAT2
        version = (version << 8) | (major_version & 0xff);
        version = (version << 8) | (minor_version & 0xff);
        version = version << 8;
        if(extra_version == "alpha") {
            version |= 0x1;
        } else if(extra_version == "beta") {
            version |= 0x2;
        }
        return version;
    }

	/**
	 * Pretty-print the Ebwt to the given output stream.
	 */
	void print(ostream& out) const {
		print(out, _gh);
	}
	
	/**
	 * Pretty-print the Ebwt and given EbwtParams to the given output
	 * stream.
	 */
	void print(ostream& out, const GFMParams<index_t>& gh) const {
		gh.print(out); // print params
        return;
        out << "Ebwt (" << (isInMemory()? "memory" : "disk") << "):" << endl;
        for(index_t i = 0; i < _zOffs.size(); i++) {
            out << "    " << (i+1) << " zOffs: "     << _zOffs[i] << endl
                << "    " << (i+1) << " zGbwtByteOff: " << _zGbwtByteOffs[i] << endl
                << "    " << (i+1) << " zGbwtBpOff: "   << _zGbwtBpOffs[i] << endl;
        }
		    out << "    nPat: "  << _nPat << endl
		        << "    plen: ";
		if(plen() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << plen()[0] << endl;
		}
		out << "    rstarts: ";
		if(rstarts() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << rstarts()[0] << endl;
		}
		out << "    ebwt: ";
		if(gfm() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << gfm()[0] << endl;
		}
		out << "    fchr: ";
		if(fchr() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << fchr()[0] << endl;
		}
		out << "    ftab: ";
		if(ftab() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << ftab()[0] << endl;
		}
		out << "    eftab: ";
		if(eftab() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << eftab()[0] << endl;
		}
		out << "    offs: ";
		if(offs() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << offs()[0] << endl;
		}
	}

	// Building
	template <typename TStr> static TStr join(EList<TStr>& l, uint32_t seed);
	template <typename TStr> static void join(EList<FileBuf*>& l, EList<RefRecord>& szs, index_t sztot, const RefReadInParams& refparams, uint32_t seed, TStr& s, bool include_rc = false, bool CGtoTG = false);
	template <typename TStr> void joinToDisk(EList<FileBuf*>& l, EList<RefRecord>& szs, index_t sztot, const RefReadInParams& refparams, TStr& ret, ostream& out1, ostream& out2);
	template <typename TStr> void buildToDisk(PathGraph<index_t>& gbwt, const TStr& s, ostream& out1, ostream& out2, streampos headerPos = -1);
    template <typename TStr> void buildToDisk(InorderBlockwiseSA<TStr>& sa, const TStr& s, ostream& out1, ostream& out2, streampos headerPos = -1);

	// I/O
	void readIntoMemory(int needEntireRev, bool loadSASamp, bool loadFtab, bool loadRstarts, bool justHeader, GFMParams<index_t> *params, bool mmSweep, bool loadNames, bool startVerbose, bool subIndex = false);
	void writeFromMemory(bool justHeader, ostream& out1, ostream& out2) const;
	void writeFromMemory(bool justHeader, const string& out1, const string& out2) const;

	// Sanity checking
	void sanityCheckUpToSide(int upToSide) const;
	void sanityCheckAll(int reverse) const;
	void restore(SString<char>& s) const;
	void checkOrigs(const EList<SString<char> >& os, bool mirror) const;

	// Searching and reporting
	bool joinedToTextOff(index_t qlen, index_t off, index_t& tidx, index_t& textoff, index_t& tlen, bool rejectStraddle, bool& straddled) const;
    bool textOffToJoined(index_t tid, index_t tlen, index_t& off) const;

#define WITHIN_BWT_LEN(x) \
	assert_leq(x[0], this->_gh._sideGbwtLen); \
	assert_leq(x[1], this->_gh._sideGbwtLen); \
	assert_leq(x[2], this->_gh._sideGbwtLen); \
	assert_leq(x[3], this->_gh._sideGbwtLen)

#define WITHIN_FCHR(x) \
	assert_leq(x[0], this->fchr()[1]); \
	assert_leq(x[1], this->fchr()[2]); \
	assert_leq(x[2], this->fchr()[3]); \
	assert_leq(x[3], this->fchr()[4])

#define WITHIN_FCHR_DOLLARA(x) \
	assert_leq(x[0], this->fchr()[1]+1); \
	assert_leq(x[1], this->fchr()[2]); \
	assert_leq(x[2], this->fchr()[3]); \
	assert_leq(x[3], this->fchr()[4])

	/**
	 * Count all occurrences of character c from the beginning of the
	 * forward side to <by,bp> and add in the occ[] count up to the side
	 * break just prior to the side.
	 *
	 * A Bowtie 2 side is shaped like:
	 *
	 * XXXXXXXXXXXXXXXX [A] [C] [G] [T]
	 * --------48------ -4- -4- -4- -4-  (numbers in bytes)
	 */
	inline index_t countBt2Side(const SideLocus<index_t>& l, int c) const {
        assert_range(0, 3, c);
        assert_range(0, (int)this->_gh._sideGbwtSz-1, (int)l._by);
        assert_range(0, 3, (int)l._bp);
        const uint8_t *side = l.side(this->gfm());
        index_t cCnt = countUpTo(l, c);
        assert_leq(cCnt, l.toBWRow(_gh));
        assert_leq(cCnt, this->_gh._sideGbwtLen);
        assert_eq(_zGbwtByteOffs.size(), _zGbwtBpOffs.size());
        for(index_t i = 0; i < _zGbwtByteOffs.size(); i++) {
            index_t zGbwtByteOff = _zGbwtByteOffs[i];
            if(c == 0 && l._sideByteOff <= zGbwtByteOff && l._sideByteOff + l._by >= zGbwtByteOff) {
                // Adjust for the fact that we represented $ with an 'A', but
                // shouldn't count it as an 'A' here
                int zGbwtBpOff = _zGbwtBpOffs[i];
                if((l._sideByteOff + l._by > zGbwtByteOff) ||
                   (l._sideByteOff + l._by == zGbwtByteOff && l._bp > zGbwtBpOff))
                {
                    cCnt--; // Adjust for '$' looking like an 'A'
                }
            }
        }
        index_t ret;
        // Now factor in the occ[] count at the side break
        const uint8_t *acgt8 = side + _gh._sideGbwtSz;
        if(!_gh._linearFM) {
            acgt8 += (sizeof(index_t) << 1);
        }
        const index_t *acgt = reinterpret_cast<const index_t*>(acgt8);
        assert_leq(acgt[0], this->_gh._numSides * this->_gh._sideGbwtLen); // b/c it's used as padding
        assert_lt(acgt[1], this->_gh._gbwtLen);
        assert_lt(acgt[2], this->_gh._gbwtLen);
        assert_lt(acgt[3], this->_gh._gbwtLen);
        ret = acgt[c] + cCnt + this->fchr()[c];
#ifndef NDEBUG
        assert_leq(ret, this->fchr()[c+1]); // can't have jumpded into next char's section
        if(c == 0) {
            assert_leq(cCnt, this->_gh._sideGbwtLen);
        } else {
            assert_leq(ret, this->_gh._gbwtLen);
        }
#endif
        return ret;
	}

	/**
	 * Count all occurrences of all four nucleotides up to the starting
	 * point (which must be in a forward side) given by 'l' storing the
	 * result in 'cntsUpto', then count nucleotide occurrences within the
	 * range of length 'num' storing the result in 'cntsIn'.  Also, keep
	 * track of the characters occurring within the range by setting
	 * 'masks' accordingly (masks[1][10] == true -> 11th character is a
	 * 'C', and masks[0][10] == masks[2][10] == masks[3][10] == false.
	 */
	inline void countBt2SideRange(
		const SideLocus<index_t>& l,        // top locus
		index_t num,        // number of elts in range to tall
		index_t* cntsUpto,  // A/C/G/T counts up to top
		index_t* cntsIn,    // A/C/G/T counts within range
		EList<bool> *masks) const // masks indicating which range elts = A/C/G/T
	{
		assert_gt(num, 0);
		assert_range(0, (int)this->_gh._sideGbwtSz-1, (int)l._by);
		assert_range(0, 3, (int)l._bp);
		countUpToEx(l, cntsUpto);
		WITHIN_FCHR_DOLLARA(cntsUpto);
		WITHIN_BWT_LEN(cntsUpto);
		const uint8_t *side = l.side(this->gfm());
        assert_eq(_zGbwtByteOffs.size(), _zGbwtBpOffs.size());
        for(index_t i = 0; i < _zGbwtByteOffs.size(); i++) {
            index_t zGbwtByteOff = _zGbwtByteOffs[i];
            if(l._sideByteOff <= zGbwtByteOff && l._sideByteOff + l._by >= zGbwtByteOff) {
                // Adjust for the fact that we represented $ with an 'A', but
                // shouldn't count it as an 'A' here
                int zGbwtBpOff = _zGbwtBpOffs[i];
                if((l._sideByteOff + l._by > zGbwtByteOff) ||
                   (l._sideByteOff + l._by == zGbwtByteOff && l._bp > zGbwtBpOff))
                {
                    cntsUpto[0]--; // Adjust for '$' looking like an 'A'
                }
            }
        }
		// Now factor in the occ[] count at the side break
        const index_t *acgt = reinterpret_cast<const index_t*>(side + _gh._sideGbwtSz);
        if(!this->_gh.linearFM()) acgt += 2;
		assert_leq(acgt[0], this->fchr()[1] + this->_gh.sideGbwtLen());
		assert_leq(acgt[1], this->fchr()[2]-this->fchr()[1]);
		assert_leq(acgt[2], this->fchr()[3]-this->fchr()[2]);
		assert_leq(acgt[3], this->fchr()[4]-this->fchr()[3]);
		assert_leq(acgt[0], this->_gh._gbwtLen + this->_gh.sideGbwtLen());
		assert_leq(acgt[1], this->_gh._gbwtLen);
		assert_leq(acgt[2], this->_gh._gbwtLen);
		assert_leq(acgt[3], this->_gh._gbwtLen);
		cntsUpto[0] += (acgt[0] + this->fchr()[0]);
		cntsUpto[1] += (acgt[1] + this->fchr()[1]);
		cntsUpto[2] += (acgt[2] + this->fchr()[2]);
		cntsUpto[3] += (acgt[3] + this->fchr()[3]);
		masks[0].resize(num);
		masks[1].resize(num);
		masks[2].resize(num);
		masks[3].resize(num);
		WITHIN_FCHR_DOLLARA(cntsUpto);
		WITHIN_FCHR_DOLLARA(cntsIn);
		// 'cntsUpto' is complete now.
		// Walk forward until we've tallied the entire 'In' range
		index_t nm = 0;
		// Rest of this side
		nm += countBt2SideRange2(l, true, num - nm, cntsIn, masks, nm);
		assert_eq(nm, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
		assert_leq(nm, num);
		SideLocus<index_t> lcopy = l;
		while(nm < num) {
			// Subsequent sides, if necessary
			lcopy.nextSide(this->_gh);
			nm += countBt2SideRange2(lcopy, false, num - nm, cntsIn, masks, nm);
			WITHIN_FCHR_DOLLARA(cntsIn);
			assert_leq(nm, num);
			assert_eq(nm, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
		}
		assert_eq(num, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
		WITHIN_FCHR_DOLLARA(cntsIn);
	}

	/**
	 * Count all occurrences of character c from the beginning of the
	 * forward side to <by,bp> and add in the occ[] count up to the side
	 * break just prior to the side.
	 *
	 * A forward side is shaped like:
	 *
	 * [A] [C] XXXXXXXXXXXXXXXX
	 * -4- -4- --------56------ (numbers in bytes)
	 *         ^
	 *         Side ptr (result from SideLocus.side())
	 *
	 * And following it is a reverse side shaped like:
	 * 
	 * [G] [T] XXXXXXXXXXXXXXXX
	 * -4- -4- --------56------ (numbers in bytes)
	 *         ^
	 *         Side ptr (result from SideLocus.side())
	 *
	 */
	inline void countBt2SideEx(const SideLocus<index_t>& l, index_t* arrs) const {
		assert_range(0, (int)this->_gh._sideGbwtSz-1, (int)l._by);
		assert_range(0, 3, (int)l._bp);
		countUpToEx(l, arrs);
        assert_eq(_zGbwtByteOffs.size(), _zGbwtBpOffs.size());
        for(index_t i = 0; i < _zGbwtByteOffs.size(); i++) {
            index_t zGbwtByteOff = _zGbwtByteOffs[i];
            if(l._sideByteOff <= zGbwtByteOff && l._sideByteOff + l._by >= zGbwtByteOff) {
                // Adjust for the fact that we represented $ with an 'A', but
                // shouldn't count it as an 'A' here
                int zGbwtBpOff = _zGbwtBpOffs[i];
                if((l._sideByteOff + l._by > zGbwtByteOff) ||
                   (l._sideByteOff + l._by == zGbwtByteOff && l._bp > zGbwtBpOff))
                {
                    arrs[0]--; // Adjust for '$' looking like an 'A'
                }
            }
        }
		WITHIN_FCHR(arrs);
		WITHIN_BWT_LEN(arrs);
		// Now factor in the occ[] count at the side break
		const uint8_t *side = l.side(this->gfm());
		const uint8_t *acgt16 = side + this->_gh._sideSz - sizeof(index_t) * 4;
		const index_t *acgt = reinterpret_cast<const index_t*>(acgt16);
		assert_leq(acgt[0], this->fchr()[1] + this->_gh.sideGbwtLen());
		assert_leq(acgt[1], this->fchr()[2]-this->fchr()[1]);
		assert_leq(acgt[2], this->fchr()[3]-this->fchr()[2]);
		assert_leq(acgt[3], this->fchr()[4]-this->fchr()[3]);
		assert_leq(acgt[0], this->_gh._len + this->_gh.sideGbwtLen());
		assert_leq(acgt[1], this->_gh._len);
		assert_leq(acgt[2], this->_gh._len);
		assert_leq(acgt[3], this->_gh._len);
		arrs[0] += (acgt[0] + this->fchr()[0]);
		arrs[1] += (acgt[1] + this->fchr()[1]);
		arrs[2] += (acgt[2] + this->fchr()[2]);
		arrs[3] += (acgt[3] + this->fchr()[3]);
		WITHIN_FCHR(arrs);
	}
    
    /**
     * Count all occurrences of character 1 from the beginning of the
     * forward side to <by,bp> and add in the occ[] count up to the side
     * break just prior to the side.
     *
     */
    inline index_t countMSide(const SideLocus<index_t>& l) const {
        assert_range(0, (int)this->_gh._sideGbwtSz-1, (int)l._by);
        assert_range(0, 7, (int)l._bp);
        index_t cCnt = countUpTo_bits(l, false /* F? */);
        const uint8_t *side = l.side(this->gfm());
        cCnt += *(index_t*)(side + _gh._sideGbwtSz + sizeof(index_t));
        assert_leq(cCnt, l.toBWRow(_gh));
        assert_leq(cCnt, this->_gh._numNodes);
        return cCnt;
    }

    /**
	 * Counts the number of occurrences of character 'c' in the given Ebwt
	 * side up to (but not including) the given byte/bitpair (by/bp).
	 *
	 * This is a performance-critical function.  This is the top search-
	 * related hit in the time profile.
	 *
	 * Function gets 11.09% in profile
	 */
	inline index_t countUpTo(const SideLocus<index_t>& l, int c) const {
		// Count occurrences of c in each 64-bit (using bit trickery);
		// Someday countInU64() and pop() functions should be
		// vectorized/SSE-ized in case that helps.
        bool usePOPCNT = false;
		index_t cCnt = 0;
		const uint8_t *side = l.side(this->gfm());
		int i = 0;
#ifdef POPCNT_CAPABILITY
        if(_usePOPCNTinstruction) {
            usePOPCNT = true;
            int by = l._by + (l._bp > 0 ? 1 : 0);
            for(; i < by; i += 8) {
                if(i + 8 < by) {
                    cCnt += countInU64<USE_POPCNT_INSTRUCTION>(c, *(uint64_t*)&side[i]);
                } else {
                    index_t by_shift = 8 - (by - i);
                    index_t bp_shift = (l._bp > 0 ? 4 - l._bp : 0);
                    index_t shift = (by_shift << 3) + (bp_shift << 1);
                    uint64_t side_i = *(uint64_t*)&side[i];
                    side_i = (_toBigEndian ? side_i >> shift : side_i << shift);
                    index_t cCnt_add = countInU64<USE_POPCNT_INSTRUCTION>(c, side_i);
                    if(c == 0) cCnt_add -= (shift >> 1);
#ifndef NDEBUG
                    index_t cCnt_temp = 0;
                    for(int j = i; j < l._by; j++) {
                        cCnt_temp += cCntLUT_4[0][c][side[j]];
                    }
                    if(l._bp > 0) {
                        cCnt_temp += cCntLUT_4[(int)l._bp][c][side[l._by]];
                    }
                    assert_eq(cCnt_add, cCnt_temp);
#endif
                    cCnt += cCnt_add;
                    break;
                }
            }
        } else {
            for(; i + 7 < l._by; i += 8) {
                cCnt += countInU64<USE_POPCNT_GENERIC>(c, *(uint64_t*)&side[i]);
            }
        }
#else
        for(; i + 7 < l._by; i += 8) {
            cCnt += countInU64(c, *(uint64_t*)&side[i]);
        }
#endif
        
        if(!usePOPCNT) {
            // Count occurences of c in the rest of the side (using LUT)
            for(; i < l._by; i++) {
                cCnt += cCntLUT_4[0][c][side[i]];
            }
            
            // Count occurences of c in the rest of the byte
            if(l._bp > 0) {
                cCnt += cCntLUT_4[(int)l._bp][c][side[i]];
            }
        }
        
		return cCnt;
	}
    
    /**
	 * Counts the number of occurrences of character 'c' in the given Ebwt
	 * side down to the given byte/bitpair (by/bp).
	 *
	 */
	inline index_t countDownTo(const SideLocus<index_t>& l, int c) const {
		// Count occurrences of c in each 64-bit (using bit trickery);
		// Someday countInU64() and pop() functions should be
		// vectorized/SSE-ized in case that helps.
		index_t cCnt = 0;
		const uint8_t *side = l.side(this->gfm());
		int i = 64 - 4 * sizeof(index_t) - 1;
#ifdef POPCNT_CAPABILITY
        if ( _usePOPCNTinstruction) {
            for(; i - 7 > l._by; i -= 8) {
                cCnt += countInU64<USE_POPCNT_INSTRUCTION>(c, *(uint64_t*)&side[i-7]);
            }
        }
        else {
            for(; i + 7 > l._by; i -= 8) {
                cCnt += countInU64<USE_POPCNT_GENERIC>(c, *(uint64_t*)&side[i-7]);
            }
        }
#else
        for(; i + 7 > l._by; i -= 8) {
            cCnt += countInU64(c, *(uint64_t*)&side[i-7]);
        }
#endif
		// Count occurences of c in the rest of the side (using LUT)
		for(; i > l._by; i--) {
			cCnt += cCntLUT_4_rev[0][c][side[i]];
		}
		// Count occurences of c in the rest of the byte
		if(l._bp > 0) {
			cCnt += cCntLUT_4_rev[4-(int)l._bp][c][side[i]];
		} else {
            cCnt += cCntLUT_4_rev[0][c][side[i]];
        }
		return cCnt;
	}

    /**
     * Tricky-bit-bashing bitpair counting for given two-bit value (0-3)
     * within a 64-bit argument.
     *
     * Function gets 2.32% in profile
     */
#ifdef POPCNT_CAPABILITY
    template<typename Operation>
#endif
    inline static void countInU64Ex(uint64_t dw, index_t* arrs) {
        uint64_t c0 = c_table[0];
        uint64_t x0 = dw ^ c0;
        uint64_t x1 = (x0 >> 1);
        uint64_t x2 = x1 & (0x5555555555555555llu);
        uint64_t x3 = x0 & x2;
#ifdef POPCNT_CAPABILITY
        uint64_t tmp = Operation().pop64(x3);
#else
        uint64_t tmp = pop64(x3);
#endif
        arrs[0] += (uint32_t) tmp;
        
        c0 = c_table[1];
        x0 = dw ^ c0;
        x1 = (x0 >> 1);
        x2 = x1 & (0x5555555555555555llu);
        x3 = x0 & x2;
#ifdef POPCNT_CAPABILITY
        tmp = Operation().pop64(x3);
#else
        tmp = pop64(x3);
#endif
        arrs[1] += (uint32_t) tmp;
        
        c0 = c_table[2];
        x0 = dw ^ c0;
        x1 = (x0 >> 1);
        x2 = x1 & (0x5555555555555555llu);
        x3 = x0 & x2;
#ifdef POPCNT_CAPABILITY
        tmp = Operation().pop64(x3);
#else
        tmp = pop64(x3);
#endif
        arrs[2] += (uint32_t) tmp;
        
        c0 = c_table[3];
        x0 = dw ^ c0;
        x1 = (x0 >> 1);
        x2 = x1 & (0x5555555555555555llu);
        x3 = x0 & x2;
#ifdef POPCNT_CAPABILITY
        tmp = Operation().pop64(x3);
#else
        tmp = pop64(x3);
#endif
        arrs[3] += (uint32_t) tmp;
    }

	/**
	 * Counts the number of occurrences of all four nucleotides in the
	 * given side up to (but not including) the given byte/bitpair (by/bp).
	 * Count for 'a' goes in arrs[0], 'c' in arrs[1], etc.
	 */
	inline void countUpToEx(const SideLocus<index_t>& l, index_t* arrs) const {
		int i = 0;
		// Count occurrences of each nucleotide in each 64-bit word using
		// bit trickery; note: this seems does not seem to lend a
		// significant boost to performance in practice.  If you comment
		// out this whole loop (which won't affect correctness - it will
		// just cause the following loop to take up the slack) then runtime
		// does not change noticeably. Someday the countInU64() and pop()
		// functions should be vectorized/SSE-ized in case that helps.
		const uint8_t *side = l.side(this->gfm());
#ifdef POPCNT_CAPABILITY
        if (_usePOPCNTinstruction) {
            for(; i+7 < l._by; i += 8) {
                countInU64Ex<USE_POPCNT_INSTRUCTION>(*(uint64_t*)&side[i], arrs);
            }
        }
        else {
            for(; i+7 < l._by; i += 8) {
                countInU64Ex<USE_POPCNT_GENERIC>(*(uint64_t*)&side[i], arrs);
            }
        }
#else
        for(; i+7 < l._by; i += 8) {
            countInU64Ex(*(uint64_t*)&side[i], arrs);
        }
#endif
		// Count occurences of nucleotides in the rest of the side (using LUT)
		// Many cache misses on following lines (~20K)
		for(; i < l._by; i++) {
			arrs[0] += cCntLUT_4[0][0][side[i]];
			arrs[1] += cCntLUT_4[0][1][side[i]];
			arrs[2] += cCntLUT_4[0][2][side[i]];
			arrs[3] += cCntLUT_4[0][3][side[i]];
		}
		// Count occurences of c in the rest of the byte
		if(l._bp > 0) {
			arrs[0] += cCntLUT_4[(int)l._bp][0][side[i]];
			arrs[1] += cCntLUT_4[(int)l._bp][1][side[i]];
			arrs[2] += cCntLUT_4[(int)l._bp][2][side[i]];
			arrs[3] += cCntLUT_4[(int)l._bp][3][side[i]];
		}
	}
    
    /**
     * Counts the number of occurrences of character 'c' in the given Ebwt
     * side up to (but not including) the given byte/bitpair (by/bp).
     *
     * This is a performance-critical function.  This is the top search-
     * related hit in the time profile.
     */
    inline index_t countUpTo_bits(const SideLocus<index_t>& l, bool F) const {
        // Count occurrences of c in each 64-bit (using bit trickery);
        // Someday countInU64() and pop() functions should be
        // vectorized/SSE-ized in case that helps.
        bool usePOPCNT = false;
        index_t cCnt = 0;
        const uint8_t *side = l.side(this->gfm());
        if(F) {
            side += (_gh._sideGbwtSz >> 1);
        } else {
            side += (_gh._sideGbwtSz - (_gh._sideGbwtSz >> 2));
        }
        int i = 0;
#ifdef POPCNT_CAPABILITY
        if(_usePOPCNTinstruction) {
            usePOPCNT = true;
            int by = l._by + (l._bp > 0 ? 1 : 0);
            for(; i < by; i += 8) {
                if(i + 8 < by) {
                    cCnt += countInU64_bits<USE_POPCNT_INSTRUCTION>(*(uint64_t*)&side[i]);
                } else {
                    index_t by_shift = 8 - (by - i);
                    index_t bp_shift = (l._bp > 0 ? 8 - l._bp : 0);
                    index_t shift = (by_shift << 3) + bp_shift;
                    uint64_t side_i = *(uint64_t*)&side[i];
                    side_i = (_toBigEndian ? side_i >> shift : side_i << shift);
                    index_t cCnt_add = countInU64_bits<USE_POPCNT_INSTRUCTION>(side_i);
#ifndef NDEBUG
                    index_t cCnt_temp = 0;
                    for(int j = i; j < l._by; j++) {
                        cCnt_temp += cCntBIT[0][side[j]];
                    }
                    if(l._bp > 0) {
                        cCnt_temp += cCntBIT[(int)l._bp][side[l._by]];
                    }
                    assert_eq(cCnt_add, cCnt_temp);
#endif
                    cCnt += cCnt_add;
                    break;
                }
            }
        } else {
            for(; i + 7 < l._by; i += 8) {
	      cCnt += countInU64_bits<USE_POPCNT_GENERIC_BITS>(*(uint64_t*)&side[i]);
            }
        }
#else
        for(; i + 7 < l._by; i += 8) {
            cCnt += countInU64_bits(*(uint64_t*)&side[i]);
        }
#endif
        
        if(!usePOPCNT) {
            // Count occurences of c in the rest of the side (using LUT)
            for(; i < l._by; i++) {
                cCnt += cCntBIT[0][side[i]];
            }
            
            // Count occurences of c in the rest of the byte
            if(l._bp > 0) {
                cCnt += cCntBIT[(int)l._bp][side[i]];
            }
        }
        
        return cCnt;
    }
    

#ifndef NDEBUG
	/**
	 * Given top and bot loci, calculate counts of all four DNA chars up to
	 * those loci.  Used for more advanced backtracking-search.
	 */
	inline void mapLFEx(
		const SideLocus<index_t>& l,
		index_t *arrs
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		assert_eq(0, arrs[0]);
		assert_eq(0, arrs[1]);
		assert_eq(0, arrs[2]);
		assert_eq(0, arrs[3]);
		countBt2SideEx(l, arrs);
		if(_sanity && !overrideSanity) {
			// Make sure results match up with individual calls to mapLF;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			assert_eq(mapLF(l, 0, true), arrs[0]);
			assert_eq(mapLF(l, 1, true), arrs[1]);
			assert_eq(mapLF(l, 2, true), arrs[2]);
			assert_eq(mapLF(l, 3, true), arrs[3]);
		}
	}
#endif

	/**
	 * Given top and bot rows, calculate counts of all four DNA chars up to
	 * those loci.
	 */
	inline void mapLFEx(
		index_t top,
		index_t bot,
		index_t *tops,
		index_t *bots
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		SideLocus<index_t> ltop, lbot;
		SideLocus<index_t>::initFromTopBot(top, bot, _gh, gfm(), ltop, lbot);
		mapLFEx(ltop, lbot, tops, bots ASSERT_ONLY(, overrideSanity));
	}

	/**
	 * Given top and bot loci, calculate counts of all four DNA chars up to
	 * those loci.  Used for more advanced backtracking-search.
	 */
	inline void mapLFEx(
		const SideLocus<index_t>& ltop,
		const SideLocus<index_t>& lbot,
		index_t *tops,
		index_t *bots
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		assert(ltop.repOk(this->gh()));
		assert(lbot.repOk(this->gh()));
		assert_eq(0, tops[0]); assert_eq(0, bots[0]);
		assert_eq(0, tops[1]); assert_eq(0, bots[1]);
		assert_eq(0, tops[2]); assert_eq(0, bots[2]);
		assert_eq(0, tops[3]); assert_eq(0, bots[3]);
		countBt2SideEx(ltop, tops);
		countBt2SideEx(lbot, bots);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with individual calls to mapLF;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			assert_eq(mapLF(ltop, 0, true), tops[0]);
			assert_eq(mapLF(ltop, 1, true), tops[1]);
			assert_eq(mapLF(ltop, 2, true), tops[2]);
			assert_eq(mapLF(ltop, 3, true), tops[3]);
			assert_eq(mapLF(lbot, 0, true), bots[0]);
			assert_eq(mapLF(lbot, 1, true), bots[1]);
			assert_eq(mapLF(lbot, 2, true), bots[2]);
			assert_eq(mapLF(lbot, 3, true), bots[3]);
		}
#endif
	}

	/**
	 * Counts the number of occurrences of all four nucleotides in the
	 * given side from the given byte/bitpair (l->_by/l->_bp) (or the
	 * beginning of the side if l == 0).  Count for 'a' goes in arrs[0],
	 * 'c' in arrs[1], etc.
	 *
	 * Note: must account for $.
	 *
	 * Must fill in masks
	 */
	inline index_t countBt2SideRange2(
		const SideLocus<index_t>& l,
		bool startAtLocus,
		index_t num,
		index_t* arrs,
		EList<bool> *masks,
		index_t maskOff) const
	{
		assert(!masks[0].empty());
		assert_eq(masks[0].size(), masks[1].size());
		assert_eq(masks[0].size(), masks[2].size());
		assert_eq(masks[0].size(), masks[3].size());
		ASSERT_ONLY(index_t myarrs[4] = {0, 0, 0, 0});
		index_t nm = 0; // number of nucleotides tallied so far
		int iby = 0;      // initial byte offset
		int ibp = 0;      // initial base-pair offset
		if(startAtLocus) {
			iby = l._by;
			ibp = l._bp;
		} else {
			// Start at beginning
		}
		int by = iby, bp = ibp;
		assert_lt(bp, 4);
        index_t sideGbwtSz = this->_gh._sideGbwtSz >> (this->_gh.linearFM() ? 0 : 1);
        assert_lt(by, (int)sideGbwtSz);
		const uint8_t *side = l.side(this->gfm());
		while(nm < num) {
			int c = (side[by] >> (bp * 2)) & 3;
			assert_lt(maskOff + nm, masks[c].size());
			masks[0][maskOff + nm] = masks[1][maskOff + nm] =
			masks[2][maskOff + nm] = masks[3][maskOff + nm] = false;
			assert_range(0, 3, c);
			// Note: we tally $ just like an A
			arrs[c]++; // tally it
			ASSERT_ONLY(myarrs[c]++);
			masks[c][maskOff + nm] = true; // not dead
			nm++;
			if(++bp == 4) {
				bp = 0;
				by++;
                assert_leq(by, (int)sideGbwtSz);
				if(by == (int)sideGbwtSz) {
					// Fell off the end of the side
					break;
				}
			}
		}
		WITHIN_FCHR_DOLLARA(arrs);
#ifndef NDEBUG
		if(_sanity) {
			// Make sure results match up with a call to mapLFEx.
			index_t tops[4] = {0, 0, 0, 0};
			index_t bots[4] = {0, 0, 0, 0};
			index_t top = l.toBWRow(gh());
			index_t bot = top + nm;
			mapLFEx(top, bot, tops, bots, false);
			assert(myarrs[0] == (bots[0] - tops[0]) || myarrs[0] == (bots[0] - tops[0])+1);
			assert_eq(myarrs[1], bots[1] - tops[1]);
			assert_eq(myarrs[2], bots[2] - tops[2]);
			assert_eq(myarrs[3], bots[3] - tops[3]);
		}
#endif
		return nm;
	}

	/**
	 * Return the final character in row i (i.e. the i'th character in the
	 * BWT transform).  Note that the 'L' in the name of the function
	 * stands for 'last', as in the literature.
	 */
	inline int rowL(const SideLocus<index_t>& l) const {
		// Extract and return appropriate bit-pair
		return unpack_2b_from_8b(l.side(this->gfm())[l._by], l._bp);
	}

	/**
	 * Return the final character in row i (i.e. the i'th character in the
	 * BWT transform).  Note that the 'L' in the name of the function
	 * stands for 'last', as in the literature.
	 */
	inline int rowL(index_t i) const {
		// Extract and return appropriate bit-pair
		SideLocus<index_t> l;
		l.initFromRow(i, _gh, gfm());
		return rowL(l);
	}

	/**
	 * Given top and bot loci, calculate counts of all four DNA chars up to
	 * those loci.  Used for more advanced backtracking-search.
	 */
	inline void mapLFRange(
		const SideLocus<index_t>& ltop,
		const SideLocus<index_t>& lbot,
		index_t num,        // Number of elts
		index_t* cntsUpto,  // A/C/G/T counts up to top
		index_t* cntsIn,    // A/C/G/T counts within range
		EList<bool> *masks
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		assert(ltop.repOk(this->gh()));
		assert(lbot.repOk(this->gh()));
		assert_eq(num, lbot.toBWRow(this->gh()) - ltop.toBWRow(this->gh()));
		assert_eq(0, cntsUpto[0]); assert_eq(0, cntsIn[0]);
		assert_eq(0, cntsUpto[1]); assert_eq(0, cntsIn[1]);
		assert_eq(0, cntsUpto[2]); assert_eq(0, cntsIn[2]);
		assert_eq(0, cntsUpto[3]); assert_eq(0, cntsIn[3]);
		countBt2SideRange(ltop, num, cntsUpto, cntsIn, masks);
		assert_eq(num, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with individual calls to mapLF;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			index_t tops[4] = {0, 0, 0, 0};
			index_t bots[4] = {0, 0, 0, 0};
			assert(ltop.repOk(this->gh()));
			assert(lbot.repOk(this->gh()));
			mapLFEx(ltop, lbot, tops, bots, false);
			for(int i = 0; i < 4; i++) {
				assert(cntsUpto[i] == tops[i] || tops[i] == bots[i]);
				if(i == 0) {
					assert(cntsIn[i] == bots[i]-tops[i] ||
						   cntsIn[i] == bots[i]-tops[i]+1);
				} else {
					assert_eq(cntsIn[i], bots[i]-tops[i]);
				}
			}
		}
#endif
	}

	/**
	 * Given row i, return the row that the LF mapping maps i to.
	 */
	inline index_t mapLF(
		const SideLocus<index_t>& l
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		ASSERT_ONLY(index_t srcrow = l.toBWRow(_gh));
		index_t ret;
		assert(l.side(this->gfm()) != NULL);
		int c = rowL(l);
		assert_lt(c, 4);
		assert_geq(c, 0);
		ret = countBt2Side(l, c);
		assert_lt(ret, this->_gh._gbwtLen);
		assert_neq(srcrow, ret);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with results from mapLFEx;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			index_t arrs[] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[c], ret);
		}
#endif
		return ret;
	}

	/**
	 * Given row i and character c, return the row that the LF mapping maps
	 * i to on character c.
	 */
	inline index_t mapLF(
		const SideLocus<index_t>& l, int c
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		index_t ret;
		assert_lt(c, 4);
		assert_geq(c, 0);
		ret = countBt2Side(l, c);
		assert_lt(ret, this->_gh._gbwtLen);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with results from mapLFEx;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			index_t arrs[] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[c], ret);
		}
#endif
		return ret;
	}
    
    /**
     * Given row i and character c, return the row that the GLF mapping maps
     * i to on character c.
     */
    inline pair<index_t, index_t> mapLF(
                                        SideLocus<index_t>& tloc, SideLocus<index_t>& bloc, int c,
                                        pair<index_t, index_t>* node_range = NULL
                                        ASSERT_ONLY(, bool overrideSanity = false)
                                        ) const
    {
        assert_lt(c, 4);
        assert_geq(c, 0);
        index_t top = mapLF(tloc, c);
        index_t bot = mapLF(bloc, c);
        if(node_range != NULL) {
            node_range->first = top; node_range->second = bot;
        }
        return pair<index_t, index_t>(top, bot);
    }
    
    /**
     * Given row i and character c, return the row that the GLF mapping maps
     * i to on character c.
     */
    inline pair<index_t, index_t> mapGLF(
                                         SideLocus<index_t>& tloc, SideLocus<index_t>& bloc, int c,
                                         pair<index_t, index_t>* node_range = NULL,
                                         EList<pair<index_t, index_t> >* node_iedges = NULL,
                                         index_t k = 5
                                         ASSERT_ONLY(, bool overrideSanity = false)
                                         ) const
    {
        assert_lt(c, 4);
        assert_geq(c, 0);
        index_t top = mapLF(tloc, c);
        index_t bot = mapLF(bloc, c);
        if(gh().linearFM()) {
            if(node_range != NULL) {
                node_range->first = top; node_range->second = bot;
            }
            if(node_iedges != NULL) {
                node_iedges->clear();
            }
            return pair<index_t, index_t>(top, bot);
        }
        if(top + 1 >= gh()._gbwtLen || top >= bot) {
            assert_eq(top, bot);
            return pair<index_t, index_t>(0, 0);
        }

        tloc.initFromRow_bit(top + 1, gh(), gfm());
        index_t node_top = rank_M(tloc) - 1;

        index_t top_F_loc = 0, top_M_occ = 0;
        size_t iter = 0;
        while(true) {
            const uint8_t *side = tloc.side(gfm()) + gh()._sideGbwtSz - gh()._sideSz * iter;
            top_F_loc = *((index_t*)side);
            side += sizeof(index_t);
            top_M_occ = *((index_t*)side);
            assert_leq(top_M_occ, node_top + 1);
            if(top_M_occ <= node_top) break;
            iter++;
        }
        if(top_M_occ > 0) top_F_loc++;
        
        tloc.initFromRow_bit(top_F_loc, gh(), gfm());
        
        if(node_top + 1 > top_M_occ) {
            top = select_F(tloc, node_top + 1 - top_M_occ);
        } else {
            top = top_F_loc;
        }
        
        bloc.initFromRow_bit(bot, gh(), gfm());
        index_t node_bot = rank_M(bloc);
        
        const uint8_t *side = bloc.side(gfm()) + gh()._sideGbwtSz;
        index_t bot_F_loc = *((index_t*)side);
        side += sizeof(index_t);
        index_t bot_M_occ = *((index_t*)side);
        assert_leq(bot_M_occ, node_bot + 1);
        if(bot_M_occ > 0) bot_F_loc++;
        
        bloc.initFromRow_bit(bot_F_loc, gh(), gfm());
        
        if(node_bot + 1 > bot_M_occ) {
            bot = select_F(bloc, node_bot + 1 - bot_M_occ);
        } else {
            bot = bot_F_loc;
        }
        
        if(node_range != NULL) {
            (*node_range).first = node_top;
            (*node_range).second = node_bot;
        }
        assert_leq(node_bot - node_top, bot - top);
        if(node_iedges != NULL && node_bot - node_top <= k && node_bot - node_top < bot - top) {
            getInEdgeCount(top, bot, *node_iedges);
        }
        
        return pair<index_t, index_t>(top, bot);
    }

	/**
	 * Given top and bot loci, calculate counts of all four DNA chars up to
	 * those loci.  Also, update a set of tops and bots for the reverse
	 * index/direction using the idea from the bi-directional BWT paper.
	 */
	inline void mapBiLFEx(
		const SideLocus<index_t>& ltop,
		const SideLocus<index_t>& lbot,
		index_t *tops,
		index_t *bots,
		index_t *topsP, // topsP[0] = top
		index_t *botsP
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
#ifndef NDEBUG
		for(int i = 0; i < 4; i++) {
			assert_eq(0, tops[0]);  assert_eq(0, bots[0]);
		}
#endif
		countBt2SideEx(ltop, tops);
		countBt2SideEx(lbot, bots);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with individual calls to mapLF;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			assert_eq(mapLF(ltop, 0, true), tops[0]);
			assert_eq(mapLF(ltop, 1, true), tops[1]);
			assert_eq(mapLF(ltop, 2, true), tops[2]);
			assert_eq(mapLF(ltop, 3, true), tops[3]);
			assert_eq(mapLF(lbot, 0, true), bots[0]);
			assert_eq(mapLF(lbot, 1, true), bots[1]);
			assert_eq(mapLF(lbot, 2, true), bots[2]);
			assert_eq(mapLF(lbot, 3, true), bots[3]);
		}
#endif
		// bots[0..3] - tops[0..3] = # of ways to extend the suffix with an
		// A, C, G, T
		botsP[0] = topsP[0] + (bots[0] - tops[0]);
		topsP[1] = botsP[0];
		botsP[1] = topsP[1] + (bots[1] - tops[1]);
		topsP[2] = botsP[1];
		botsP[2] = topsP[2] + (bots[2] - tops[2]);
		topsP[3] = botsP[2];
		botsP[3] = topsP[3] + (bots[3] - tops[3]);
	}

	/**
	 * Given row and its locus information, proceed on the given character
	 * and return the next row, or all-fs if we can't proceed on that
	 * character.  Returns 0xffffffff if this row ends in $.
	 */
	inline index_t mapLF1(
		index_t row,       // starting row
		const SideLocus<index_t>& l, // locus for starting row
		int c               // character to proceed on
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
        if(rowL(l) != c) return (index_t)INDEX_MAX;
        for(index_t i = 0; i < _zOffs.size(); i++) {
            if(row == _zOffs[i]) return (index_t)INDEX_MAX;
        }
		index_t ret;
		assert_lt(c, 4);
		assert_geq(c, 0);
		ret = countBt2Side(l, c);
		assert_lt(ret, this->_gh._gbwtLen);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with results from mapLFEx;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			index_t arrs[] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[c], ret);
		}
#endif
		return ret;
	}


	/**
	 * Given row and its locus information, set the row to LF(row) and
	 * return the character that was in the final column.
	 */
	inline int mapLF1(
		index_t& row,      // starting row
		const SideLocus<index_t>& l  // locus for starting row
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
        for(index_t i = 0; i < _zOffs.size(); i++) {
            if(row == _zOffs[i]) return -1;
        }
		int c = rowL(l);
		assert_range(0, 3, c);
		row = countBt2Side(l, c);
		assert_lt(row, this->_gh._gbwtLen);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with results from mapLFEx;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			index_t arrs[] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[c], row);
		}
#endif
		return c;
	}
    
    /**
     * Given row and its locus information, proceed on the given character
     * and return the next row, or all-fs if we can't proceed on that
     * character.  Returns 0xffffffff if this row ends in $.
     */
    inline pair<index_t, index_t> mapGLF1(
                                          index_t row,           // starting row
                                          SideLocus<index_t>& l, // locus for starting row
                                          int c,                 // character to proceed
                                          pair<index_t, index_t>* node_range = NULL
                                          ASSERT_ONLY(, bool overrideSanity = false)
                                          ) const
    {
        assert_lt(c, 4);
        assert_geq(c, 0);
        index_t top = mapLF1(row, l, c);
        if(top == (index_t)INDEX_MAX) return pair<index_t, index_t>(0, 0);
        if(gh().linearFM()) {
            if(node_range != NULL) {
                node_range->first = top; node_range->second = top + 1;
            }
            return pair<index_t, index_t>(top, top + 1);
        }
        index_t bot = top;
        
        l.initFromRow_bit(top + 1, gh(), gfm());
        index_t node_top = rank_M(l) - 1;
        
        index_t F_loc = 0, M_occ = 0;
        size_t iter = 0;
        while(true) {
            const uint8_t *side = l.side(gfm()) + gh()._sideGbwtSz - gh()._sideSz * iter;
            F_loc = *((index_t*)side);
            side += sizeof(index_t);
            M_occ = *((index_t*)side);
            assert_leq(M_occ, node_top + 1);
            if(M_occ <= node_top) break;
            iter++;
        }
        if(M_occ > 0) F_loc++;
        
        l.initFromRow_bit(F_loc, gh(), gfm());
        
        if(node_top + 1 > M_occ) {
            top = select_F(l, node_top + 1 - M_occ);
        } else {
            top = F_loc;
        }
        
        index_t node_bot = node_top + 1;
        if(node_bot + 1 > M_occ) {
            SideLocus<index_t> l2;
#if 0
            l2.initFromRow_bit(top + 1, gh(), gfm());
            bot = select_F(l2, 1);
            ASSERT_ONLY(index_t bot2 = select_F(l, node_bot + 1 - M_occ));
            assert_eq(bot, bot2);
#else
            bot = select_F(l, node_bot + 1 - M_occ);
#endif
        } else {
            bot = F_loc;
        }
        
        if(node_range != NULL) {
            (*node_range).first = node_top;
            (*node_range).second = node_bot;
        }
        
        return pair<index_t, index_t>(top, bot);
    }
    
    /**
     * Given row and its locus information, proceed on the given character
     * and return the next row, or all-fs if we can't proceed on that
     * character.  Returns 0xffffffff if this row ends in $.
     */
    inline pair<index_t, index_t> mapGLF1(
                                          index_t row,           // starting row
                                          SideLocus<index_t>& l, // locus for starting row
                                          pair<index_t, index_t>* node_range = NULL
                                          ASSERT_ONLY(, bool overrideSanity = false)
                                          ) const
    {
        for(index_t i = 0; i < _zOffs.size(); i++) {
            if(row == _zOffs[i]) return pair<index_t, index_t>((index_t)INDEX_MAX, (index_t)INDEX_MAX);
        }

        mapLF1(row, l);
        index_t top = row;
        if(top == (index_t)INDEX_MAX) return pair<index_t, index_t>(0, 0);
        if(gh().linearFM()) {
            if(node_range != NULL) {
                node_range->first = top; node_range->second = top + 1;
            }
            return pair<index_t, index_t>(top, top + 1);
        }
        index_t bot = top;
        
        l.initFromRow_bit(top + 1, gh(), gfm());
        index_t node_top = rank_M(l) - 1;
        
        index_t F_loc = 0, M_occ = 0;
        size_t iter = 0;
        while(true) {
            const uint8_t *side = l.side(gfm()) + gh()._sideGbwtSz - gh()._sideSz * iter;
            F_loc = *((index_t*)side);
            side += sizeof(index_t);
            M_occ = *((index_t*)side);
            assert_leq(M_occ, node_top + 1);
            if(M_occ <= node_top) break;
            iter++;
        }
        if(M_occ > 0) F_loc++;
        
        l.initFromRow_bit(F_loc, gh(), gfm());
        
        if(node_top + 1 > M_occ) {
            top = select_F(l, node_top + 1 - M_occ);
        } else {
            top = F_loc;
        }
        
        index_t node_bot = node_top + 1;
        if(node_bot + 1 > M_occ) {
#if 0
            l2.initFromRow_bit(top + 1, gh(), gfm());
            bot = select_F(l2, 1);
            ASSERT_ONLY(index_t bot2 = select_F(l, node_bot + 1 - M_occ));
            assert_eq(bot, bot2);
#else
            bot = select_F(l, node_bot + 1 - M_occ);
#endif
        } else {
            bot = F_loc;
        }
        
        if(node_range != NULL) {
            (*node_range).first = node_top;
            (*node_range).second = node_bot;
        }
        
        return pair<index_t, index_t>(top, bot);
    }
    
    /**
     * Given row i, return rank
     */
    inline index_t rank_M(
                          const SideLocus<index_t>& l
                          ASSERT_ONLY(, bool overrideSanity = false)
                          ) const
    {
        index_t ret = countMSide(l);
        assert_leq(ret, this->_gh._numNodes);
        return ret;
    }
    
    /**
     * Given row i, return select
     */
    inline index_t select_F(
                            SideLocus<index_t> l,
                            index_t count
                            ASSERT_ONLY(, bool overrideSanity = false)
                            ) const
    {
        assert_gt(count, 0);
        const uint8_t *side = l.side(this->gfm()) + (_gh._sideGbwtSz >> 1);
        while(true) {
            index_t remainingBitsSide = (_gh._sideGbwtSz << 1) - l._charOff;
            assert_gt(remainingBitsSide, 0);
            index_t minSide = (count < remainingBitsSide ? count : remainingBitsSide);
            uint64_t bits = *(uint64_t*)&side[l._by];
            uint8_t advance = 64;
            if(l._bp > 0) {
                bits >>= l._bp;
                advance -= l._bp;
            }
            if(minSide < advance) {
                advance = minSide;
                bits <<= (64 - minSide);
            }
	    uint8_t tmp_count = 0;
#ifdef POPCNT_CAPABILITY
	    if(_usePOPCNTinstruction) {
            tmp_count = countInU64_bits<USE_POPCNT_INSTRUCTION>(bits);
	    } else {
            tmp_count = countInU64_bits<USE_POPCNT_GENERIC_BITS>(bits);
	    }
#else
	    tmp_count = countInU64_bits(bits);
#endif
            assert_leq(tmp_count, count);
            count -= tmp_count;
            if(count == 0) {
                assert_gt(advance, 0);
                l._charOff += (advance - 1);
                assert_lt(l._charOff, _gh._sideGbwtSz << 1);
                l._by = l._charOff >> 3;
                l._bp = l._charOff & 0x7;
                break;
            }

            assert_leq(l._charOff + advance, (_gh._sideGbwtSz << 1));
            if(l._charOff + advance == (_gh._sideGbwtSz << 1)) {
                l.nextSide(_gh);
                side = l.side(this->gfm()) + (_gh._sideGbwtSz >> 1);
            } else {
                l._charOff += advance;
                l._by = l._charOff >> 3;
                l._bp = l._charOff & 0x7;
            }
        }
        return l.toBWRow(_gh);
    }
    
    /**
     *
     */
    inline void getInEdgeCount(
                               index_t top,
                               index_t bot,
                               EList<pair<index_t, index_t> >& node_iedges) const
    {
        assert_lt(top, bot);
        node_iedges.clear();
        SideLocus<index_t> l; l.initFromRow_bit(top, _gh, gfm());
        const uint8_t *side = l.side(this->gfm()) + (_gh._sideGbwtSz >> 1);
        assert_lt(l._by, (_gh._sideGbwtSz >> 2));
        assert_eq((side[l._by] >> l._bp) & 0x1, 0x1);
        bool first = true;
        index_t curr_node = 0;
        index_t num0s = 0;
        while(top < bot) {
            if(first) {
                first = false;
            } else {
                int bit = (side[l._by] >> l._bp) & 0x1;
                if(bit == 0x1) {
                    curr_node++;
                    num0s = 0;
                } else {
                    num0s++;
                    if(num0s == 1) {
                        node_iedges.expand();
                        node_iedges.back().first = curr_node;
                    }
                    node_iedges.back().second = num0s;
                }
            }
            if(l._charOff + 1 == (_gh._sideGbwtSz << 1)) {
                l.nextSide(_gh);
                side = l.side(this->gfm()) + (_gh._sideGbwtSz >> 1);
            } else {
                l._charOff++;
                l._by = l._charOff >> 3;
                l._bp = l._charOff & 0x7;
            }
            top++;
        }
    }
    

#ifndef NDEBUG
	/// Check that in-memory Ebwt is internally consistent with respect
	/// to given EbwtParams; assert if not
	bool inMemoryRepOk(const GFMParams<index_t>& gh) const {
        assert_eq(_zOffs.size(), _zGbwtByteOffs.size());
        assert_eq(_zOffs.size(), _zGbwtBpOffs.size());
        for(index_t i = 0; i < _zOffs.size(); i++) {
            assert_geq(_zGbwtBpOffs[i], 0);
            assert_lt(_zGbwtBpOffs[i], 4);
            assert_lt(_zGbwtByteOffs[i], gh._gbwtTotSz);
            assert_lt(_zOffs[i], gh._gbwtLen);
        }
		assert_geq(_nFrag, _nPat);
        assert_eq(_alts.size(), _altnames.size());
		return true;
	}

	/// Check that in-memory Ebwt is internally consistent; assert if
	/// not
	bool inMemoryRepOk() const {
		return repOk(_gh);
	}

	/// Check that Ebwt is internally consistent with respect to given
	/// EbwtParams; assert if not
	bool repOk(const GFMParams<index_t>& gh) const {
		assert(_gh.repOk());
		if(isInMemory()) {
			return inMemoryRepOk(gh);
		}
		return true;
	}

	/// Check that Ebwt is internally consistent; assert if not
	bool repOk() const {
		return repOk(_gh);
	}
#endif

	bool       _toBigEndian;
	int32_t    _overrideOffRate;
	bool       _verbose;
	bool       _passMemExc;
	bool       _sanity;
	bool       fw_;     // true iff this is a forward index
	FILE       *_in1;   // input fd for primary index file
	FILE       *_in2;   // input fd for secondary index file
	string     _in1Str; // filename for primary index file
	string     _in2Str; // filename for secondary index file
    EList<index_t> _zOffs;
	EList<index_t> _zGbwtByteOffs;
	EList<int>     _zGbwtBpOffs;
	index_t    _nPat;  /// number of reference texts
	index_t    _nFrag; /// number of fragments
	APtrWrap<index_t> _plen;
	APtrWrap<index_t> _rstarts; // starting offset of fragments / text indexes
	// _fchr, _ftab and _eftab are expected to be relatively small
	// (usually < 1MB, perhaps a few MB if _fchr is particularly large
	// - like, say, 11).  For this reason, we don't bother with writing
	// them to disk through separate output streams; we
	APtrWrap<index_t> _fchr;
	APtrWrap<index_t> _ftab;
	APtrWrap<index_t> _eftab; // "extended" entries for _ftab
	// _offs may be extremely large.  E.g. for DNA w/ offRate=4 (one
	// offset every 16 rows), the total size of _offs is the same as
	// the total size of the input sequence
    APtrWrap<index_t> _offs;

    // _ebwt is the Extended Burrows-Wheeler Transform itself, and thus
	// is at least as large as the input sequence.
	APtrWrap<uint8_t> _gfm;
	bool       _useMm;        /// use memory-mapped files to hold the index
	bool       useShmem_;     /// use shared memory to hold large parts of the index
	EList<string> _refnames; /// names of the reference sequences
    EList<string> _refnames_nospace; // names of the reference sequences (names stop at space)
    char *mmFile1_;
	char *mmFile2_;
    int _nthreads;
	GFMParams<index_t> _gh;
	bool packed_;

	static const uint64_t default_bmax = INDEX_MAX;
	static const uint64_t default_bmaxMultSqrt = INDEX_MAX;
	static const uint64_t default_bmaxDivN = 4;
	static const int      default_dcv = 1024;
	static const bool     default_noDc = false;
	static const bool     default_useBlockwise = true;
	static const uint32_t default_seed = 0;
#ifdef BOWTIE_64BIT_INDEX
    static const int      default_lineRate_gfm = 8;
    static const int      default_lineRate_fm  = 7;
#else
	static const int      default_lineRate_gfm = 7;
    static const int      default_lineRate_fm  = 6;
#endif
	static const int      default_offRate = 5;
	static const int      default_offRatePlus = 0;
	static const int      default_ftabChars = 10;
	static const bool     default_bigEndian = false;
    
    // data used to build an index
    EList<ALT<index_t> >       _alts;
    EList<string>              _altnames;
    EList<Haplotype<index_t> > _haplotypes;
    RepeatDB<index_t>          _repeatdb;
    EList<RB_KmerTable>        _repeat_kmertables;

    bool _repeat;
    EList<pair<index_t, index_t> > _repeatLens;
    EList<uint8_t>                 _repeatIncluded;

protected:

	ostream& log() const {
		return cerr; // TODO: turn this into a parameter
	}

	/// Print a verbose message and flush (flushing is helpful for
	/// debugging)
	void verbose(const string& s) const {
		if(this->verbose()) {
			this->log() << s.c_str();
			this->log().flush();
		}
	}
};

/**
 * Read reference names from an input stream 'in' for an Ebwt primary
 * file and store them in 'refnames'.
 */
template <typename index_t>
void readEbwtRefnames(istream& in, EList<string>& refnames) {
    // _in1 must already be open with the get cursor at the
    // beginning and no error flags set.
    assert(in.good());
    assert_eq((streamoff)in.tellg(), ios::beg);
    
    // Read endianness hints from both streams
    bool switchEndian = false;
    uint32_t one = readU32(in, switchEndian); // 1st word of primary stream
    if(one != 1) {
        assert_eq((1u<<24), one);
        switchEndian = true;
    }
    readU32(in, switchEndian); // version
    
    // Reads header entries one by one from primary stream
    index_t len           = readIndex<index_t>(in, switchEndian);
    index_t gbwtLen       = readIndex<index_t>(in, switchEndian);
    index_t numNodes      = readIndex<index_t>(in, switchEndian);
    int32_t lineRate      = readI32(in, switchEndian);
    /*int32_t  linesPerSide =*/ readI32(in, switchEndian);
    int32_t offRate       = readI32(in, switchEndian);
    int32_t ftabChars     = readI32(in, switchEndian);
    index_t eftabLen      = readIndex<index_t>(in, switchEndian);
    // BTL: chunkRate is now deprecated
    int32_t flags = readI32(in, switchEndian);
    bool entireReverse = false;
    if(flags < 0) {
        entireReverse = (((-flags) & GFM_ENTIRE_REV) != 0);
    }
    
    // Create a new EbwtParams from the entries read from primary stream
    GFMParams<index_t> gh(len, gbwtLen, numNodes, lineRate, offRate, ftabChars, eftabLen, entireReverse);
    
    index_t nPat = readIndex<index_t>(in, switchEndian); // nPat
    in.seekg(nPat*sizeof(index_t), ios_base::cur); // skip plen
    
    // Skip rstarts
    index_t nFrag = readIndex<index_t>(in, switchEndian);
    in.seekg(nFrag*sizeof(index_t)*3, ios_base::cur);
    
    // Skip ebwt
    in.seekg(gh._gbwtTotLen, ios_base::cur);
    
    // Skip zOff from primary stream
    index_t numZOffs = readIndex<index_t>(in, switchEndian);
    in.seekg(numZOffs * sizeof(index_t), ios_base::cur);
    
    // Skip fchr
    in.seekg(5 * sizeof(index_t), ios_base::cur);
    
    // Skip ftab
    in.seekg(gh._ftabLen*sizeof(index_t), ios_base::cur);
    
    // Skip eftab
    in.seekg(gh._eftabLen*sizeof(index_t), ios_base::cur);
    
    // Read reference sequence names from primary index file
    while(true) {
        char c = '\0';
        in.read(&c, 1);
        if(in.eof()) break;
        if(c == '\0') break;
        else if(c == '\n') {
            refnames.push_back("");
        } else {
            if(refnames.size() == 0) {
                refnames.push_back("");
            }
            refnames.back().push_back(c);
        }
    }
    if(refnames.back().empty()) {
        refnames.pop_back();
    }
    
    // Be kind
    in.clear(); in.seekg(0, ios::beg);
    assert(in.good());
}

/**
 * Read reference names from the index with basename 'in' and store
 * them in 'refnames'.
 */
template <typename index_t>
void readEbwtRefnames(const string& instr, EList<string>& refnames) {
    ifstream in;
    // Initialize our primary and secondary input-stream fields
    in.open((instr + ".1." + gfm_ext).c_str(), ios_base::in | ios::binary);
    if(!in.is_open()) {
        throw GFMFileOpenException("Cannot open file " + instr);
    }
    assert(in.is_open());
    assert(in.good());
    assert_eq((streamoff)in.tellg(), ios::beg);
    readEbwtRefnames<index_t>(in, refnames);
}

///////////////////////////////////////////////////////////////////////
//
// Functions for building Ebwts
//
///////////////////////////////////////////////////////////////////////

/**
 * Join several text strings together in a way that's compatible with
 * the text-chunking scheme dictated by chunkRate parameter.
 *
 * The non-static member Ebwt::join additionally builds auxilliary
 * arrays that maintain a mapping between chunks in the joined string
 * and the original text strings.
 */
template <typename index_t>
template <typename TStr>
TStr GFM<index_t>::join(EList<TStr>& l, uint32_t seed) {
	RandomSource rand; // reproducible given same seed
	rand.init(seed);
	TStr ret;
	index_t guessLen = 0;
	for(index_t i = 0; i < l.size(); i++) {
		guessLen += length(l[i]);
	}
	ret.resize(guessLen);
	index_t off = 0;
	for(size_t i = 0; i < l.size(); i++) {
		TStr& s = l[i];
		assert_gt(s.length(), 0);
		for(size_t j = 0; j < s.size(); j++) {
			ret.set(s[j], off++);
		}
	}
	return ret;
}

/**
 * Join several text strings together in a way that's compatible with
 * the text-chunking scheme dictated by chunkRate parameter.
 *
 * The non-static member Ebwt::join additionally builds auxilliary
 * arrays that maintain a mapping between chunks in the joined string
 * and the original text strings.
 */
template <typename index_t>
template <typename TStr>
void GFM<index_t>::join(EList<FileBuf*>& l,
                        EList<RefRecord>& szs,
                        index_t sztot,
                        const RefReadInParams& refparams,
                        uint32_t seed,
                        TStr& s,
                        bool include_rc,
						bool CGtoTG)
{
	RandomSource rand; // reproducible given same seed
	rand.init(seed);
	RefReadInParams rpcp = refparams;
	index_t guessLen = sztot;
    if(include_rc) {
        s.resize(guessLen << 1);
    } else {
        s.resize(guessLen);
    }
	ASSERT_ONLY(index_t szsi = 0);
	TIndexOffU dstoff = 0;
	for(index_t i = 0; i < l.size(); i++) {
		// For each sequence we can pull out of istream l[i]...
		assert(!l[i]->eof());
		bool first = true;
		while(!l[i]->eof()) {
			RefRecord rec = fastaRefReadAppend(*l[i], first, s, dstoff, rpcp);
			first = false;
			index_t bases = (index_t)rec.len;
            assert_eq(rec.off, szs[szsi].off);
			assert_eq(rec.len, szs[szsi].len);
			assert_eq(rec.first, szs[szsi].first);
			ASSERT_ONLY(szsi++);
			if(bases == 0) continue;
		}
	}

    // Change 'C' in CG to 'T' so that CG becomes TG
    if(CGtoTG) {
        for(TIndexOffU i = 0; i + 1 < guessLen; i++) {
            int nt1 = s[i], nt2 = s[i+1];
            if(nt1 == 1 && nt2 == 2) {
                s[i] = 3;
            }
        }
    }

    // Append reverse complement
    if(include_rc) {
        for (TIndexOffU i = 0; i < guessLen; i++) {
            int nt = s[guessLen - i - 1];
            assert_range(0, 3, nt);
            s[guessLen + i] = dnacomp[nt];
        }
    }
}

/**
 * Join several text strings together according to the text-chunking
 * scheme specified in the EbwtParams.  Ebwt fields calculated in this
 * function are written directly to disk.
 *
 * It is assumed, but not required, that the header values have already
 * been written to 'out1' before this function is called.
 *
 * The static member Ebwt::join just returns a joined version of a
 * list of strings without building any of the auxilliary arrays.
 */
template <typename index_t>
template <typename TStr>
void GFM<index_t>::joinToDisk(
	EList<FileBuf*>& l,
	EList<RefRecord>& szs,
	index_t sztot,
	const RefReadInParams& refparams,
	TStr& ret,
	ostream& out1,
	ostream& out2)
{
	RefReadInParams rpcp = refparams;
	assert_gt(szs.size(), 0);
	assert_gt(l.size(), 0);
	assert_gt(sztot, 0);
	// Not every fragment represents a distinct sequence - many
	// fragments may correspond to a single sequence.  Count the
	// number of sequences here by counting the number of "first"
	// fragments.
	this->_nPat = 0;
	this->_nFrag = 0;
	for(index_t i = 0; i < szs.size(); i++) {
		if(szs[i].len > 0) this->_nFrag++;
		if(szs[i].first && szs[i].len > 0) this->_nPat++;
	}
	assert_gt(this->_nPat, 0);
	assert_geq(this->_nFrag, this->_nPat);
	_rstarts.reset();
	writeIndex<index_t>(out1, this->_nPat, this->toBe());
	// Allocate plen[]
	try {
		this->_plen.init(new index_t[this->_nPat], this->_nPat);
	} catch(bad_alloc& e) {
		cerr << "Out of memory allocating plen[] in Ebwt::join()"
		     << " at " << __FILE__ << ":" << __LINE__ << endl;
		throw e;
	}
	// For each pattern, set plen
	int npat = -1;
	for(index_t i = 0; i < szs.size(); i++) {
		if(szs[i].first && szs[i].len > 0) {
			if(npat >= 0) {
				writeIndex<index_t>(out1, this->plen()[npat], this->toBe());
			}
			npat++;
			this->plen()[npat] = (szs[i].len + szs[i].off);
		} else {
			this->plen()[npat] += (szs[i].len + szs[i].off);
		}
	}
	assert_eq((index_t)npat, this->_nPat-1);
	writeIndex<index_t>(out1, this->plen()[npat], this->toBe());
	// Write the number of fragments
	writeIndex<index_t>(out1, this->_nFrag, this->toBe());
	index_t seqsRead = 0;
	ASSERT_ONLY(index_t szsi = 0);
	ASSERT_ONLY(index_t entsWritten = 0);
	index_t dstoff = 0;
	// For each filebuf
	for(unsigned int i = 0; i < l.size(); i++) {
		assert(!l[i]->eof());
		bool first = true;
		index_t patoff = 0;
		// For each *fragment* (not necessary an entire sequence) we
		// can pull out of istream l[i]...
		while(!l[i]->eof()) {
			string name;
			// Push a new name onto our vector
			_refnames.push_back("");
			RefRecord rec = fastaRefReadAppend(
				*l[i], first, ret, dstoff, rpcp, &_refnames.back());
			first = false;
			index_t bases = rec.len;
			if(rec.first && rec.len > 0) {
				if(_refnames.back().length() == 0) {
					// If name was empty, replace with an index
					ostringstream stm;
					stm << seqsRead;
					_refnames.back() = stm.str();
				}
			} else {
				// This record didn't actually start a new sequence so
				// no need to add a name
				//assert_eq(0, _refnames.back().length());
				_refnames.pop_back();
			}
			// Increment seqsRead if this is the first fragment
			if(rec.first && rec.len > 0) seqsRead++;
            assert_lt(szsi, szs.size());
            assert_eq(rec.off, szs[szsi].off);
            assert_eq(rec.len, szs[szsi].len);
            assert_eq(rec.first, szs[szsi].first);
            assert(rec.first || rec.off > 0);
            ASSERT_ONLY(szsi++);
			assert_leq(bases, this->plen()[seqsRead-1]);
			// Reset the patoff if this is the first fragment
			if(rec.first) patoff = 0;
			patoff += rec.off; // add fragment's offset from end of last frag.
			// Adjust rpcps
			//index_t seq = seqsRead-1;
#ifndef NDEBUG
            if(bases > 0) {
                ASSERT_ONLY(entsWritten++);
            }
#endif
			// This is where rstarts elements are written to the output stream
			//writeU32(out1, oldRetLen, this->toBe()); // offset from beginning of joined string
			//writeU32(out1, seq,       this->toBe()); // sequence id
			//writeU32(out1, patoff,    this->toBe()); // offset into sequence
			patoff += (index_t)bases;
		}
		assert_gt(szsi, 0);
		l[i]->reset();
		assert(!l[i]->eof());
#ifndef NDEBUG
		int c = l[i]->get();
		assert_eq('>', c);
		assert(!l[i]->eof());
		l[i]->reset();
		assert(!l[i]->eof());
#endif
	}
	assert_eq(entsWritten, this->_nFrag);
}

/**
 * Build an Ebwt from a string 's' and its suffix array 'sa' (which
 * might actually be a suffix array *builder* that builds blocks of the
 * array on demand).  The bulk of the Ebwt, i.e. the ebwt and offs
 * arrays, is written directly to disk.  This is by design: keeping
 * those arrays in memory needlessly increases the footprint of the
 * building process.  Instead, we prefer to build the Ebwt directly
 * "to disk" and then read it back into memory later as necessary.
 *
 * It is assumed that the header values and join-related values (nPat,
 * plen) have already been written to 'out1' before this function
 * is called.  When this function is finished, it will have
 * additionally written ebwt, zOff, fchr, ftab and eftab to the primary
 * file and offs to the secondary file.
 *
 * Assume DNA/RNA/any alphabet with 4 or fewer elements.
 * Assume occ array entries are 32 bits each.
 *
 * @param sa            the suffix array to convert to a Ebwt
 * @param s             the original string
 * @param out
 */
template <typename index_t>
template <typename TStr>
void GFM<index_t>::buildToDisk(
                               PathGraph<index_t>& gbwt,
                               const TStr& s,
                               ostream& out1,
                               ostream& out2,
                               streampos headerPos)
{
    const GFMParams<index_t>& gh = this->_gh;
	assert(gh.repOk());
	assert_lt(s.length(), gh.gbwtLen());
	assert_eq(s.length(), gh._len);
	assert_gt(gh._lineRate, 3);
	
	index_t  gbwtLen = gh._gbwtLen;
    streampos out1pos = out1.tellp();
    if(headerPos < 0) {
        out1.seekp(8 + sizeof(index_t));
    } else {
        out1.seekp(headerPos);
    }
    writeIndex<index_t>(out1, gbwtLen, this->toBe());
    writeIndex<index_t>(out1, gh._numNodes, this->toBe());
    out1.seekp(out1pos);
    
	index_t  ftabLen = gh._ftabLen;
	index_t  sideSz = gh._sideSz;
	index_t  gbwtTotSz = gh._gbwtTotSz;
	index_t  fchr[] = {0, 0, 0, 0, 0};
	EList<index_t> ftab(EBWT_CAT);
    EList<index_t> zOffs;

	// Save # of occurrences of each character as we walk along the bwt
	index_t occ[4] = {0, 0, 0, 0};
	index_t occSave[4] = {0, 0, 0, 0};
    // # of occurrences of 1 in M arrays
    index_t M_occ = 0, M_occSave = 0;
    // Location in F that corresponds to 1 in M
    index_t F_loc = 0, F_locSave = 0;
    
    // Record rows that should "absorb" adjacent rows in the ftab.
    try {
        VMSG_NL("Allocating ftab, absorbFtab");
        ftab.resize(ftabLen);
        ftab.fillZero();
    } catch(bad_alloc &e) {
        cerr << "Out of memory allocating ftab[] "
        << "in GFM::buildToDisk() at " << __FILE__ << ":"
        << __LINE__ << endl;
        throw e;
    }
    
	// Allocate the side buffer; holds a single side as its being
	// constructed and then written to disk.  Reused across all sides.
#ifdef SIXTY4_FORMAT
	EList<uint64_t> gfmSide(EBWT_CAT);
#else
	EList<uint8_t> gfmSide(EBWT_CAT);
#endif
	try {
        // Used to calculate ftab and eftab, but having gfm costs a lot of memory
        _gfm.init(new uint8_t[gh._gbwtTotLen], gh._gbwtTotLen, true);
#ifdef SIXTY4_FORMAT
		gfmSide.resize(sideSz >> 3);
#else
		gfmSide.resize(sideSz);
#endif
	} catch(bad_alloc &e) {
		cerr << "Out of memory allocating ebwtSide[] in "
		     << "GFM::buildToDisk() at " << __FILE__ << ":"
		     << __LINE__ << endl;
		throw e;
	}

	// Points to the base offset within ebwt for the side currently
	// being written
	index_t side = 0;

	// Whether we're assembling a forward or a reverse bucket
    bool fw = true;
	int sideCur = 0;

	index_t si = 0;   // string offset (chars)
	ASSERT_ONLY(bool inSA = true); // true iff saI still points inside suffix
	                               // array (as opposed to the padding at the
	                               // end)
	// Iterate over packed bwt bytes
	VMSG_NL("Entering GFM loop");
	ASSERT_ONLY(index_t beforeGbwtOff = (index_t)out1.tellp());
	while(side < gbwtTotSz) {
		// Sanity-check our cursor into the side buffer
		assert_geq(sideCur, 0);
		assert_lt(sideCur, (int)gh._sideGbwtSz);
		assert_eq(0, side % sideSz); // 'side' must be on side boundary
        if(sideCur == 0) {
            memset(gfmSide.ptr(), 0, gh._sideGbwtSz);
            gfmSide[sideCur] = 0; // clear
        }
		assert_lt(side + sideCur, gbwtTotSz);
		// Iterate over bit-pairs in the si'th character of the BWT
#ifdef SIXTY4_FORMAT
		for(int bpi = 0; bpi < 32; bpi++, si++)
#else
		for(int bpi = 0; bpi < 4; bpi++, si++)
#endif
		{
			int gbwtChar = 0; // one of A, C, G, T, and Z
            int F= 0, M = 0;     // either 0 or 1
            index_t pos = 0;  // pos on joined string
            bool count = true;
			if(si < gbwtLen) {
				gbwt.nextRow(gbwtChar, F, M, pos);
                
				// (that might have triggered sa to calc next suf block)
				if(gbwtChar == 'Z') {
					// Don't add the 'Z' in the last column to the BWT
					// transform; we can't encode a $ (only A C T or G)
					// and counting it as, say, an A, will mess up the
					// LF mapping
					gbwtChar = 0; count = false;
#ifndef NDEBUG
                    if(zOffs.size() > 0) {
                        assert_gt(si, zOffs.back());
                    }
#endif
                    zOffs.push_back(si); // remember GBWT row that corresponds to the 0th suffix
				} else {
                    gbwtChar = asc2dna[gbwtChar];
                    assert_lt(gbwtChar, 4);
                    // Update the fchr
                    fchr[gbwtChar]++;
				}
                
                assert_lt(F, 2);
                assert_lt(M, 2);
                if(M == 1) {
                    assert_neq(F_loc, numeric_limits<index_t>::max());
                    F_loc = gbwt.nextFLocation();
#ifndef NDEBUG
                    if(F_loc > 0) {
                        assert_gt(F_loc, F_locSave);
                    }
#endif
                }
				// Suffix array offset boundary? - update offset array
				if(M == 1 && (M_occ & gh._offMask) == M_occ) {
					assert_lt((M_occ >> gh._offRate), gh._offsLen);
                    // Write offsets directly to the secondary output
					// stream, thereby avoiding keeping them in memory
                    writeIndex<index_t>(out2, pos, this->toBe());
				}
			} else {
				// Strayed off the end of the SA, now we're just
				// padding out a bucket
#ifndef NDEBUG
				if(inSA) {
					// Assert that we wrote all the characters in the
					// string before now
					assert_eq(si, gbwtLen);
					inSA = false;
				}
#endif
				// 'A' used for padding; important that padding be
				// counted in the occ[] array
				gbwtChar = 0;
                F = M = 0;
			}
			if(count) occ[gbwtChar]++;
            if(M) M_occ++;
			// Append BWT char to bwt section of current side
			if(fw) {
				// Forward bucket: fill from least to most
#ifdef SIXTY4_FORMAT
				gfmSide[sideCur] |= ((uint64_t)gbwtChar << (bpi << 1));
				if(gbwtChar > 0) assert_gt(gfmSide[sideCur], 0);
                // To be implemented ...
                assert(false);
                cerr << "Not implemented" << endl;
                exit(1);
#else
				pack_2b_in_8b(gbwtChar, gfmSide[sideCur], bpi);
				assert_eq((gfmSide[sideCur] >> (bpi*2)) & 3, gbwtChar);
                
                int F_sideCur = (gh._sideGbwtSz + sideCur) >> 1;
                int F_bpi = bpi + ((sideCur & 0x1) << 2); // Can be used as M_bpi as well
                pack_1b_in_8b(F, gfmSide[F_sideCur], F_bpi);
                assert_eq((gfmSide[F_sideCur] >> F_bpi) & 1, F);
                
                int M_sideCur = F_sideCur + (gh._sideGbwtSz >> 2);
                pack_1b_in_8b(M, gfmSide[M_sideCur], F_bpi);
                assert_eq((gfmSide[M_sideCur] >> F_bpi) & 1, M);
#endif
			} else {
				// Backward bucket: fill from most to least
#ifdef SIXTY4_FORMAT
				gfmSide[sideCur] |= ((uint64_t)gbwtChar << ((31 - bpi) << 1));
				if(gbwtChar > 0) assert_gt(gfmSide[sideCur], 0);
                // To be implemented ...
                assert(false);
                cerr << "Not implemented" << endl;
                exit(1);
#else
				pack_2b_in_8b(gbwtChar, gfmSide[sideCur], 3-bpi);
				assert_eq((gfmSide[sideCur] >> ((3-bpi)*2)) & 3, gbwtChar);
                // To be implemented ...
                assert(false);
                cerr << "Not implemented" << endl;
                exit(1);
#endif
			}
		} // end loop over bit-pairs
        assert_eq(0, (occ[0] + occ[1] + occ[2] + occ[3] + zOffs.size()) & 3);
#ifdef SIXTY4_FORMAT
		assert_eq(0, si & 31);
#else
		assert_eq(0, si & 3);
#endif

		sideCur++;
		if((sideCur << 1) == (int)gh._sideGbwtSz) {
			sideCur = 0;
			index_t *uside = reinterpret_cast<index_t*>(gfmSide.ptr());
			// Write 'A', 'C', 'G', 'T', and '1' in M tallies
			side += sideSz;
			assert_leq(side, gh._gbwtTotSz);
            uside[(sideSz / sizeof(index_t))-6] = endianizeIndex(F_locSave, this->toBe());
            uside[(sideSz / sizeof(index_t))-5] = endianizeIndex(M_occSave, this->toBe());
			uside[(sideSz / sizeof(index_t))-4] = endianizeIndex(occSave[0], this->toBe());
			uside[(sideSz / sizeof(index_t))-3] = endianizeIndex(occSave[1], this->toBe());
			uside[(sideSz / sizeof(index_t))-2] = endianizeIndex(occSave[2], this->toBe());
			uside[(sideSz / sizeof(index_t))-1] = endianizeIndex(occSave[3], this->toBe());
            F_locSave = F_loc;
            M_occSave = M_occ;
			occSave[0] = occ[0];
			occSave[1] = occ[1];
			occSave[2] = occ[2];
			occSave[3] = occ[3];
			// Write backward side to primary file
			out1.write((const char *)gfmSide.ptr(), sideSz);
            
            //
            memcpy(((char*)_gfm.get()) + side - sideSz, (const char *)gfmSide.ptr(), sideSz);
		}
	}
	VMSG_NL("Exited GFM loop");
	// Assert that our loop counter got incremented right to the end
	assert_eq(side, gh._gbwtTotSz);
	// Assert that we wrote the expected amount to out1
	assert_eq(((index_t)out1.tellp() - beforeGbwtOff), gh._gbwtTotSz);
	// assert that the last thing we did was write a forward bucket

	//
	// Write zOffs to primary stream
	//
    assert_gt(zOffs.size(), 0);
    writeIndex<index_t>(out1, (index_t)zOffs.size(), this->toBe());
    for(size_t i = 0; i < zOffs.size(); i++) {
        writeIndex<index_t>(out1, zOffs[i], this->toBe());
    }

    //
    // Finish building fchr
    //
    // Exclusive prefix sum on fchr
    for(int i = 1; i < 4; i++) {
        fchr[i] += fchr[i-1];
    }
    assert_lt(fchr[3], gbwtLen);
    // Shift everybody up by one
    for(int i = 4; i >= 1; i--) {
        fchr[i] = fchr[i-1];
    }
    fchr[0] = 0;
    if(_verbose) {
        for(int i = 0; i < 5; i++)
            cerr << "fchr[" << "ACGT$"[i] << "]: " << fchr[i] << endl;
    }
    // Write fchr to primary file
    for(int i = 0; i < 5; i++) {
        writeIndex<index_t>(out1, fchr[i], this->toBe());
    }
    _fchr.init(new index_t[5], 5, true);
    memcpy(_fchr.get(), fchr, sizeof(index_t) * 5);
    
    
    // Initialize _zGbwtByteOffs and _zGbwtBpOffs
    _zOffs = zOffs;
    postReadInit(gh);

    // Build ftab and eftab
    EList<pair<index_t, index_t> > tFtab;
    tFtab.resizeExact(ftabLen - 1);
    for(index_t i = 0; i + 1 < ftabLen; i++) {
        index_t q = i;
        pair<index_t, index_t> range(0, gh._gbwtLen);
        SideLocus<index_t> tloc, bloc;
        SideLocus<index_t>::initFromTopBot(range.first, range.second, gh, gfm(), tloc, bloc);
        index_t j = 0;
        for(; j < (index_t)gh._ftabChars; j++) {
            int nt = q & 0x3; q >>= 2;
            if(bloc.valid()) {
                range = mapGLF(tloc, bloc, nt);
            } else {
                range = mapGLF1(range.first, tloc, nt);
            }
            if(range.first == (index_t)INDEX_MAX || range.first >= range.second) {
                break;
            }
            if(range.first + 1 == range.second) {
                tloc.initFromRow(range.first, gh, gfm());
                bloc.invalidate();
            } else {
                SideLocus<index_t>::initFromTopBot(range.first, range.second, gh, gfm(), tloc, bloc);
            }
        }
        
        if(range.first >= range.second || j < (index_t)gh._ftabChars) {
            if(i == 0) {
                tFtab[i].first = tFtab[i].second = 0;
            } else {
                tFtab[i].first = tFtab[i].second = tFtab[i-1].second;
            }
        } else {
            tFtab[i].first = range.first;
            tFtab[i].second = range.second;
        }
        
#ifndef NDEBUG
        if(gbwt.ftab.size() > i) {
            assert_eq(tFtab[i].first, gbwt.ftab[i].first);
            assert_eq(tFtab[i].second, gbwt.ftab[i].second);
        }
#endif
    }
    
    // Clear memory
    _gfm.reset();
    _fchr.reset();
    _zOffs.clear();
    _zGbwtByteOffs.clear();
    _zGbwtBpOffs.clear();
    
	//
	// Finish building ftab and build eftab
	//
	// Prefix sum on ftable
	index_t eftabLen = 0;
	for(index_t i = 1; i + 1 < ftabLen; i++) {
        if(tFtab[i-1].second != tFtab[i].first) {
            eftabLen += 2;
        }
	}
    
    if(gh._gbwtLen + (eftabLen >> 1) < gh._gbwtLen) {
        cerr << "Too many eftab entries: "
        << gh._gbwtLen << " + " << (eftabLen >> 1)
        << " > " << (index_t)INDEX_MAX << endl;
        throw 1;
    }
    
    EList<index_t> eftab(EBWT_CAT);
	try {
		eftab.resize(eftabLen);
		eftab.fillZero();
	} catch(bad_alloc &e) {
		cerr << "Out of memory allocating eftab[] "
		     << "in GFM::buildToDisk() at " << __FILE__ << ":"
		     << __LINE__ << endl;
		throw e;
	}
	index_t eftabCur = 0;
    ftab[0] = tFtab[0].first;
    ftab[1] = tFtab[0].second;
    for(index_t i = 1; i + 1 < ftabLen; i++) {
        if(ftab[i] != tFtab[i].first) {
            index_t lo = ftab[i];
            index_t hi = tFtab[i].first;
            assert_lt(eftabCur*2+1, eftabLen);
            eftab[eftabCur*2] = lo;
            eftab[eftabCur*2+1] = hi;
            // one node can be shared, and one node can have at most four incoming edges
            assert_leq(lo, hi + 4);
            ftab[i] = (eftabCur++) ^ (index_t)INDEX_MAX; // insert pointer into eftab
            assert_eq(lo, GFM<index_t>::ftabLo(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i));
            assert_eq(hi, GFM<index_t>::ftabHi(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i));
        }
        ftab[i+1] = tFtab[i].second;
    }
#ifndef NDEBUG
    for(index_t i = 0; i + 1 < ftabLen; i++ ){
        assert_eq(tFtab[i].first, GFM<index_t>::ftabHi(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i));
        assert_eq(tFtab[i].second, GFM<index_t>::ftabLo(ftab.ptr(), eftab.ptr(), gbwtLen, ftabLen, eftabLen, i+1));
    }
#endif
	// Write ftab to primary file
	for(index_t i = 0; i < ftabLen; i++) {
		writeIndex<index_t>(out1, ftab[i], this->toBe());
	}
    // Write eftab to primary file
    out1pos = out1.tellp();
    if(headerPos < 0) {
        out1.seekp(24 + sizeof(index_t) * 3);
    } else {
        out1.seekp((int)headerPos + 16 + sizeof(index_t) * 2);
    }
    writeIndex<index_t>(out1, eftabLen, this->toBe());
    out1.seekp(out1pos);
    for(index_t i = 0; i < eftabLen; i++) {
        writeIndex<index_t>(out1, eftab[i], this->toBe());
    }
	// Note: if you'd like to sanity-check the Ebwt, you'll have to
	// read it back into memory first!
	assert(!isInMemory());
	VMSG_NL("Exiting GFM::buildToDisk()");
}

/**
 * Build an Ebwt from a string 's' and its suffix array 'sa' (which
 * might actually be a suffix array *builder* that builds blocks of the
 * array on demand).  The bulk of the Ebwt, i.e. the ebwt and offs
 * arrays, is written directly to disk.  This is by design: keeping
 * those arrays in memory needlessly increases the footprint of the
 * building process.  Instead, we prefer to build the Ebwt directly
 * "to disk" and then read it back into memory later as necessary.
 *
 * It is assumed that the header values and join-related values (nPat,
 * plen) have already been written to 'out1' before this function
 * is called.  When this function is finished, it will have
 * additionally written ebwt, zOff, fchr, ftab and eftab to the primary
 * file and offs to the secondary file.
 *
 * Assume DNA/RNA/any alphabet with 4 or fewer elements.
 * Assume occ array entries are 32 bits each.
 *
 * @param sa            the suffix array to convert to a Ebwt
 * @param s             the original string
 * @param out
 */
template <typename index_t>
template <typename TStr>
void GFM<index_t>::buildToDisk(
                               InorderBlockwiseSA<TStr>& sa,
                               const TStr& s,
                               ostream& out1,
                               ostream& out2,
                               streampos headerPos)
{
    const GFMParams<index_t>& gh = this->_gh;
    assert(gh.repOk());
    assert(gh.linearFM());
    assert_lt(s.length(), gh.gbwtLen());
    assert_eq(s.length(), gh._len);
    assert_gt(gh._lineRate, 3);
    
    index_t  len = gh._len;
    index_t  gbwtLen = gh._gbwtLen;
    assert_eq(len + 1, gbwtLen);
    streampos out1pos = out1.tellp();
    if(headerPos < 0) {
        out1.seekp(8 + sizeof(index_t));
    } else {
        out1.seekp(headerPos);
    }
    writeIndex<index_t>(out1, gbwtLen, this->toBe());
    writeIndex<index_t>(out1, gh._numNodes, this->toBe());
    out1.seekp(out1pos);
    
    index_t  ftabLen = gh._ftabLen;
    index_t  sideSz = gh._sideSz;
    index_t  gbwtTotSz = gh._gbwtTotSz;
    index_t  fchr[] = {0, 0, 0, 0, 0};
    EList<index_t> ftab(EBWT_CAT);
    EList<index_t> zOffs;
    
    // Save # of occurrences of each character as we walk along the bwt
    index_t occ[4] = {0, 0, 0, 0};
    index_t occSave[4] = {0, 0, 0, 0};
    
    // Record rows that should "absorb" adjacent rows in the ftab.
    // The absorbed rows represent suffixes shorter than the ftabChars
    // cutoff.
    uint8_t absorbCnt = 0;
    EList<uint8_t> absorbFtab(EBWT_CAT);
    try {
        VMSG_NL("Allocating ftab, absorbFtab");
        ftab.resize(ftabLen);
        ftab.fillZero();
        absorbFtab.resize(ftabLen);
        absorbFtab.fillZero();
    } catch(bad_alloc &e) {
        cerr << "Out of memory allocating ftab[] or absorbFtab[] "
        << "in GFM::buildToDisk() at " << __FILE__ << ":"
        << __LINE__ << endl;
        throw e;
    }
    
    // Allocate the side buffer; holds a single side as its being
    // constructed and then written to disk.  Reused across all sides.
#ifdef SIXTY4_FORMAT
    EList<uint64_t> gfmSide(EBWT_CAT);
#else
    EList<uint8_t> gfmSide(EBWT_CAT);
#endif
    try {
#ifdef SIXTY4_FORMAT
        gfmSide.resize(sideSz >> 3);
#else
        gfmSide.resize(sideSz);
#endif
    } catch(bad_alloc &e) {
        cerr << "Out of memory allocating gfmSide[] in "
        << "GFM::buildToDisk() at " << __FILE__ << ":"
        << __LINE__ << endl;
        throw e;
    }
    
    // Points to the base offset within ebwt for the side currently
    // being written
    index_t side = 0;
    
    // Whether we're assembling a forward or a reverse bucket
    bool fw = true;
    int sideCur = 0;
    
    // Have we skipped the '$' in the last column yet?
    ASSERT_ONLY(bool dollarSkipped = false);
    
    index_t si = 0;   // string offset (chars)
    ASSERT_ONLY(index_t lastSufInt = 0);
    ASSERT_ONLY(bool inSA = true); // true iff saI still points inside suffix
    // array (as opposed to the padding at the
    // end)
    // Iterate over packed bwt bytes
    VMSG_NL("Entering GFM loop");
    ASSERT_ONLY(index_t beforeGbwtOff = (index_t)out1.tellp());
    while(side < gbwtTotSz) {
        // Sanity-check our cursor into the side buffer
        assert_geq(sideCur, 0);
        assert_lt(sideCur, (int)gh._sideGbwtSz);
        assert_eq(0, side % sideSz); // 'side' must be on side boundary
        gfmSide[sideCur] = 0; // clear
        assert_lt(side + sideCur, gbwtTotSz);
        // Iterate over bit-pairs in the si'th character of the BWT
#ifdef SIXTY4_FORMAT
        for(int bpi = 0; bpi < 32; bpi++, si++)
#else
        for(int bpi = 0; bpi < 4; bpi++, si++)
#endif
        {
            int bwtChar;
            bool count = true;
            if(si <= len) {
                // Still in the SA; extract the bwtChar
                index_t saElt = sa.nextSuffix();
                // (that might have triggered sa to calc next suf block)
                if(saElt == 0) {
                    // Don't add the '$' in the last column to the BWT
                    // transform; we can't encode a $ (only A C T or G)
                    // and counting it as, say, an A, will mess up the
                    // LR mapping
                    bwtChar = 0; count = false;
                    ASSERT_ONLY(dollarSkipped = true);
                    zOffs.push_back(si); // remember the SA row that
                    // corresponds to the 0th suffix
                } else {
                    bwtChar = (int)(s[saElt-1]);
                    assert_lt(bwtChar, 4);
                    // Update the fchr
                    fchr[bwtChar]++;
                }
                // Update ftab
                if((len-saElt) >= (index_t)gh._ftabChars) {
                    // Turn the first ftabChars characters of the
                    // suffix into an integer index into ftab.  The
                    // leftmost (lowest index) character of the suffix
                    // goes in the most significant bit pair if the
                    // integer.
                    index_t sufInt = 0;
                    for(int i = 0; i < gh._ftabChars; i++) {
                        sufInt <<= 2;
                        assert_lt((index_t)i, len-saElt);
                        sufInt |= (unsigned char)(s[saElt+i]);
                    }
                    // Assert that this prefix-of-suffix is greater
                    // than or equal to the last one (true b/c the
                    // suffix array is sorted)
#ifndef NDEBUG
                    if(lastSufInt > 0) assert_geq(sufInt, lastSufInt);
                    lastSufInt = sufInt;
#endif
                    // Update ftab
                    assert_lt(sufInt+1, ftabLen);
                    ftab[sufInt+1]++;
                    if(absorbCnt > 0) {
                        // Absorb all short suffixes since the last
                        // transition into this transition
                        absorbFtab[sufInt] = absorbCnt;
                        absorbCnt = 0;
                    }
                } else {
                    // Otherwise if suffix is fewer than ftabChars
                    // characters long, then add it to the 'absorbCnt';
                    // it will be absorbed into the next transition
                    assert_lt(absorbCnt, 255);
                    absorbCnt++;
                }
                // Suffix array offset boundary? - update offset array
                if((si & gh._offMask) == si) {
                    assert_lt((si >> gh._offRate), gh._offsLen);
                    // Write offsets directly to the secondary output
                    // stream, thereby avoiding keeping them in memory
                    writeIndex<index_t>(out2, saElt, this->toBe());
                }
            } else {
                // Strayed off the end of the SA, now we're just
                // padding out a bucket
#ifndef NDEBUG
                if(inSA) {
                    // Assert that we wrote all the characters in the
                    // string before now
                    assert_eq(si, len+1);
                    inSA = false;
                }
#endif
                // 'A' used for padding; important that padding be
                // counted in the occ[] array
                bwtChar = 0;
            }
            if(count) occ[bwtChar]++;
            // Append BWT char to bwt section of current side
            if(fw) {
                // Forward bucket: fill from least to most
#ifdef SIXTY4_FORMAT
                ebwtSide[sideCur] |= ((uint64_t)bwtChar << (bpi << 1));
                if(bwtChar > 0) assert_gt(ebwtSide[sideCur], 0);
#else
                pack_2b_in_8b(bwtChar, gfmSide[sideCur], bpi);
                assert_eq((gfmSide[sideCur] >> (bpi*2)) & 3, bwtChar);
#endif
            } else {
                // Backward bucket: fill from most to least
#ifdef SIXTY4_FORMAT
                ebwtSide[sideCur] |= ((uint64_t)bwtChar << ((31 - bpi) << 1));
                if(bwtChar > 0) assert_gt(ebwtSide[sideCur], 0);
#else
                pack_2b_in_8b(bwtChar, gfmSide[sideCur], 3-bpi);
                assert_eq((gfmSide[sideCur] >> ((3-bpi)*2)) & 3, bwtChar);
#endif
            }
        } // end loop over bit-pairs
        assert_eq(dollarSkipped ? 3 : 0, (occ[0] + occ[1] + occ[2] + occ[3]) & 3);
#ifdef SIXTY4_FORMAT
        assert_eq(0, si & 31);
#else
        assert_eq(0, si & 3);
#endif
        
        sideCur++;
        if(sideCur == (int)gh._sideGbwtSz) {
            sideCur = 0;
            index_t *uside = reinterpret_cast<index_t*>(gfmSide.ptr());
            // Write 'A', 'C', 'G', 'T', and '1' in M tallies
            side += sideSz;
            assert_leq(side, gh._gbwtTotSz);
            uside[(sideSz / sizeof(index_t))-4] = endianizeIndex(occSave[0], this->toBe());
            uside[(sideSz / sizeof(index_t))-3] = endianizeIndex(occSave[1], this->toBe());
            uside[(sideSz / sizeof(index_t))-2] = endianizeIndex(occSave[2], this->toBe());
            uside[(sideSz / sizeof(index_t))-1] = endianizeIndex(occSave[3], this->toBe());
            occSave[0] = occ[0];
            occSave[1] = occ[1];
            occSave[2] = occ[2];
            occSave[3] = occ[3];
            // Write backward side to primary file
            out1.write((const char *)gfmSide.ptr(), sideSz);
        }
    }
    VMSG_NL("Exited GFM loop");
    if(absorbCnt > 0) {
        // Absorb any trailing, as-yet-unabsorbed short suffixes into
        // the last element of ftab
        absorbFtab[ftabLen-1] = absorbCnt;
    }
    
    // Assert that our loop counter got incremented right to the end
    assert_eq(side, gh._gbwtTotSz);
    // Assert that we wrote the expected amount to out1
    assert_eq(((index_t)out1.tellp() - beforeGbwtOff), gh._gbwtTotSz);
    // assert that the last thing we did was write a forward bucket
    
    //
    // Write zOffs to primary stream
    //
    assert_eq(zOffs.size(), 1);
    writeIndex<index_t>(out1, (index_t)zOffs.size(), this->toBe());
    for(size_t i = 0; i < zOffs.size(); i++) {
        assert_neq(zOffs[i], (index_t)OFF_MASK);
        writeIndex<index_t>(out1, zOffs[i], this->toBe());
    }
    
    //
    // Finish building fchr
    //
    // Exclusive prefix sum on fchr
    for(int i = 1; i < 4; i++) {
        fchr[i] += fchr[i-1];
    }
    assert_lt(fchr[3], gbwtLen);
    // Shift everybody up by one
    for(int i = 4; i >= 1; i--) {
        fchr[i] = fchr[i-1];
    }
    fchr[0] = 0;
    if(_verbose) {
        for(int i = 0; i < 5; i++)
            cerr << "fchr[" << "ACGT$"[i] << "]: " << fchr[i] << endl;
    }
    // Write fchr to primary file
    for(int i = 0; i < 5; i++) {
        writeIndex<index_t>(out1, fchr[i], this->toBe());
    }
    
    //
    // Finish building ftab and build eftab
    //
    // Prefix sum on ftable
    index_t eftabLen = 0;
    assert_eq(0, absorbFtab[0]);
    for(index_t i = 1; i < ftabLen; i++) {
        if(absorbFtab[i] > 0) eftabLen += 2;
    }
    assert_leq(eftabLen, (index_t)gh._ftabChars*2);
    eftabLen = gh._ftabChars*2;
    EList<index_t> eftab(EBWT_CAT);
    try {
        eftab.resize(eftabLen);
        eftab.fillZero();
    } catch(bad_alloc &e) {
        cerr << "Out of memory allocating eftab[] "
        << "in GFM::buildToDisk() at " << __FILE__ << ":"
        << __LINE__ << endl;
        throw e;
    }
    index_t eftabCur = 0;
    for(index_t i = 1; i < ftabLen; i++) {
        index_t lo = ftab[i] + GFM<index_t>::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i-1);
        if(absorbFtab[i] > 0) {
            // Skip a number of short pattern indicated by absorbFtab[i]
            index_t hi = lo + absorbFtab[i];
            assert_lt(eftabCur*2+1, eftabLen);
            eftab[eftabCur*2] = lo;
            eftab[eftabCur*2+1] = hi;
            ftab[i] = (eftabCur++) ^ (index_t)OFF_MASK; // insert pointer into eftab
            assert_eq(lo, GFM<index_t>::ftabLo(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i));
            assert_eq(hi, GFM<index_t>::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i));
        } else {
            ftab[i] = lo;
        }
    }
    assert_eq(GFM<index_t>::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, ftabLen-1), len+1);
    // Write ftab to primary file
    for(index_t i = 0; i < ftabLen; i++) {
        writeIndex<index_t>(out1, ftab[i], this->toBe());
    }
    // Write eftab to primary file
    out1pos = out1.tellp();
    if(headerPos < 0) {
        out1.seekp(24 + sizeof(index_t) * 3);
    } else {
        out1.seekp((int)headerPos + 16 + sizeof(index_t) * 2);
    }
    writeIndex<index_t>(out1, eftabLen, this->toBe());
    out1.seekp(out1pos);
    for(index_t i = 0; i < eftabLen; i++) {
        writeIndex<index_t>(out1, eftab[i], this->toBe());
    }
    
    // Note: if you'd like to sanity-check the Ebwt, you'll have to
    // read it back into memory first!
    assert(!isInMemory());
    VMSG_NL("Exiting GFM::buildToDisk()");
}

extern string gLastIOErrMsg;

/* Checks whether a call to read() failed or not. */
inline bool is_read_err(int fdesc, ssize_t ret, size_t count) {
    if (ret < 0) {
        std::stringstream sstm;
        sstm << "ERRNO: " << errno << " ERR Msg:" << strerror(errno) << std::endl;
		gLastIOErrMsg = sstm.str();
        return true;
    }
    return false;
}

/* Checks whether a call to fread() failed or not. */
inline bool is_fread_err(FILE* file_hd, size_t ret, size_t count) {
    if (ferror(file_hd)) {
        gLastIOErrMsg = "Error Reading File!";
        return true;
    }
    return false;
}


///////////////////////////////////////////////////////////////////////
//
// Functions for searching Ebwts
// (But most of them are defined in the header)
//
///////////////////////////////////////////////////////////////////////

/**
 * Take an offset into the joined text and translate it into the
 * reference of the index it falls on, the offset into the reference,
 * and the length of the reference.  Use a binary search through the
 * sorted list of reference fragment ranges t
 */
template <typename index_t>
bool GFM<index_t>::joinedToTextOff(
                                   index_t qlen,
                                   index_t off,
                                   index_t& tidx,
                                   index_t& textoff,
                                   index_t& tlen,
                                   bool rejectStraddle,
                                   bool& straddled) const
{
	assert(rstarts() != NULL); // must have loaded rstarts
	index_t top = 0;
	index_t bot = _nFrag; // 1 greater than largest addressable element
	index_t elt = (index_t)INDEX_MAX;
	// Begin binary search
	while(true) {
		index_t oldelt = elt;
		elt = top + ((bot - top) >> 1);
        if(oldelt == elt) {
            tidx = (index_t)INDEX_MAX;
            return false;
        }
		index_t lower = rstarts()[elt*3];
		index_t upper;
		if(elt == _nFrag-1) {
			upper = _gh._len;
		} else {
			upper = rstarts()[((elt+1)*3)];
		}
		assert_gt(upper, lower);
		index_t fraglen = upper - lower;
		if(lower <= off) {
			if(upper > off) { // not last element, but it's within
				// off is in this range; check if it falls off
				if(off + qlen > upper) {
					straddled = true;
					if(rejectStraddle) {
						// it falls off; signal no-go and return
						tidx = (index_t)INDEX_MAX;
						return false;
					}
				}
				// This is the correct text idx whether the index is
				// forward or reverse
				tidx = rstarts()[(elt*3)+1];
				assert_lt(tidx, this->_nPat);
				assert_leq(fraglen, this->plen()[tidx]);
				// it doesn't fall off; now calculate textoff.
				// Initially it's the number of characters that precede
				// the alignment in the fragment
				index_t fragoff = off - rstarts()[(elt*3)];
				if(!this->fw_) {
					fragoff = fraglen - fragoff - 1;
					fragoff -= (qlen-1);
				}
				// Add the alignment's offset into the fragment
				// ('fragoff') to the fragment's offset within the text
				textoff = fragoff + rstarts()[(elt*3)+2];
				assert_lt(textoff, this->plen()[tidx]);
				break; // done with binary search
			} else {
				// 'off' belongs somewhere in the region between elt
				// and bot
				top = elt;
			}
		} else {
			// 'off' belongs somewhere in the region between top and
			// elt
			bot = elt;
		}
		// continue with binary search
	}
	tlen = this->plen()[tidx];
    return true;
}

template <typename index_t>
bool GFM<index_t>::textOffToJoined(
                                   index_t  tid,
                                   index_t  textoff,
                                   index_t& off) const
{
    assert(rstarts() != NULL); // must have loaded rstarts
    index_t top = 0;
    index_t bot = _nFrag; // 1 greater than largest addressable element
    index_t elt = (index_t)INDEX_MAX;
    // Begin binary search
    while(true) {
        ASSERT_ONLY(index_t oldelt = elt);
        elt = top + ((bot - top) >> 1);
        assert_neq(oldelt, elt); // must have made progress
        index_t elt_tid = rstarts()[elt*3 + 1];
        if(elt_tid == tid) {
            while(true) {
                if(tid != rstarts()[elt*3+1]) {
                    return false;
                }
                if(rstarts()[elt*3 + 2] <= textoff) break;
                if(elt == 0) return false;
                elt--;
            }
            while(true) {
                assert_leq(rstarts()[elt*3+2], textoff);
                if(elt + 1 == _nFrag ||
                   tid + 1 == rstarts()[(elt+1)*3 + 1] ||
                   textoff < rstarts()[(elt+1)*3 + 2]) {
                    off = rstarts()[elt*3] + (textoff - rstarts()[elt*3 + 2]);
                    if(elt + 1 < _nFrag &&
                       tid == rstarts()[(elt+1)*3 + 1] &&
                       off >= rstarts()[(elt+1)*3]) {
                        return false;
                    }
                    break;
                }
                elt++;
            }
            break; // done with binary search
        } else if(elt_tid < tid) {
            top = elt;
        } else {
            bot = elt;
        }
        // continue with binary search
    }
    return true;
}

/**
 * Walk 'steps' steps to the left and return the row arrived at.  If we
 * walk through the dollar sign, return 0xffffffff.
 */
template <typename index_t>
index_t GFM<index_t>::walkLeft(index_t row, index_t steps) const {
	assert(offs() != NULL);
	assert_neq((index_t)INDEX_MAX, row);
	SideLocus<index_t> l;
	if(steps > 0) l.initFromRow(row, _gh, gfm());
	while(steps > 0) {
        for(index_t i = 0; i < _zOffs.size(); i++) {
            if(row == _zOffs[i]) return (index_t)INDEX_MAX;
        }
        pair<index_t, index_t> range = this->mapGLF1(row, l, (pair<index_t, index_t> *)NULL ASSERT_ONLY(, false));
        index_t newrow = range.first;
		assert_neq((index_t)INDEX_MAX, newrow);
		assert_neq(newrow, row);
		row = newrow;
		steps--;
		if(steps > 0) l.initFromRow(row, _gh, gfm());
	}
	return row;
}

/**
 * Resolve the reference offset of the BW element 'elt'.
 */
template <typename index_t>
index_t GFM<index_t>::getOffset(index_t row, index_t node) const {
	assert(offs() != NULL);
	assert_neq((index_t)INDEX_MAX, row);
    for(index_t i = 0; i < _zOffs.size(); i++) {
        if(row == _zOffs[i]) return 0;
    }
    if((node & _gh._offMask) == node) {
        index_t off = this->offs()[node >> _gh._offRate];
        if(off != (index_t)INDEX_MAX)
            return off;
    }
	index_t jumps = 0;
	SideLocus<index_t> l;
	l.initFromRow(row, _gh, gfm());
	while(true) {
        pair<index_t, index_t> node_range(0, 0);
        pair<index_t, index_t> range = this->mapGLF1(row, l, &node_range ASSERT_ONLY(, false));
        index_t newrow = range.first;
		jumps++;
		assert_neq((index_t)INDEX_MAX, newrow);
		assert_neq(newrow, row);
		row = newrow;
        for(index_t i = 0; i < _zOffs.size(); i++) {
            if(row == _zOffs[i]) return jumps;
        }
        
        if((node_range.first & _gh._offMask) == node_range.first) {
            index_t off = this->offs()[node_range.first >> _gh._offRate];
            if(off != (index_t)INDEX_MAX)
                return jumps + off;
		}
		l.initFromRow(row, _gh, gfm());
	}
}

/**
 * Resolve the reference offset of the BW element 'elt' such that
 * the offset returned is at the right-hand side of the forward
 * reference substring involved in the hit.
 */
template <typename index_t>
index_t GFM<index_t>::getOffset(
                                index_t elt,
                                bool fw,
                                index_t hitlen) const
{
	index_t off = getOffset(elt);
	assert_neq((index_t)INDEX_MAX, off);
	if(!fw) {
		assert_lt(off, _gh._len);
		off = _gh._len - off - 1;
        assert_geq(off, hitlen-1);
		off -= (hitlen-1);
        assert_lt(off, _gh._len);
	}
	return off;
}

/**
 * Returns true iff the index contains the given string (exactly).  The given
 * string must contain only unambiguous characters.  TODO: support ambiguous
 * characters in 'str'.
 */
template <typename index_t>
bool GFM<index_t>::contains(
                            const BTDnaString& str,
                            index_t *otop,
                            index_t *obot) const
{
	assert(isInMemory());
	SideLocus<index_t> tloc, bloc;
	if(str.empty()) {
		if(otop != NULL && obot != NULL) *otop = *obot = 0;
		return true;
	}
	int c = str[str.length()-1];
	assert_range(0, 4, c);
	index_t top = 0, bot = 0;
	if(c < 4) {
		top = fchr()[c];
		bot = fchr()[c+1];
	} else {
		bool set = false;
		for(int i = 0; i < 4; i++) {
			if(fchr()[c] < fchr()[c+1]) {
				if(set) {
					return false;
				} else {
					set = true;
					top = fchr()[c];
					bot = fchr()[c+1];
				}
			}
		}
	}
	assert_geq(bot, top);
	tloc.initFromRow(top, gh(), gfm());
	bloc.initFromRow(bot, gh(), gfm());
	ASSERT_ONLY(index_t lastDiff = bot - top);
	for(int64_t i = (int64_t)str.length()-2; i >= 0; i--) {
		c = str[i];
		assert_range(0, 4, c);
		if(c <= 3) {
			top = mapLF(tloc, c);
			bot = mapLF(bloc, c);
		} else {
			index_t sz = bot - top;
			int c1 = mapLF1(top, tloc ASSERT_ONLY(, false));
			bot = mapLF(bloc, c1);
			assert_leq(bot - top, sz);
			if(bot - top < sz) {
				// Encountered an N and could not proceed through it because
				// there was more than one possible nucleotide we could replace
				// it with
				return false;
			}
		}
		assert_geq(bot, top);
		assert_leq(bot-top, lastDiff);
		ASSERT_ONLY(lastDiff = bot-top);
		if(i > 0) {
			tloc.initFromRow(top, gh(), gfm());
			bloc.initFromRow(bot, gh(), gfm());
		}
	}
	if(otop != NULL && obot != NULL) {
		*otop = top; *obot = bot;
	}
	return bot > top;
}

///////////////////////////////////////////////////////////////////////
//
// Functions for reading and writing Ebwts
//
///////////////////////////////////////////////////////////////////////

/**
 * Read an Ebwt from file with given filename.
 */
template <typename index_t>
void GFM<index_t>::readIntoMemory(
                                  int needEntireRev,
                                  bool loadSASamp,
                                  bool loadFtab,
                                  bool loadRstarts,
                                  bool justHeader,
                                  GFMParams<index_t> *params,
                                  bool mmSweep,
                                  bool loadNames,
                                  bool startVerbose,
                                  bool subIndex)
{
    bool switchEndian; // dummy; caller doesn't care
#ifdef BOWTIE_MM
    char *mmFile[] = { NULL, NULL };
#endif
    if(_in1Str.length() > 0 && !subIndex) {
        if(_verbose || startVerbose) {
            cerr << "  About to open input files: ";
            logTime(cerr);
        }
        // Initialize our primary and secondary input-stream fields
        if(_in1 != NULL) fclose(_in1);
        if(_verbose || startVerbose) cerr << "Opening \"" << _in1Str.c_str() << "\"" << endl;
        if((_in1 = fopen(_in1Str.c_str(), "rb")) == NULL) {
            cerr << "Could not open index file " << _in1Str.c_str() << endl;
        }
        if(loadSASamp) {
            if(_in2 != NULL) fclose(_in2);
            if(_verbose || startVerbose) cerr << "Opening \"" << _in2Str.c_str() << "\"" << endl;
            if((_in2 = fopen(_in2Str.c_str(), "rb")) == NULL) {
                cerr << "Could not open index file " << _in2Str.c_str() << endl;
            }
        }
        if(_verbose || startVerbose) {
            cerr << "  Finished opening input files: ";
            logTime(cerr);
        }
        
#ifdef BOWTIE_MM
        if(_useMm /*&& !justHeader*/) {
            const char *names[] = {_in1Str.c_str(), _in2Str.c_str()};
            int fds[] = { fileno(_in1), fileno(_in2) };
            for(int i = 0; i < (loadSASamp ? 2 : 1); i++) {
                if(_verbose || startVerbose) {
                    cerr << "  Memory-mapping input file " << (i+1) << ": ";
                    logTime(cerr);
                }
                struct stat sbuf;
                if (stat(names[i], &sbuf) == -1) {
                    perror("stat");
                    cerr << "Error: Could not stat index file " << names[i] << " prior to memory-mapping" << endl;
                    throw 1;
                }
                mmFile[i] = (char*)mmap((void *)0, (size_t)sbuf.st_size,
                                        PROT_READ, MAP_SHARED, fds[(size_t)i], 0);
                if(mmFile[i] == (void *)(-1)) {
                    perror("mmap");
                    cerr << "Error: Could not memory-map the index file " << names[i] << endl;
                    throw 1;
                }
                if(mmSweep) {
                    int sum = 0;
                    for(off_t j = 0; j < sbuf.st_size; j += 1024) {
                        sum += (int) mmFile[i][j];
                    }
                    if(startVerbose) {
                        cerr << "  Swept the memory-mapped ebwt index file 1; checksum: " << sum << ": ";
                        logTime(cerr);
                    }
                }
            }
            mmFile1_ = mmFile[0];
            mmFile2_ = loadSASamp ? mmFile[1] : NULL;
        }
#endif
    }
#ifdef BOWTIE_MM
    else if(_useMm && !justHeader) {
        mmFile[0] = mmFile1_;
        mmFile[1] = mmFile2_;
    }
    if(_useMm && !justHeader) {
        assert(mmFile[0] == mmFile1_);
        assert(mmFile[1] == mmFile2_);
    }
#endif
    
    if(_verbose || startVerbose) {
        cerr << "  Reading header: ";
        logTime(cerr);
    }
    
    // Read endianness hints from both streams
    size_t bytesRead = 0;
    if(!subIndex) {
        switchEndian = false;
        uint32_t one = readU32(_in1, switchEndian); // 1st word of primary stream
        bytesRead += 4;
        if(loadSASamp) {
#ifndef NDEBUG
            assert_eq(one, readU32(_in2, switchEndian)); // should match!
#else
            readU32(_in2, switchEndian);
#endif
        }
        if(one != 1) {
            assert_eq((1u<<24), one);
            assert_eq(1, endianSwapU32(one));
            switchEndian = true;
        }

        _toBigEndian = switchEndian;

        // Can't switch endianness and use memory-mapped files; in order to
        // support this, someone has to modify the file to switch
        // endiannesses appropriately, and we can't do this inside Bowtie
        // or we might be setting up a race condition with other processes.
        if(switchEndian && _useMm) {
            cerr << "Error: Can't use memory-mapped files when the index is the opposite endianness" << endl;
            throw 1;
        }

        // Reads header entries one by one from primary stream
        int index_version = (int)readU32(_in1, switchEndian); bytesRead += 4;
        int major_index_version, minor_index_version;
        string index_version_extra;
        readIndexVersion(index_version, major_index_version, minor_index_version, index_version_extra);
        int major_program_version, minor_program_version;
        string program_version_extra;
        readProgramVersion(major_program_version, minor_program_version, program_version_extra);
        if(major_program_version < major_index_version ||
           (major_program_version == major_index_version && minor_program_version < minor_index_version)) {
            cerr << "Warning: the current version of HISAT2 (" << HISAT2_VERSION << ") is older than the version (2."
            << major_index_version << "." << minor_index_version;
            if(index_version_extra.length() > 0) {
                cerr << "-" << index_version_extra;
            }
            cerr << ") used to build the index." << endl;
            cerr << "         Users are strongly recommended to update HISAT2 to the latest version." << endl;
        }
    } else {
        switchEndian = _toBigEndian;
    }

    index_t len       = readIndex<index_t>(_in1, switchEndian);
    bytesRead += sizeof(index_t);
    index_t gbwtLen       = readIndex<index_t>(_in1, switchEndian);
    bytesRead += sizeof(index_t);
    assert_lt(len, gbwtLen);
    index_t numNodes      = readIndex<index_t>(_in1, switchEndian);
    bytesRead += sizeof(index_t);
    int32_t  lineRate     = readI32(_in1, switchEndian);
    bytesRead += 4;
    /*int32_t  linesPerSide =*/ readI32(_in1, switchEndian);
    bytesRead += 4;
    int32_t  offRate      = readI32(_in1, switchEndian);
    bytesRead += 4;
    // TODO: add isaRate to the actual file format (right now, the
    // user has to tell us whether there's an ISA sample and what the
    // sampling rate is.
    int32_t  ftabChars    = readI32(_in1, switchEndian);
    bytesRead += 4;
    index_t  eftabLen     = readIndex<index_t>(_in1, switchEndian);
    bytesRead += sizeof(index_t);
    // chunkRate was deprecated in an earlier version of Bowtie; now
    // we use it to hold flags.
    int32_t flags = readI32(_in1, switchEndian);
    bool entireRev = false;
    if(flags < 0 && (((-flags) & GFM_ENTIRE_REV) == 0)) {
        if(needEntireRev != -1 && needEntireRev != 0) {
            cerr << "Error: This index is compatible with 0.* versions of Bowtie, but not with 2.*" << endl
            << "versions.  Please build or download a version of the index that is compitble" << endl
            << "with Bowtie 2.* (i.e. built with bowtie-build 2.* or later)" << endl;
            throw 1;
        }
    } else entireRev = true;
    bytesRead += 4;
    
    // Create a new EbwtParams from the entries read from primary stream
    GFMParams<index_t> *gh;
    bool deleteGh = false;
    if(params != NULL) {
        params->init(len, gbwtLen, numNodes, lineRate, offRate, ftabChars, eftabLen, entireRev);
        if(_verbose || startVerbose) params->print(cerr);
        gh = params;
    } else {
        gh = new GFMParams<index_t>(len, gbwtLen, numNodes, lineRate, offRate, ftabChars, eftabLen, entireRev);
        deleteGh = true;
    }
    
    // Set up overridden suffix-array-sample parameters
    index_t offsLen = gh->_offsLen;
    index_t offRateDiff = 0;
    index_t offsLenSampled = offsLen;
    if(_overrideOffRate > offRate) {
        offRateDiff = _overrideOffRate - offRate;
    }
    if(offRateDiff > 0) {
        offsLenSampled >>= offRateDiff;
        if((offsLen & ~(((index_t)INDEX_MAX) << offRateDiff)) != 0) {
            offsLenSampled++;
        }
    }
    
    // Can't override the offrate or isarate and use memory-mapped
    // files; ultimately, all processes need to copy the sparser sample
    // into their own memory spaces.
#if 0
    if(_useMm && (offRateDiff)) {
        cerr << "Error: Can't use memory-mapped files when the offrate is overridden" << endl;
        throw 1;
    }
#endif
    
    // Read nPat from primary stream
    this->_nPat = readIndex<index_t>(_in1, switchEndian);
    bytesRead += sizeof(index_t);
    _plen.reset();
    // Read plen from primary stream
    if(_useMm) {
#ifdef BOWTIE_MM
        _plen.init((index_t*)(mmFile[0] + bytesRead), _nPat, false);
        bytesRead += _nPat*sizeof(index_t);
        fseek(_in1, _nPat*sizeof(index_t), SEEK_CUR);
#endif
    } else {
        try {
            if(_verbose || startVerbose) {
                cerr << "Reading plen (" << this->_nPat << "): ";
                logTime(cerr);
            }
            _plen.init(new index_t[_nPat], _nPat, true);
            if(switchEndian) {
                for(index_t i = 0; i < this->_nPat; i++) {
                    plen()[i] = readIndex<index_t>(_in1, switchEndian);
                }
            } else {
                size_t r = MM_READ(_in1, (void*)(plen()), _nPat*sizeof(index_t));
                if(r != (size_t)(_nPat*sizeof(index_t))) {
                    cerr << "Error reading _plen[] array: " << r << ", " << _nPat*sizeof(index_t) << endl;
                    throw 1;
                }
            }
        } catch(bad_alloc& e) {
            cerr << "Out of memory allocating plen[] in Ebwt::read()"
            << " at " << __FILE__ << ":" << __LINE__ << endl;
            throw e;
        }
    }
    
    // TODO: I'm not consistent on what "header" means.  Here I'm using
    // "header" to mean everything that would exist in memory if we
    // started to build the Ebwt but stopped short of the build*() step
    // (i.e. everything up to and including join()).
    if(justHeader) {
        // Be kind
        if(deleteGh) delete gh;
#ifdef BOWTIE_MM
        fseek(_in1, 0, SEEK_SET);
        if(loadSASamp) fseek(_in2, 0, SEEK_SET);
#else
        rewind(_in1);
        if(loadSASamp) rewind(_in2);
#endif
        return;
    }
    
    bool shmemLeader;
    
    this->_nFrag = readIndex<index_t>(_in1, switchEndian);
    bytesRead += sizeof(index_t);
    if(_verbose || startVerbose) {
        cerr << "Reading rstarts (" << this->_nFrag*3 << "): ";
        logTime(cerr);
    }
    assert_geq(this->_nFrag, this->_nPat);
    _rstarts.reset();
    if(loadRstarts) {
        if(_useMm) {
#ifdef BOWTIE_MM
            _rstarts.init((index_t*)(mmFile[0] + bytesRead), _nFrag*3, false);
            bytesRead += this->_nFrag*sizeof(index_t)*3;
            fseek(_in1, this->_nFrag*sizeof(index_t)*3, SEEK_CUR);
#endif
        } else {
            _rstarts.init(new index_t[_nFrag*3], _nFrag*3, true);
            if(switchEndian) {
                for(size_t i = 0; i < (size_t)(this->_nFrag*3); i += 3) {
                    // fragment starting position in joined reference
                    // string, text id, and fragment offset within text
                    this->rstarts()[i]   = readIndex<index_t>(_in1, switchEndian);
                    this->rstarts()[i+1] = readIndex<index_t>(_in1, switchEndian);
                    this->rstarts()[i+2] = readIndex<index_t>(_in1, switchEndian);
                }
            } else {
                size_t r = MM_READ(_in1, (void *)rstarts(), this->_nFrag*sizeof(index_t)*3);
                if(r != (size_t)(this->_nFrag*sizeof(index_t)*3)) {
                    cerr << "Error reading _rstarts[] array: " << r << ", " << (this->_nFrag*sizeof(index_t)*3) << endl;
                    throw 1;
                }
            }
        }
    } else {
        // Skip em
        assert(rstarts() == NULL);
        bytesRead += this->_nFrag*sizeof(index_t)*3;
        fseek(_in1, this->_nFrag*sizeof(index_t)*3, SEEK_CUR);
    }
    
    _gfm.reset();
    if(_useMm) {
#ifdef BOWTIE_MM
        _gfm.init((uint8_t*)(mmFile[0] + bytesRead), gh->_gbwtTotLen, false);
        bytesRead += gh->_gbwtTotLen;
        fseek(_in1, gh->_gbwtTotLen, SEEK_CUR);
#endif
    } else {
        // Allocate ebwt (big allocation)
        if(_verbose || startVerbose) {
            cerr << "Reading ebwt (" << gh->_gbwtTotLen << "): ";
            logTime(cerr);
        }
        bool shmemLeader = true;
        if(useShmem_) {
            uint8_t *tmp = NULL;
            shmemLeader = ALLOC_SHARED_U8(
                                          (_in1Str + "[ebwt]"), gh->_gbwtTotLen, &tmp,
                                          "gfm[]", (_verbose || startVerbose));
            assert(tmp != NULL);
            _gfm.init(tmp, gh->_gbwtTotLen, false);
            if(_verbose || startVerbose) {
                cerr << "  shared-mem " << (shmemLeader ? "leader" : "follower") << endl;
            }
        } else {
            try {
                _gfm.init(new uint8_t[gh->_gbwtTotLen], gh->_gbwtTotLen, true);
            } catch(bad_alloc& e) {
                cerr << "Out of memory allocating the gfm[] array for the Bowtie index.  Please try" << endl
                << "again on a computer with more memory." << endl;
                throw 1;
            }
        }
        if(shmemLeader) {
            // Read ebwt from primary stream
            uint64_t bytesLeft = gh->_gbwtTotLen;
            char *pgbwt = (char*)this->gfm();
            while (bytesLeft>0){
                size_t r = MM_READ(this->_in1, (void *)pgbwt, bytesLeft);
                if(MM_IS_IO_ERR(this->_in1, r, bytesLeft)) {
                    cerr << "Error reading _ebwt[] array: " << r << ", "
                    << bytesLeft << endl;
                    throw 1;
                }
                pgbwt += r;
                bytesLeft -= r;
            }
            if(switchEndian) {
                uint8_t *side = this->gfm();
                for(size_t i = 0; i < gh->_numSides; i++) {
                    index_t *cums = reinterpret_cast<index_t*>(side + gh->_sideSz - sizeof(index_t)*2);
                    cums[0] = endianSwapIndex(cums[0]);
                    cums[1] = endianSwapIndex(cums[1]);
                    side += this->_gh._sideSz;
                }
            }
#ifdef BOWTIE_SHARED_MEM
            if(useShmem_) NOTIFY_SHARED(gfm(), gh->_gbwtTotLen);
#endif
        } else {
            // Seek past the data and wait until master is finished
            fseek(_in1, gh->_gbwtTotLen, SEEK_CUR);
#ifdef BOWTIE_SHARED_MEM
            if(useShmem_) WAIT_SHARED(gfm(), gh->_gbwtTotLen);
#endif
        }
    }
    
    // Read zOff from primary stream
    _zOffs.clear();
    index_t num_zOffs = readIndex<index_t>(_in1, switchEndian);
    bytesRead += sizeof(index_t);
    for(index_t i = 0; i < num_zOffs; i++) {
        index_t zOff = readIndex<index_t>(_in1, switchEndian);
        bytesRead += sizeof(index_t);
        assert_lt(zOff, gbwtLen);
        _zOffs.push_back(zOff);
    }
    
    try {
        // Read fchr from primary stream
        if(_verbose || startVerbose) cerr << "Reading fchr (5)" << endl;
        _fchr.reset();
        if(_useMm) {
#ifdef BOWTIE_MM
            _fchr.init((index_t*)(mmFile[0] + bytesRead), 5, false);
            bytesRead += 5*sizeof(index_t);
            fseek(_in1, 5*sizeof(index_t), SEEK_CUR);
#endif
        } else {
            _fchr.init(new index_t[5], 5, true);
            for(int i = 0; i < 5; i++) {
                this->fchr()[i] = readIndex<index_t>(_in1, switchEndian);
                assert_leq(this->fchr()[i], gbwtLen);
                assert(i <= 0 || this->fchr()[i] >= this->fchr()[i-1]);
            }
        }
        assert_gt(this->fchr()[4], this->fchr()[0]);
        // Read ftab from primary stream
        if(_verbose || startVerbose) {
            if(loadFtab) {
                cerr << "Reading ftab (" << gh->_ftabLen << "): ";
                logTime(cerr);
            } else {
                cerr << "Skipping ftab (" << gh->_ftabLen << "): ";
            }
        }
        _ftab.reset();
        if(loadFtab) {
            if(_useMm) {
#ifdef BOWTIE_MM
                _ftab.init((index_t*)(mmFile[0] + bytesRead), gh->_ftabLen, false);
                bytesRead += gh->_ftabLen*sizeof(index_t);
                fseek(_in1, gh->_ftabLen*sizeof(index_t), SEEK_CUR);
#endif
            } else {
                _ftab.init(new index_t[gh->_ftabLen], gh->_ftabLen, true);
                if(switchEndian) {
                    for(size_t i = 0; i < gh->_ftabLen; i++)
                        this->ftab()[i] = readIndex<index_t>(_in1, switchEndian);
                } else {
                    size_t r = MM_READ(_in1, (void *)ftab(), gh->_ftabLen*sizeof(index_t));
                    if(r != (size_t)(gh->_ftabLen*sizeof(index_t))) {
                        cerr << "Error reading _ftab[] array: " << r << ", " << (gh->_ftabLen*sizeof(index_t)) << endl;
                        throw 1;
                    }
                }
            }
            // Read etab from primary stream
            if(_verbose || startVerbose) {
                if(loadFtab) {
                    cerr << "Reading eftab (" << gh->_eftabLen << "): ";
                    logTime(cerr);
                } else {
                    cerr << "Skipping eftab (" << gh->_eftabLen << "): ";
                }
                
            }
            _eftab.reset();
            if(_useMm) {
#ifdef BOWTIE_MM
                _eftab.init((index_t*)(mmFile[0] + bytesRead), gh->_eftabLen, false);
                bytesRead += gh->_eftabLen*sizeof(index_t);
                fseek(_in1, gh->_eftabLen*sizeof(index_t), SEEK_CUR);
#endif
            } else {
                _eftab.init(new index_t[gh->_eftabLen], gh->_eftabLen, true);
                if(switchEndian) {
                    for(size_t i = 0; i < gh->_eftabLen; i++)
                        this->eftab()[i] = readIndex<index_t>(_in1, switchEndian);
                } else {
                    size_t r = MM_READ(_in1, (void *)this->eftab(), gh->_eftabLen*sizeof(index_t));
                    if(r != (size_t)(gh->_eftabLen*sizeof(index_t))) {
                        cerr << "Error reading _eftab[] array: " << r << ", " << (gh->_eftabLen*sizeof(index_t)) << endl;
                        throw 1;
                    }
                }
            }
            for(index_t i = 0; i < gh->_eftabLen; i++) {
                if(i > 0 && this->eftab()[i] > 0) {
                    assert_geq(this->eftab()[i] + 4, this->eftab()[i-1]);
                } else if(i > 0 && this->eftab()[i-1] == 0) {
                    assert_eq(0, this->eftab()[i]);
                }
            }
        } else {
            assert(ftab() == NULL);
            assert(eftab() == NULL);
            // Skip ftab
            bytesRead += gh->_ftabLen*sizeof(index_t);
            fseek(_in1, gh->_ftabLen*sizeof(index_t), SEEK_CUR);
            // Skip eftab
            bytesRead += sizeof(index_t);
            bytesRead += gh->_eftabLen*sizeof(index_t);
            fseek(_in1, gh->_eftabLen*sizeof(index_t), SEEK_CUR);
        }
    } catch(bad_alloc& e) {
        cerr << "Out of memory allocating fchr[], ftab[] or eftab[] arrays for the Bowtie index." << endl
        << "Please try again on a computer with more memory." << endl;
        throw 1;
    }
    
    // Read reference sequence names from primary index file (or not,
    // if --refidx is specified)
    if(loadNames) {
        while(true) {
            char c = '\0';
            if(MM_READ(_in1, (void *)(&c), (size_t)1) != (size_t)1) break;
            bytesRead++;
            if(c == '\0') break;
            else if(c == '\n') {
                this->_refnames.push_back("");
            } else {
                if(this->_refnames.size() == 0) {
                    this->_refnames.push_back("");
                }
                this->_refnames.back().push_back(c);
            }
        }
    }
    
    _offs.reset();
    if(loadSASamp) {
        bytesRead = 4; // reset for secondary index file (already read 1-sentinel)
        
        shmemLeader = true;
        if(_verbose || startVerbose) {
            cerr << "Reading offs (" << offsLenSampled << " " << std::setw(2) << sizeof(index_t)*8 << "-bit words): ";
            logTime(cerr);
        }
        
        if(!_useMm) {
            if(!useShmem_) {
                // Allocate offs_
                try {
                    _offs.init(new index_t[offsLenSampled], offsLenSampled, true);
                } catch(bad_alloc& e) {
                    cerr << "Out of memory allocating the offs[] array  for the Bowtie index." << endl
                    << "Please try again on a computer with more memory." << endl;
                    throw 1;
                }
            } else {
                index_t *tmp = NULL;
                shmemLeader = ALLOC_SHARED_U32(
                                               (_in2Str + "[offs]"), offsLenSampled*sizeof(index_t), &tmp,
                                               "offs", (_verbose || startVerbose));
                _offs.init((index_t*)tmp, offsLenSampled, false);
            }
        }
        
        if(_overrideOffRate < 32) {
            if(shmemLeader) {
                // Allocate offs (big allocation)
                if(switchEndian || offRateDiff > 0) {
                    assert(!_useMm);
                    const index_t blockMaxSz = (index_t)(2 * 1024 * 1024); // 2 MB block size
                    const index_t blockMaxSzU = (blockMaxSz / sizeof(index_t)); // # U32s per block
                    char *buf;
                    try {
                        buf = new char[blockMaxSz];
                    } catch(std::bad_alloc& e) {
                        cerr << "Error: Out of memory allocating part of _offs array: '" << e.what() << "'" << endl;
                        throw e;
                    }
                    for(index_t i = 0; i < offsLen; i += blockMaxSzU) {
                        index_t block = min<index_t>((index_t)blockMaxSzU, (index_t)(offsLen - i));
                        size_t r = MM_READ(_in2, (void *)buf, block * sizeof(index_t));
                        if(r != (size_t)(block * sizeof(index_t))) {
                            cerr << "Error reading block of _offs[] array: " << r << ", " << (block * sizeof(index_t)) << endl;
                            throw 1;
                        }
                        index_t idx = i >> offRateDiff;
                        for(index_t j = 0; j < block; j += (1 << offRateDiff)) {
                            assert_lt(idx, offsLenSampled);
                            this->offs()[idx] = ((index_t*)buf)[j];
                            if(switchEndian) {
                                this->offs()[idx] = endianSwapIndex(this->offs()[idx]);
                            }
                            idx++;
                        }
                    }
                    delete[] buf;
                } else {
                    if(_useMm) {
#ifdef BOWTIE_MM
                        _offs.init((index_t*)(mmFile[1] + bytesRead), offsLen, false);
                        bytesRead += (offsLen * sizeof(index_t));
                        fseek(_in2, (offsLen * sizeof(index_t)), SEEK_CUR);
#endif
                    } else {
                        // Workaround for small-index mode where MM_READ may
                        // not be able to handle read amounts greater than 2^32
                        // bytes.
                        uint64_t bytesLeft = (offsLen * sizeof(index_t));
                        char *offs = (char *)this->offs();
                        
                        while(bytesLeft > 0) {
                            size_t r = MM_READ(_in2, (void*)offs, bytesLeft);
                            if(MM_IS_IO_ERR(_in2,r,bytesLeft)) {
                                cerr << "Error reading block of _offs[] array: "
                                << r << ", " << bytesLeft << gLastIOErrMsg << endl;
                                throw 1;
                            }
                            offs += r;
                            bytesLeft -= r;
                        }
                    }
                }
#ifdef BOWTIE_SHARED_MEM
                if(useShmem_) NOTIFY_SHARED(offs(), offsLenSampled*sizeof(index_t));
#endif
            } else {
                // Not the shmem leader
                fseek(_in2, offsLenSampled*sizeof(index_t), SEEK_CUR);
#ifdef BOWTIE_SHARED_MEM
                if(useShmem_) WAIT_SHARED(offs(), offsLenSampled*sizeof(index_t));
#endif
            }
        }
    }
    
    this->postReadInit(*gh); // Initialize fields of Ebwt not read from file
    if(_verbose || startVerbose) print(cerr, *gh);
    
    // The fact that _ebwt and friends actually point to something
    // (other than NULL) now signals to other member functions that the
    // Ebwt is loaded into memory.
    
    // Be kind
    if(deleteGh) delete gh;

    if(!subIndex) {
#ifdef BOWTIE_MM
        fseek(_in1, 0, SEEK_SET);
        if(loadSASamp) fseek(_in2, 0, SEEK_SET);
#else
        rewind(_in1);
        if(loadSASamp) rewind(_in2);
#endif
    }
}

/**
 * Read reference names from an input stream 'in' for an Ebwt primary
 * file and store them in 'refnames'.
 */
template <typename index_t>
void readGFMRefnames(istream& in, EList<string>& refnames) {
    // _in1 must already be open with the get cursor at the
    // beginning and no error flags set.
    assert(in.good());
    assert_eq((streamoff)in.tellg(), ios::beg);
    
    // Read endianness hints from both streams
    bool switchEndian = false;
    uint32_t one = readU32(in, switchEndian); // 1st word of primary stream
    if(one != 1) {
        assert_eq((1u<<24), one);
        switchEndian = true;
    }
    
    // Reads header entries one by one from primary stream
    readU32(in, switchEndian); // version
    index_t len           = readIndex<index_t>(in, switchEndian);
    index_t gbwtLen       = readIndex<index_t>(in, switchEndian);
    index_t numNodes      = readIndex<index_t>(in, switchEndian);
    int32_t lineRate     = readI32(in, switchEndian);
    /*int32_t  linesPerSide =*/ readI32(in, switchEndian);
    int32_t offRate      = readI32(in, switchEndian);
    int32_t ftabChars    = readI32(in, switchEndian);
    index_t eftabLen     = readIndex<index_t>(in, switchEndian);
    // BTL: chunkRate is now deprecated
    int32_t flags = readI32(in, switchEndian);
    bool entireReverse = false;
    if(flags < 0) {
        entireReverse = (((-flags) & GFM_ENTIRE_REV) != 0);
    }
    
    // Create a new EbwtParams from the entries read from primary stream
    GFM<index_t> gh(len, gbwtLen, numNodes, lineRate, offRate, ftabChars, eftabLen, entireReverse);
    
    index_t nPat = readIndex<index_t>(in, switchEndian); // nPat
    in.seekg(nPat*sizeof(index_t), ios_base::cur); // skip plen
    
    // Skip rstarts
    index_t nFrag = readIndex<index_t>(in, switchEndian);
    in.seekg(nFrag*sizeof(index_t)*3, ios_base::cur);
    
    // Skip ebwt
    in.seekg(gh._ebwtTotLen, ios_base::cur);
    
    // Skip zOff from primary stream
    index_t numZOffs = readIndex<index_t>(in, switchEndian);
    in.seekg(numZOffs * sizeof(index_t), ios_base::cur);
    
    // Skip fchr
    in.seekg(5 * sizeof(index_t), ios_base::cur);
    
    // Skip ftab
    in.seekg(gh._ftabLen*sizeof(index_t), ios_base::cur);
    
    // Skip eftab
    in.seekg(gh._eftabLen*sizeof(index_t), ios_base::cur);
    
    // Read reference sequence names from primary index file
    while(true) {
        char c = '\0';
        in.read(&c, 1);
        if(in.eof()) break;
        if(c == '\0') break;
        else if(c == '\n') {
            refnames.push_back("");
        } else {
            if(refnames.size() == 0) {
                refnames.push_back("");
            }
            refnames.back().push_back(c);
        }
    }
    if(refnames.back().empty()) {
        refnames.pop_back();
    }
    
    // Be kind
    in.clear(); in.seekg(0, ios::beg);
    assert(in.good());
}

/**
 * Read reference names from the index with basename 'in' and store
 * them in 'refnames'.
 */
template <typename index_t>
void readGFMRefnames(const string& instr, EList<string>& refnames) {
    ifstream in;
    // Initialize our primary and secondary input-stream fields
    in.open((instr + ".1." + gfm_ext).c_str(), ios_base::in | ios::binary);
    if(!in.is_open()) {
        throw GFMFileOpenException("Cannot open file " + instr);
    }
    assert(in.is_open());
    assert(in.good());
    assert_eq((streamoff)in.tellg(), ios::beg);
    readGFMRefnames<index_t>(in, refnames);
}

/**
 * Read just enough of the Ebwt's header to get its flags
 */
template <typename index_t>
int32_t GFM<index_t>::readVersionFlags(const string& instr, int& major, int& minor, string& extra_version) {
    ifstream in;
    // Initialize our primary and secondary input-stream fields
    in.open((instr + ".1." + gfm_ext).c_str(), ios_base::in | ios::binary);
    if(!in.is_open()) {
        throw GFMFileOpenException("Cannot open file " + instr);
    }
    assert(in.is_open());
    assert(in.good());
    bool switchEndian = false;
    uint32_t one = readU32(in, switchEndian); // 1st word of primary stream
    if(one != 1) {
        assert_eq((1u<<24), one);
        assert_eq(1, endianSwapU32(one));
        switchEndian = true;
    }
    index_t version = readU32(in, switchEndian);
    readIndexVersion(version, major, minor, extra_version);
    
    readIndex<index_t>(in, switchEndian);
    readIndex<index_t>(in, switchEndian);
    readIndex<index_t>(in, switchEndian);
    readI32(in, switchEndian);
    readI32(in, switchEndian);
    readI32(in, switchEndian);
    readI32(in, switchEndian);
    readIndex<index_t>(in, switchEndian);
    int32_t flags = readI32(in, switchEndian);
    return flags;
}


/**
 * Write an extended Burrows-Wheeler transform to a pair of output
 * streams.
 *
 * @param out1 output stream to primary file
 * @param out2 output stream to secondary file
 * @param be   write in big endian?
 */
template <typename index_t>
void GFM<index_t>::writeFromMemory(bool justHeader,
                                    ostream& out1,
                                    ostream& out2) const
{
    const GFMParams<index_t>& gh = this->_gh;
    assert(gh.repOk());
    uint32_t be = this->toBe();
    assert(out1.good());
    assert(out2.good());

    // When building an Ebwt, these header parameters are known
    // "up-front", i.e., they can be written to disk immediately,
    // before we join() or buildToDisk()
    writeI32(out1, 1, be); // endian hint for priamry stream
    writeI32(out2, 1, be); // endian hint for secondary stream
    int version = getIndexVersion();
    writeI32(out1, version, be); // version
    writeIndex<index_t>(out1, gh._len, be); // length of string (and bwt and suffix array)
    writeIndex<index_t>(out1, 0,       be); // dummy for gbwt len
    writeIndex<index_t>(out1, 0,       be); // dummy for number of nodes
    writeI32(out1, gh._lineRate,       be); // 2^lineRate = size in bytes of 1 line
    writeI32(out1, 2,                  be); // not used
    writeI32(out1, gh._offRate,        be); // every 2^offRate chars is "marked"
    writeI32(out1, gh._ftabChars,      be); // number of 2-bit chars used to address ftab
    writeIndex<index_t>(out1, 0,       be); // eftab length
    int32_t flags = 1;
    if(gh._entireReverse) flags |= GFM_ENTIRE_REV;
    writeI32(out1, -flags, be); // BTL: chunkRate is now deprecated
    
    if(!justHeader) {
        assert(rstarts() != NULL);
        assert(offs() != NULL);
        assert(ftab() != NULL);
        assert(eftab() != NULL);
        assert(isInMemory());
        // These Ebwt parameters are known after the inputs strings have
        // been joined() but before they have been built().  These can
        // written to the disk next and then discarded from memory.
        writeIndex<index_t>(out1, this->_nPat,      be);
        for(index_t i = 0; i < this->_nPat; i++)
            writeIndex<index_t>(out1, this->plen()[i], be);
        assert_geq(this->_nFrag, this->_nPat);
        writeIndex<index_t>(out1, this->_nFrag, be);
        for(size_t i = 0; i < this->_nFrag*3; i++)
            writeIndex<index_t>(out1, this->rstarts()[i], be);
        
        // These Ebwt parameters are discovered only as the Ebwt is being
        // built (in buildToDisk()).  Of these, only 'offs' and 'ebwt' are
        // terribly large.  'ebwt' is written to the primary file and then
        // discarded from memory as it is built; 'offs' is similarly
        // written to the secondary file and discarded.
        writeIndex<index_t>(out1, gh._gbwtTotLen, be);
        out1.write((const char *)this->gfm(), gh._gbwtTotLen);
        writeIndex<index_t>(out1, (index_t)_zOffs.size(), be);
        for(index_t i = 0; i < _zOffs.size(); i++)
            writeIndex<index_t>(out1, _zOffs[i], be);
        index_t offsLen = gh._offsLen;
        for(index_t i = 0; i < offsLen; i++)
            writeIndex<index_t>(out2, this->offs()[i], be);
        
        // 'fchr', 'ftab' and 'eftab' are not fully determined until the
        // loop is finished, so they are written to the primary file after
        // all of 'ebwt' has already been written and only then discarded
        // from memory.
        for(int i = 0; i < 5; i++)
            writeIndex<index_t>(out1, this->fchr()[i], be);
        for(index_t i = 0; i < gh._ftabLen; i++)
            writeIndex<index_t>(out1, this->ftab()[i], be);
        for(index_t i = 0; i < gh._eftabLen; i++)
            writeIndex<index_t>(out1, this->eftab()[i], be);
    }
}

/**
 * Given a pair of strings representing output filenames, and assuming
 * this Ebwt object is currently in memory, write out this Ebwt to the
 * specified files.
 *
 * If sanity-checking is enabled, then once the streams have been
 * fully written and closed, we reopen them and read them into a
 * (hopefully) exact copy of this Ebwt.  We then assert that the
 * current Ebwt and the copy match in all of their fields.
 */
template <typename index_t>
void GFM<index_t>::writeFromMemory(bool justHeader,
                                   const string& out1,
                                   const string& out2) const
{
    ASSERT_ONLY(const GFMParams<index_t>& gh = this->_gh);
    assert(isInMemory());
    assert(gh.repOk());
    
    ofstream fout1(out1.c_str(), ios::binary);
    ofstream fout2(out2.c_str(), ios::binary);
    writeFromMemory(justHeader, fout1, fout2);
    fout1.close();
    fout2.close();
    
    // Read the file back in and assert that all components match
    if(_sanity) {
#if 0
        if(_verbose)
            cout << "Re-reading \"" << out1 << "\"/\"" << out2 << "\" for sanity check" << endl;
        Ebwt copy(out1, out2, _verbose, _sanity);
        assert(!isInMemory());
        copy.loadIntoMemory(eh._color ? 1 : 0, true, false, false);
        assert(isInMemory());
        assert_eq(eh._lineRate,     copy.eh()._lineRate);
        assert_eq(eh._offRate,      copy.eh()._offRate);
        assert_eq(eh._ftabChars,    copy.eh()._ftabChars);
        assert_eq(eh._len,          copy.eh()._len);
        assert_eq(_zOff,             copy.zOff());
        assert_eq(_zEbwtBpOff,       copy.zEbwtBpOff());
        assert_eq(_zEbwtByteOff,     copy.zEbwtByteOff());
        assert_eq(_nPat,             copy.nPat());
        for(index_t i = 0; i < _nPat; i++)
            assert_eq(this->_plen[i], copy.plen()[i]);
        assert_eq(this->_nFrag, copy.nFrag());
        for(size_t i = 0; i < this->nFrag*3; i++) {
            assert_eq(this->_rstarts[i], copy.rstarts()[i]);
        }
        for(index_t i = 0; i < 5; i++)
            assert_eq(this->_fchr[i], copy.fchr()[i]);
        for(size_t i = 0; i < eh._ftabLen; i++)
            assert_eq(this->ftab()[i], copy.ftab()[i]);
        for(size_t i = 0; i < eh._eftabLen; i++)
            assert_eq(this->eftab()[i], copy.eftab()[i]);
        for(index_t i = 0; i < eh._offsLen; i++)
            assert_eq(this->_offs[i], copy.offs()[i]);
        for(index_t i = 0; i < eh._ebwtTotLen; i++)
            assert_eq(this->ebwt()[i], copy.ebwt()[i]);
        copy.sanityCheckAll();
        if(_verbose)
            cout << "Read-in check passed for \"" << out1 << "\"/\"" << out2 << "\"" << endl;
#endif
    }
}

/**
 * Write the rstarts array given the szs array for the reference.
 */
template <typename index_t>
void GFM<index_t>::szsToDisk(const EList<RefRecord>& szs, ostream& os, int reverse) {
    size_t seq = 0;
    index_t off = 0;
    index_t totlen = 0;
    for(size_t i = 0; i < szs.size(); i++) {
        if(szs[i].len == 0) continue;
        if(szs[i].first) off = 0;
        off += szs[i].off;
        if(szs[i].first && szs[i].len > 0) seq++;
        index_t seqm1 = (index_t)(seq-1);
        assert_lt(seqm1, _nPat);
        index_t fwoff = off;
        if(reverse == REF_READ_REVERSE) {
            // Invert pattern idxs
            seqm1 = _nPat - seqm1 - 1;
            // Invert pattern idxs
            assert_leq(off + szs[i].len, plen()[seqm1]);
            fwoff = plen()[seqm1] - (off + szs[i].len);
        }
        writeIndex<index_t>(os, totlen, this->toBe()); // offset from beginning of joined string
        writeIndex<index_t>(os, (index_t)seqm1,  this->toBe()); // sequence id
        writeIndex<index_t>(os, (index_t)fwoff,  this->toBe()); // offset into sequence
        totlen += szs[i].len;
        off += szs[i].len;
    }
}


///////////////////////////////////////////////////////////////////////
//
// Functions for printing and sanity-checking Ebwts
//
///////////////////////////////////////////////////////////////////////

/**
 * Check that the ebwt array is internally consistent up to (and not
 * including) the given side index by re-counting the chars and
 * comparing against the embedded occ[] arrays.
 */
template <typename index_t>
void GFM<index_t>::sanityCheckUpToSide(int upToSide) const {
    assert(isInMemory());
    index_t occ[] = {0, 0, 0, 0};
    ASSERT_ONLY(index_t occ_save[] = {0, 0, 0, 0});
    index_t cur = 0; // byte pointer
    const GFMParams<index_t>& gh = this->_gh;
    bool fw = false;
    while(cur < (upToSide * gh._sideSz)) {
        assert_leq(cur + gh._sideSz, gh._gbwtTotLen);
        for(index_t i = 0; i < gh._sideGbwtSz; i++) {
            uint8_t by = this->gfm()[cur + (fw ? i : gh._sideGbwtSz-i-1)];
            for(int j = 0; j < 4; j++) {
                // Unpack from lowest to highest bit pair
                int twoBit = unpack_2b_from_8b(by, fw ? j : 3-j);
                occ[twoBit]++;
            }
            assert_eq(0, (occ[0] + occ[1] + occ[2] + occ[3]) % 4);
        }
        assert_eq(0, (occ[0] + occ[1] + occ[2] + occ[3]) % gh._sideGbwtLen);
        // Finished forward bucket; check saved [A], [C], [G] and [T]
        // against the index_ts encoded here
        ASSERT_ONLY(const index_t *ugbwt = reinterpret_cast<const index_t*>(&gfm()[cur + gh._sideGbwtSz]));
        ASSERT_ONLY(index_t as = ugbwt[0]);
        ASSERT_ONLY(index_t cs = ugbwt[1]);
        ASSERT_ONLY(index_t gs = ugbwt[2]);
        ASSERT_ONLY(index_t ts = ugbwt[3]);
        assert(as == occ_save[0] || as == occ_save[0]-1);
        assert_eq(cs, occ_save[1]);
        assert_eq(gs, occ_save[2]);
        assert_eq(ts, occ_save[3]);
#ifndef NDEBUG
        occ_save[0] = occ[0];
        occ_save[1] = occ[1];
        occ_save[2] = occ[2];
        occ_save[3] = occ[3];
#endif
        cur += gh._sideSz;
    }
}

/**
 * Sanity-check various pieces of the Ebwt
 */
template <typename index_t>
void GFM<index_t>::sanityCheckAll(int reverse) const {
    const GFMParams<index_t>& gh = this->_gh;
    assert(isInMemory());
    // Check ftab
    for(index_t i = 1; i < gh._ftabLen; i++) {
        assert_geq(this->ftabHi(i), this->ftabLo(i-1));
        assert_geq(this->ftabLo(i), this->ftabHi(i-1));
        assert_leq(this->ftabHi(i), gh._gbwtLen);
    }
    assert_eq(this->ftabHi(gh._ftabLen-1), gh._gbwtLen);
    
    // Check offs
    int seenLen = (gh._gbwtLen + 31) >> ((index_t)5);
    uint32_t *seen;
    try {
        seen = new uint32_t[seenLen]; // bitvector marking seen offsets
    } catch(bad_alloc& e) {
        cerr << "Out of memory allocating seen[] at " << __FILE__ << ":" << __LINE__ << endl;
        throw e;
    }
    memset(seen, 0, 4 * seenLen);
    index_t offsLen = gh._offsLen;
    for(index_t i = 0; i < offsLen; i++) {
        assert_lt(this->offs()[i], gh._gbwtLen);
        int w = this->offs()[i] >> 5;
        int r = this->offs()[i] & 31;
        assert_eq(0, (seen[w] >> r) & 1); // shouldn't have been seen before
        seen[w] |= (1 << r);
    }
    delete[] seen;
    
    // Check nPat
    assert_gt(this->_nPat, 0);
    
    // Check plen, flen
    for(index_t i = 0; i < this->_nPat; i++) {
        assert_geq(this->plen()[i], 0);
    }
    
    // Check rstarts
    if(this->rstarts() != NULL) {
        for(index_t i = 0; i < this->_nFrag-1; i++) {
            assert_gt(this->rstarts()[(i+1)*3], this->rstarts()[i*3]);
            if(reverse == REF_READ_REVERSE) {
                assert(this->rstarts()[(i*3)+1] >= this->rstarts()[((i+1)*3)+1]);
            } else {
                assert(this->rstarts()[(i*3)+1] <= this->rstarts()[((i+1)*3)+1]);
            }
        }
    }
    
    // Check ebwt
    sanityCheckUpToSide(gh._numSides);
    VMSG_NL("Ebwt::sanityCheck passed");
}

/**
 * Transform this Ebwt into the original string in linear time by using
 * the LF mapping to walk backwards starting at the row correpsonding
 * to the end of the string.  The result is written to s.  The Ebwt
 * must be in memory.
 */
template <typename index_t>
void GFM<index_t>::restore(SString<char>& s) const {
    assert(isInMemory());
    s.resize(this->_gh._len);
    index_t jumps = 0;
    index_t i = this->_gh._len; // should point to final SA elt (starting with '$')
    SideLocus<index_t> l(i, this->_gh, this->gfm());
    while(true) {
        for(index_t j = 0; j < _zOffs.size(); j++) {
            if(i == _zOffs[j]) break;
        }
        assert_lt(jumps, this->_gh._len);
        //if(_verbose) cout << "restore: i: " << i << endl;
        // Not a marked row; go back a char in the original string
        index_t newi = mapLF(l ASSERT_ONLY(, false));
        assert_neq(newi, i);
        s[this->_gh._len - jumps - 1] = rowL(l);
        i = newi;
        l.initFromRow(i, this->_gh, this->gfm());
        jumps++;
    }
    assert_eq(jumps, this->_gh._len);
}

/**
 * Check that this Ebwt, when restored via restore(), matches up with
 * the given array of reference sequences.  For sanity checking.
 */
template <typename index_t>
void GFM<index_t>::checkOrigs(
                               const EList<SString<char> >& os,
                               bool mirror) const
{
    SString<char> rest;
    restore(rest);
    index_t restOff = 0;
    size_t i = 0, j = 0;
    if(mirror) {
        // TODO: FIXME
        return;
    }
    while(i < os.size()) {
        size_t olen = os[i].length();
        int lastorig = -1;
        for(; j < olen; j++) {
            size_t joff = j;
            if(mirror) joff = olen - j - 1;
            if((int)os[i][joff] == 4) {
                // Skip over Ns
                lastorig = -1;
                if(!mirror) {
                    while(j < olen && (int)os[i][j] == 4) j++;
                } else {
                    while(j < olen && (int)os[i][olen-j-1] == 4) j++;
                }
                j--;
                continue;
            }
            assert_eq(os[i][joff], rest[restOff]);
            lastorig = (int)os[i][joff];
            restOff++;
        }
        if(j == os[i].length()) {
            // Moved to next sequence
            i++;
            j = 0;
        } else {
            // Just jumped over a gap
        }
    }
}

/**
 * Try to find the Bowtie index specified by the user.  First try the
 * exact path given by the user.  Then try the user-provided string
 * appended onto the path of the "indexes" subdirectory below this
 * executable, then try the provided string appended onto
 * "$HISAT2_INDEXES/".
 */
string adjustEbwtBase(const string& cmdline,
                      const string& ebwtFileBase,
                      bool verbose = false);

#endif /*GFM_H_*/
