/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef IVAL_LIST_H_
#define IVAL_LIST_H_

#include "ds.h"
#include "ref_coord.h"
#include <utility>

/**
 * Encapsulates the "union" of a collection of intervals.  Intervals are stored
 * in a sorted list.  Intervals can be added but not removed.  Supports just
 * one type of query for now: locusPresent().
 */
class EIvalMergeList {
public:

	static const size_t DEFAULT_UNSORTED_SZ = 16;

	explicit EIvalMergeList(int cat = 0) :
		sorted_(cat),
		sortedLhs_(cat),
		unsorted_(cat),
		unsortedSz_(DEFAULT_UNSORTED_SZ)
	{ }

	explicit EIvalMergeList(size_t unsortedSz, int cat = 0) :
		sorted_(cat),
		sortedLhs_(cat),
		unsorted_(cat),
		unsortedSz_(unsortedSz)
	{ }
	
	/**
	 * Set the maximum size of the unsorted list.
	 */
	void setUnsortedSize(size_t usz) {
		unsortedSz_ = usz;
	} 
	
	/**
	 * Add a new interval to the list.
	 */
	void add(const Interval& i) {
		assert_leq(unsorted_.size(), unsortedSz_);
		if(unsorted_.size() < unsortedSz_) {
			unsorted_.push_back(i);
		}
		if(unsorted_.size() == unsortedSz_) {
			flush();
		}
	}

	/**
	 * Move all unsorted interval information into the sorted list and re-sort.
	 * Merge overlapping intervals.
	 */
	void flush() {
		for(size_t i = 0; i < unsorted_.size(); i++) {
			sorted_.push_back(unsorted_[i]);
		}
		sorted_.sort();
		merge();
		sortedLhs_.clear();
		for(size_t i = 0; i < sorted_.size(); i++) {
			sortedLhs_.push_back(sorted_[i].upstream());
		}
		assert(sortedLhs_.sorted());
		unsorted_.clear();
	}
	
#ifndef NDEBUG
	/**
	 * Check that this interval list is internally consistent.
	 */
	bool repOk() const {
		assert_eq(sorted_.size(), sortedLhs_.size());
		return true;
	}
#endif
	
	/**
	 * Remove all ranges from the list.
	 */
	void reset() { clear(); }
	
	/**
	 * Remove all ranges from the list.
	 */
	void clear() {
		sorted_.clear();
		sortedLhs_.clear();
		unsorted_.clear();
	}
	
	/**
	 * Return true iff this locus is present in one of the intervals in the
	 * list.
	 */
	bool locusPresent(const Coord& loc) const {
		return locusPresentUnsorted(loc) || locusPresentSorted(loc);
	}
	
	/**
	 * Return the number of intervals added since the last call to reset() or
	 * clear().
	 */
	size_t size() const {
		return sorted_.size() + unsorted_.size();
	}
	
	/**
	 * Return true iff list is empty.
	 */
	bool empty() const {
		return sorted_.empty() && unsorted_.empty();
	}
	
protected:
	
	/**
	 * Go through the sorted interval list and merge adjacent entries that
	 * overlap.
	 */
	void merge() {
		size_t nmerged = 0;
		for(size_t i = 1; i < sorted_.size(); i++) {
			if(sorted_[i-1].downstream() >= sorted_[i].upstream()) {
				nmerged++;
				assert_leq(sorted_[i-1].upstream(), sorted_[i].upstream());
				Coord up = std::min(sorted_[i-1].upstream(), sorted_[i].upstream());
				Coord dn = std::max(sorted_[i-1].downstream(), sorted_[i].downstream());
				sorted_[i].setUpstream(up);
				sorted_[i].setLength(dn.off() - up.off());
				sorted_[i-1].reset();
			}
		}
		sorted_.sort();
		assert_lt(nmerged, sorted_.size());
		sorted_.resize(sorted_.size()-nmerged);
#ifndef NDEBUG
		for(size_t i = 0; i < sorted_.size(); i++) {
			assert(sorted_[i].inited());
		}
#endif
	}

	/**
	 * Return true iff the given locus is present in one of the intervals in
	 * the sorted list.
	 */
	bool locusPresentSorted(const Coord& loc) const {
		assert(repOk());
		if(sorted_.empty()) {
			return false;
		}
		size_t beg = sortedLhs_.bsearchLoBound(loc);
		if(beg == sortedLhs_.size() || sortedLhs_[beg] > loc) {
			// Check element before
			if(beg == 0) {
				return false;
			}
			return sorted_[beg-1].contains(loc);
		} else {
			assert_eq(loc, sortedLhs_[beg]);
			return true;
		}
	}

	/**
	 * Return true iff the given locus is present in one of the intervals in
	 * the unsorted list.
	 */
	bool locusPresentUnsorted(const Coord& loc) const {
		for(size_t i = 0; i < unsorted_.size(); i++) {
			if(unsorted_[i].contains(loc)) {
				return true;
			}
		}
		return false;
	}

	EList<Interval> sorted_;     // LHS, RHS sorted
	EList<Coord>    sortedLhs_;  // LHS, index into sorted_, sorted
	EList<Interval> unsorted_;   // unsorted
	size_t          unsortedSz_; // max allowed size of unsorted_
};

/**
 * Binned version of the above.  We bin using the low bits of the reference
 * sequence.
 */
class EIvalMergeListBinned {
public:

	static const size_t NBIN = 7;

	explicit EIvalMergeListBinned(int cat = 0) : bins_(1 << NBIN, cat) {
		bins_.resize(1 << NBIN);
	}

	explicit EIvalMergeListBinned(
		size_t unsortedSz,
		int cat = 0) : bins_(1 << NBIN, cat)
	{
		bins_.resize(1 << NBIN);
		for(size_t i = 0; i < (1 << NBIN); i++) {
			bins_[i].setUnsortedSize(unsortedSz);
		}
	}

	/**
	 * Add a new interval to the list.
	 */
	void add(const Interval& i) {
		size_t bin = i.ref() & ~(0xffffffff << NBIN);
		assert_lt(bin, bins_.size());
		bins_[bin].add(i);
	}

#ifndef NDEBUG
	/**
	 * Check that this interval list is internally consistent.
	 */
	bool repOk() const {
		for(size_t i = 0; i < bins_.size(); i++) {
			assert(bins_[i].repOk());
		}
		return true;
	}
#endif
	
	/**
	 * Remove all ranges from the list.
	 */
	void reset() { clear(); }
	
	/**
	 * Remove all ranges from the list.
	 */
	void clear() {
		for(size_t i = 0; i < bins_.size(); i++) {
			bins_[i].clear();
		}
	}
	
	/**
	 * Return true iff this locus is present in one of the intervals in the
	 * list.
	 */
	bool locusPresent(const Coord& loc) const {
		size_t bin = loc.ref() & ~(0xffffffff << NBIN);
		assert_lt(bin, bins_.size());
		return bins_[bin].locusPresent(loc);
	}
	
	/**
	 * Return the number of intervals added since the last call to reset() or
	 * clear().
	 */
	size_t size() const {
		// TODO: Keep track of size
		size_t sz = 0;
		for(size_t i = 0; i < bins_.size(); i++) {
			sz += bins_[i].size();
		}
		return sz;
	}
	
	/**
	 * Return true iff list is empty.
	 */
	bool empty() const {
		return size() == 0;
	}
	
protected:
	
	EList<EIvalMergeList> bins_;
};

#endif /*ndef IVAL_LIST_H_*/
