/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <iostream>
#include "edit.h"

using namespace std;

/**
 * Print a single edit to a std::ostream.  Format is
 * (pos):(ref chr)>(read chr).  Where 'pos' is an offset from the 5'
 * end of the read, and the ref and read chrs are expressed w/r/t the
 * Watson strand.
 */
ostream& operator<< (ostream& os, const Edit& e) {
    if(e.type != EDIT_TYPE_SPL) {
        os << e.pos << ":" << (char)e.chr << ">" << (char)e.qchr;
    } else {
        os << e.pos << ":" << e.splLen;
    }

	return os;
}

/**
 * Print a list of edits to a std::ostream, separated by commas.
 */
void Edit::print(ostream& os, const EList<Edit>& edits, char delim) {
	for(size_t i = 0; i < edits.size(); i++) {
		os << edits[i];
		if(i < edits.size()-1) os << delim;
	}
}

void Edit::complement(EList<Edit> &edits)
{
    for(size_t i = 0; i < edits.size(); i++) {
        Edit& e = edits[i];

        if(e.type != EDIT_TYPE_SPL) {
			assert_in(e.chr, "ACMGRSVTWYHKDBN-");
			assert_in(e.qchr, "ACGTN-");

			e.chr = asc2dnacomp[e.chr];
			e.qchr = asc2dnacomp[e.qchr];
        }
    }
}

/**
 * Flip all the edits.pos fields so that they're with respect to
 * the other end of the read (of length 'sz').
 */
void Edit::invertPoss(
	EList<Edit>& edits,
	size_t sz,
	size_t ei,
	size_t en,
	bool sort)
{
	// Invert elements
	size_t ii = 0;
	for(size_t i = ei; i < ei + en/2; i++) {
		Edit tmp = edits[i];
		edits[i] = edits[ei + en - ii - 1];
		edits[ei + en - ii - 1] = tmp;
		ii++;
	}
	for(size_t i = ei; i < ei + en; i++) {
		assert(edits[i].pos < sz ||
			   (edits[i].isReadGap() && edits[i].pos == sz));
		// Adjust pos
        if(edits[i].isReadGap() || edits[i].isSpliced()) {
            edits[i].pos = (uint32_t)(sz - edits[i].pos);
        } else {
            edits[i].pos = (uint32_t)(sz - edits[i].pos - 1);
        }
		// Adjust pos2
		if(edits[i].isReadGap()) {
			int64_t pos2diff = (int64_t)(uint64_t)edits[i].pos2 - (int64_t)((uint64_t)std::numeric_limits<uint32_t>::max() >> 1);
			int64_t pos2new = (int64_t)(uint64_t)edits[i].pos2 - 2*pos2diff;
			assert(pos2diff == 0 || (uint32_t)pos2new != (std::numeric_limits<uint32_t>::max() >> 1));
			edits[i].pos2 = (uint32_t)pos2new;
		}
	}
	if(sort) {
		// Edits might not necessarily be in same order after inversion
		edits.sortPortion(ei, en);
#ifndef NDEBUG
		for(size_t i = ei + 1; i < ei + en; i++) {
			assert_geq(edits[i].pos, edits[i-1].pos);
		}
#endif
	}
}

/**
 * For now, we pretend that the alignment is in the forward orientation
 * and that the Edits are listed from left- to right-hand side.
 */
void Edit::printQAlign(
	std::ostream& os,
	const BTDnaString& read,
	const EList<Edit>& edits)
{
	printQAlign(os, "", read, edits);
}

/**
 * For now, we pretend that the alignment is in the forward orientation
 * and that the Edits are listed from left- to right-hand side.
 */
void Edit::printQAlignNoCheck(
	std::ostream& os,
	const BTDnaString& read,
	const EList<Edit>& edits)
{
	printQAlignNoCheck(os, "", read, edits);
}

/**
 * For now, we pretend that the alignment is in the forward orientation
 * and that the Edits are listed from left- to right-hand side.
 */
void Edit::printQAlign(
	std::ostream& os,
	const char *prefix,
	const BTDnaString& read,
	const EList<Edit>& edits)
{
	size_t eidx = 0;
	os << prefix;
	// Print read
	for(size_t i = 0; i < read.length(); i++) {
		bool del = false, mm = false;
		while(eidx < edits.size() && edits[eidx].pos == i) {
			if(edits[eidx].isReadGap()) {
				os << '-';
			} else if(edits[eidx].isRefGap()) {
				del = true;
				assert_eq((int)edits[eidx].qchr, read.toChar(i));
				os << read.toChar(i);
			} else {
				mm = true;
				assert(edits[eidx].isMismatch());
				assert_eq((int)edits[eidx].qchr, read.toChar(i));
				os << (char)edits[eidx].qchr;
			}
			eidx++;
		}
		if(!del && !mm) os << read.toChar(i);
	}
	os << endl;
	os << prefix;
	eidx = 0;
	// Print match bars
	for(size_t i = 0; i < read.length(); i++) {
		bool del = false, mm = false;
		while(eidx < edits.size() && edits[eidx].pos == i) {
			if(edits[eidx].isReadGap()) {
				os << ' ';
			} else if(edits[eidx].isRefGap()) {
				del = true;
				os << ' ';
			} else {
				mm = true;
				assert(edits[eidx].isMismatch());
				os << ' ';
			}
			eidx++;
		}
		if(!del && !mm) os << '|';
	}
	os << endl;
	os << prefix;
	eidx = 0;
	// Print reference
	for(size_t i = 0; i < read.length(); i++) {
		bool del = false, mm = false;
		while(eidx < edits.size() && edits[eidx].pos == i) {
			if(edits[eidx].isReadGap()) {
				os << (char)edits[eidx].chr;
			} else if(edits[eidx].isRefGap()) {
				del = true;
				os << '-';
			} else {
				mm = true;
				assert(edits[eidx].isMismatch());
				os << (char)edits[eidx].chr;
			}
			eidx++;
		}
		if(!del && !mm) os << read.toChar(i);
	}
	os << endl;
}

/**
 * For now, we pretend that the alignment is in the forward orientation
 * and that the Edits are listed from left- to right-hand side.
 */
void Edit::printQAlignNoCheck(
	std::ostream& os,
	const char *prefix,
	const BTDnaString& read,
	const EList<Edit>& edits)
{
	size_t eidx = 0;
	os << prefix;
	// Print read
	for(size_t i = 0; i < read.length(); i++) {
		bool del = false, mm = false;
		while(eidx < edits.size() && edits[eidx].pos == i) {
			if(edits[eidx].isReadGap()) {
				os << '-';
			} else if(edits[eidx].isRefGap()) {
				del = true;
				os << read.toChar(i);
			} else {
				mm = true;
				os << (char)edits[eidx].qchr;
			}
			eidx++;
		}
		if(!del && !mm) os << read.toChar(i);
	}
	os << endl;
	os << prefix;
	eidx = 0;
	// Print match bars
	for(size_t i = 0; i < read.length(); i++) {
		bool del = false, mm = false;
		while(eidx < edits.size() && edits[eidx].pos == i) {
			if(edits[eidx].isReadGap()) {
				os << ' ';
			} else if(edits[eidx].isRefGap()) {
				del = true;
				os << ' ';
			} else {
				mm = true;
				os << ' ';
			}
			eidx++;
		}
		if(!del && !mm) os << '|';
	}
	os << endl;
	os << prefix;
	eidx = 0;
	// Print reference
	for(size_t i = 0; i < read.length(); i++) {
		bool del = false, mm = false;
		while(eidx < edits.size() && edits[eidx].pos == i) {
			if(edits[eidx].isReadGap()) {
				os << (char)edits[eidx].chr;
			} else if(edits[eidx].isRefGap()) {
				del = true;
				os << '-';
			} else {
				mm = true;
				os << (char)edits[eidx].chr;
			}
			eidx++;
		}
		if(!del && !mm) os << read.toChar(i);
	}
	os << endl;
}

/**
 * Sort the edits in the provided list.
 */
void Edit::sort(EList<Edit>& edits) {
	edits.sort(); // simple!
}

/**
 * Given a read string and some edits, generate and append the corresponding
 * reference string to 'ref'.  If read aligned to the Watson strand, the caller
 * should pass the original read sequence and original edits.  If a read
 * aligned to the Crick strand, the caller should pass the reverse complement
 * of the read and a version of the edits list that has had Edit:invertPoss
 * called on it to cause edits to be listed in 3'-to-5' order.
 */
void Edit::toRef(
	const BTDnaString& read,
	const EList<Edit>& edits,
	BTDnaString& ref,
	bool fw,
	size_t trim5,
	size_t trim3)
{
	// edits should be sorted
	size_t eidx = 0;
	// Print reference
	const size_t rdlen = read.length();
	size_t trimBeg = fw ? trim5 : trim3;
	size_t trimEnd = fw ? trim3 : trim5;
	assert(Edit::repOk(edits, read, fw, trim5, trim3));
	if(!fw) {
		invertPoss(const_cast<EList<Edit>&>(edits), read.length()-trimBeg-trimEnd, false);
	}
	for(size_t i = 0; i < rdlen; i++) {
		ASSERT_ONLY(int c = read[i]);
		assert_range(0, 4, c);
		bool del = false, mm = false;
		bool append = i >= trimBeg && rdlen - i - 1 >= trimEnd;
		bool appendIns = i >= trimBeg && rdlen - i >= trimEnd;
		while(eidx < edits.size() && edits[eidx].pos+trimBeg == i) {
			if(edits[eidx].isReadGap()) {
				// Inserted characters come before the position's
				// character
				if(appendIns) {
					ref.appendChar((char)edits[eidx].chr);
				}
			} else if(edits[eidx].isRefGap()) {
				assert_eq("ACGTN"[c], edits[eidx].qchr);
				del = true;
			} else if(edits[eidx].isMismatch()){
				mm = true;
				assert(edits[eidx].qchr != edits[eidx].chr || edits[eidx].qchr == 'N');
				assert_eq("ACGTN"[c], edits[eidx].qchr);
				if(append) {
					ref.appendChar((char)edits[eidx].chr);
				}
			}
			eidx++;
		}
		if(!del && !mm) {
			if(append) {
				ref.append(read[i]);
			}
		}
	}
	if(trimEnd == 0) {
		while(eidx < edits.size()) {
			assert_gt(rdlen, edits[eidx].pos);
			if(edits[eidx].isReadGap()) {
				ref.appendChar((char)edits[eidx].chr);
			}
			eidx++;
		}
	}
	if(!fw) {
		invertPoss(const_cast<EList<Edit>&>(edits), read.length()-trimBeg-trimEnd, false);
	}
}

#ifndef NDEBUG
/**
 * Check that the edit is internally consistent.
 */
bool Edit::repOk() const {
    assert(inited());
	// Ref and read characters cannot be the same unless they're both Ns
    if(type != EDIT_TYPE_SPL) {
        assert(qchr != chr || qchr == 'N');
        // Type must match characters
        assert(isRefGap() ||  chr != '-');
        assert(isReadGap() || qchr != '-');
        assert(!isMismatch() || (qchr != '-' && chr != '-'));
    } else {
        assert_gt(splLen, 0);
    }
	return true;
}

/**
 * Given a list of edits and a DNA string representing the query
 * sequence, check that the edits are consistent with respect to the
 * query.
 */
bool Edit::repOk(
	const EList<Edit>& edits,
	const BTDnaString& s,
	bool fw,
	size_t trimBeg,
	size_t trimEnd)
{
	if(!fw) {
		invertPoss(const_cast<EList<Edit>&>(edits), s.length()-trimBeg-trimEnd, false);
		swap(trimBeg, trimEnd);
	}
	for(size_t i = 0; i < edits.size(); i++) {
		const Edit& e = edits[i];
		size_t pos = e.pos;
		if(i > 0) {
			assert_geq(pos, edits[i-1].pos);
		}
		bool del = false, mm = false;
		while(i < edits.size() && edits[i].pos == pos) {
			const Edit& ee = edits[i];
			assert_lt(ee.pos, s.length());
            if(ee.type != EDIT_TYPE_SPL) {
                if(ee.qchr != '-') {
                    assert(ee.isRefGap() || ee.isMismatch());
                    //assert_eq((int)ee.qchr, s.toChar(ee.pos+trimBeg));
                }
            }
			if(ee.isMismatch()) {
				assert(!mm);
				mm = true;
				assert(!del);
			} else if(ee.isReadGap()) {
				assert(!mm);
			} else if(ee.isRefGap()) {
				assert(!mm);
				assert(!del);
				del = true;
			} else if(ee.isSpliced()) {
                
            }
			i++;
		}
	}
	if(!fw) {
		invertPoss(const_cast<EList<Edit>&>(edits), s.length()-trimBeg-trimEnd, false);
	}
	return true;
}
#endif

/**
 * Merge second argument into the first.  Assume both are sorted to
 * begin with.
 */
void Edit::merge(EList<Edit>& dst, const EList<Edit>& src) {
	size_t di = 0, si = 0;
	while(di < dst.size()) {
		if(src[si].pos < dst[di].pos) {
			dst.insert(src[si], di);
			si++; di++;
		} else if(src[si].pos == dst[di].pos) {
			// There can be two inserts at a given position, but we
			// can't merge them because there's no way to know their
			// order
			assert(src[si].isReadGap() != dst[di].isReadGap());
			if(src[si].isReadGap()) {
				dst.insert(src[si], di);
				si++; di++;
			} else if(dst[di].isReadGap()) {
				di++;
			}
		}
	}
	while(si < src.size()) dst.push_back(src[si++]);
}

/**
 * Clip off some of the low-numbered positions.
 */
void Edit::clipLo(EList<Edit>& ed, size_t len, size_t amt) {
	size_t nrm = 0;
	for(size_t i = 0; i < ed.size(); i++) {
		assert_lt(ed[i].pos, len);
		if(ed[i].pos < amt) {
			nrm++;
		} else {
			// Shift everyone else up
			ed[i].pos -= (uint32_t)amt;
		}
	}
	ed.erase(0, nrm);
}

/**
 * Clip off some of the high-numbered positions.
 */
void Edit::clipHi(EList<Edit>& ed, size_t len, size_t amt) {
	assert_leq(amt, len);
	size_t max = len - amt;
	size_t nrm = 0;
	for(size_t i = 0; i < ed.size(); i++) {
		size_t ii = ed.size() - i - 1;
		assert_lt(ed[ii].pos, len);
		if(ed[ii].pos > max) {
			nrm++;
		} else if(ed[ii].pos == max && !ed[ii].isReadGap()) {
			nrm++;
		} else {
			break;
		}
	}
	ed.resize(ed.size() - nrm);
}
