#include "codonUtils.h"
#include "numRec.h"
#include <algorithm>





//check that the input sequences are divisable by 3
void checkInputSeqLength(string codonFile){
	nucleotide alph;
	ifstream in(codonFile.c_str());
	sequenceContainer inputSc = recognizeFormat::readUnAligned(in, &alph);
	in.close();
	int i;
    for (i = 0; i < inputSc.numberOfSeqs(); ++i){
		int seqLen = inputSc[i].seqLen();
		if ((seqLen % 3) != 0){
			string textToPrint = "USER ERROR: unable to read sequence: " + inputSc[i].name() + "\nSequence length is not divisable by three";
			errorMsg::reportError(textToPrint);
		}
	}
}

//this function convert codon sequences to amino sequences.
sequenceContainer convertCodonToAmino(sequenceContainer &codonSc,codon *codonAlph){
	amino aaAlph;
	sequenceContainer aaSc;
	for (int i = 0; i < codonSc.numberOfSeqs(); ++i){
		sequence codonSeq = codonSc[i];
		sequence aaSeq("", codonSeq.name(), codonSeq .remark(), codonSeq.id(), &aaAlph);
		for (int pos = 0; pos < codonSeq .seqLen(); ++pos)
            aaSeq.push_back(codonUtility::aaOf(codonSeq[pos],*codonAlph));
		aaSc.add(aaSeq);
	}
	if (codonSc.numberOfSeqs() != aaSc.numberOfSeqs())
		errorMsg::reportError("RevTrans: number of codon and Amino sequences is not the same");
	
	return aaSc;
}

// returns 1/sumPijQij
MDOUBLE getMatricesNormalizationFactor(vector<stochasticProcess> & spVec,const distribution * forceDistr){
	MDOUBLE sumPijQij=0.0;
	int categor;
	for ( categor=0; categor<forceDistr->categories();categor++)
		sumPijQij+=forceDistr->ratesProb(categor)*static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->sumPijQij();	
	if (sumPijQij ==0){
		errorMsg::reportError("Error in getMatricesNormalizationFactor - sumPijQij=0");
	}
	return sumPijQij;
}

// normalize the Q matrix so average rate of substitution = 1
void normalizeMatrices(vector<stochasticProcess> & spVec,const distribution * forceDistr){
	MDOUBLE sumPijQij=0.0;
	int categor;
	for ( categor=0; categor<forceDistr->categories();categor++)
		sumPijQij+=forceDistr->ratesProb(categor)*static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->sumPijQij();	
	if (sumPijQij ==0){
		errorMsg::reportError("Error in normalizeMatrices - sumPijQij=0");
	}
	for (categor=0; categor<forceDistr->categories();categor++)
		static_cast<wYangModel*>(spVec[categor].getPijAccelerator()->getReplacementModel())->norm(1/sumPijQij);

}

Vdouble freqCodonF3x4(const sequenceContainer &nucSc, codon * coAlph){
	VVdouble nucFeqPos(3);
	int pos= 0;
	int nPos = 0;
	for (nPos=0;nPos<3;nPos++)
		nucFeqPos[nPos].resize(nucSc.alphabetSize(),0.0);

	sequenceContainer::constTaxaIterator tIt;
	sequenceContainer::constTaxaIterator tItEnd;
	tIt.begin(nucSc);
	tItEnd.end(nucSc);
	while (tIt!= tItEnd) {
		pos = 0;
		sequence::constIterator sIt;
		sequence::constIterator sItEnd;
		sIt.begin(*tIt);
		sItEnd.end(*tIt);
		while (sIt != sItEnd) {
			if ((*sIt >= 0) && (*sIt <nucFeqPos[pos%3].size())) ++nucFeqPos[pos%3][(*sIt)];
			if (*sIt == 4) ++nucFeqPos[pos%3][3]; //for T (4) to U (3)
			++sIt;
			++pos;
		}
		++tIt;
	}
	for (nPos=0;nPos<3;nPos++)
		changeCountsToFreqs(nucFeqPos[nPos]);


	Vdouble freqCodon(coAlph->size(),0.0);

	nucleotide n;
	for (int c = 0; c<freqCodon.size();c++){

		string s = coAlph->fromInt(c);
		int nuc0 = n.fromChar(s[0]);
		int nuc1 = n.fromChar(s[1]);
		int nuc2 = n.fromChar(s[2]);
		freqCodon[c] = nucFeqPos[0][nuc0]*nucFeqPos[1][nuc1]*nucFeqPos[2][nuc2];
	}

	MDOUBLE sum=0;
	for (int i=0;i<coAlph->size();i++){
		sum+=freqCodon[i];
	}
	MDOUBLE stopFreq  = 1.0 - sum;
	MDOUBLE  ep = stopFreq/coAlph->size();
	for (int i=0;i<coAlph->size();i++){
		freqCodon[i]+=ep;
	}

	return freqCodon;


}


/***********************************************
  The following functions are useful for the selecton server, for creating a 
  Rasmol script and for setting the color value of each site
 ***********************************************/


// Positive significant in color dark yellow, non-sig. positive selection - light yellow. 
// Purifying selection in shades of bordeaux
vector<vector<int> > create7ColorValues(){
	vector<vector<int> > colorsValue;
	colorsValue.resize(7);
	for (int i=0;i<7;i++)
		colorsValue[i].resize(3);
	// RGB values of the differnt color bins
	colorsValue[0][0] = 255; //yellow positive significant
	colorsValue[0][1] = 220 ;
	colorsValue[0][2] = 0;

	colorsValue[1][0] =255 ; //light yellow - not  significant positive selection
	colorsValue[1][1] = 255;
	colorsValue[1][2] = 120;
	
	//three categories of not significant negative selection according to bordeaux shades (colors like conseq/consurf)

	colorsValue[2][0] = 255; //white  
	colorsValue[2][1] = 255;
	colorsValue[2][2] = 255;

	colorsValue[3][0] = 252;
	colorsValue[3][1] = 237;
	colorsValue[3][2] = 244;

	colorsValue[4][0] = 250;
	colorsValue[4][1] = 201;
	colorsValue[4][2] = 222;

	colorsValue[5][0] = 240;
	colorsValue[5][1] = 125;
	colorsValue[5][2] = 171;
	
	//significant negative selection
	colorsValue[6][0] = 130; 
	colorsValue[6][1] = 67;
	colorsValue[6][2] = 96;

	return colorsValue;
}

//this functions creates a rasmol script (assumes positions are the same between the alignment and the PDB)
void outToRasmolFile(string fileName,vector<int>& color4Site){
	ofstream out(fileName.c_str());	
	vector<vector<int> > colorsValue = create7ColorValues();
	int numberOfColor = colorsValue.size();
	vector<vector<int> > colors; //for each color (1-9/3) holds vector of sites.
	colors.resize(numberOfColor+1);
	int i;
	for (i=0;i<color4Site.size();i++){
		int color=color4Site[i];
		if (color>numberOfColor){
			errorMsg::reportError("Error in outToColorFile - unknown color");
		}
		colors[color].push_back(i+1); //add site (position in the vector +1)	
	}
	out<<"select all"<<endl;
	out<<"color [200,200,200]"<<endl<<endl;
	
	for (int c=1;c<numberOfColor+1;c++){
		out<<"select ";
		for (i=0;i<colors[c].size();i++){
			if (i==0)
				out<<colors[c][i];
			else if ((i+1)%6==0)
				out<<endl<<"select selected or "<<colors[c][i];
			 
			else out<<" , "<<colors[c][i];
		}
		out<<endl<<"select selected and :a"<<endl;
		out<<"color [" <<colorsValue[c-1][0]<<","<<colorsValue[c-1][1]<<","<<colorsValue[c-1][2]<<"]"<<endl;
		out<<"spacefill"<<endl<<endl;
	}
	
	out.close();
}


// a file with color-coding from Ka/Ks values to color-bins
void kaks2Color(const Vdouble & kaksVec, const Vdouble &lowerBoundV,
				const sequence & refSeq, string fileName,codon *co) {
	vector<int> colors;
	int numOfSitesinAln = kaksVec.size();
	Vdouble negativesKaksVec,negativesSite;
	negativesKaksVec.clear();
	negativesSite.clear();
	int i,gapsInRefSeq=0;

	for (i=0;i<numOfSitesinAln;i++){
		if (codonUtility::aaOf(refSeq[i],*co) == -1) gapsInRefSeq++; 
	}

	// first dealing with positive selection
	colors.resize(numOfSitesinAln-gapsInRefSeq);
	int gap=0;
	for (i=0;i<numOfSitesinAln;i++){
		if (codonUtility::aaOf(refSeq[i],*co) == -1){
			gap++;
			continue;
		}
		if (lowerBoundV[i]>1) // color 1 (positive selection) : if confidence interval lower bound > 1
			colors[i-gap]=1;
		else if (kaksVec[i]>1) // color 2(positive selection) : "non-significant"
			colors[i-gap]=2;
		else  {
			negativesKaksVec.push_back(kaksVec[i]);  //add the value of kaks < 1
			negativesSite.push_back(i-gap);   //add the number of site of the kaks 
		}
	
	}

	// now dealing with purifying selection
	Vdouble orderVec = negativesKaksVec;
	if (orderVec.size()>0) // this is since once the whole protein was positive selection... (anomaly)
		sort(orderVec.begin(), orderVec.end());  //sort the kaks values to be divided to 5 groups
	MDOUBLE percentileNum = 5.0;
	int percentileNumInt = 5;
	Vdouble maxScoreForPercentile(percentileNumInt);
	if (orderVec.size()>0) {
		maxScoreForPercentile[0] = orderVec[0]; 
		for (int c = 1; c < percentileNumInt; ++c){
			int place = (int)((c / percentileNum) * negativesKaksVec.size());
			MDOUBLE maxScore = orderVec[place];
			maxScoreForPercentile[c] = maxScore;
		}
	}

	//loop over all the Ka/Ks < 1  
	for (int j=0; j < negativesKaksVec.size(); ++j){
			MDOUBLE r = negativesKaksVec[j]; //the kaks of the site.
			int s = (int)negativesSite[j];  //the  site.
			if (r > maxScoreForPercentile[4]) 
					colors[s] = 3;
			else if (r > maxScoreForPercentile[3]) 
					colors[s] = 4;
			else if (r> maxScoreForPercentile[2])
					colors[s] = 5;
			else if (r > maxScoreForPercentile[1])
					colors[s] = 6;
			else if (r >= maxScoreForPercentile[0])
					colors[s] = 7;
	}
	//print to file
	ofstream out(fileName.c_str());
	gap=0;
	amino aminoAcid;
	LOG(5,<<"Printing selection color bins to file"<<endl);
	for (i=0;i<refSeq.seqLen();i++){	 
		int aa = codonUtility::aaOf(refSeq[i], *co);
		if (aa==-1){
			gap++;
			continue;
		}
		string aaStr = aminoAcid.fromInt(aa);
		out<<i+1-gap <<"\t"<<aaStr<<"\t"<<colors[i-gap];
		out<<endl;
	}
	out.close();
}
