// $Id: fromQtoPt.cpp 5788 2009-01-19 22:24:16Z rubi $ #include "definitions.h" #include "fromQtoPt.h" #include "errorMsg.h" #include "numRec.h" #include "matrixUtils.h" #include using namespace std; #include //#define VERBOS void q2pt::fillFromRateMatrix(const vector& freq, const VVdouble & qMatrix) { // we first decompose Q to (F^0.5) M (F^-0.5) // F is a diagonal matrix of the frequencies // M is the symetrical matrix representation of Q. VVdouble q_sym; const int matrix_size = qMatrix.size(); q_sym.resize(matrix_size); int k=0; for (k=0; k < q_sym.size(); ++k) q_sym[k].resize(matrix_size); calc_symmetric_q(qMatrix,q_sym,freq); // now we have to find the eigen-vector decomposition of the q_sym. VVdouble v; // v is the eigen vectors of the symetrical matrix. v.resize(matrix_size); for (k=0; k < qMatrix.size(); ++k) v[k].resize(matrix_size); Vdouble eigenValues(matrix_size); // symmetric_1pam = [v] [eigenValues] [transpose(v)] //MyJacobi(q_sym,v, eigenValues); // notice that inv([v]) = [v] transpose; /////i changed computeEigenSystem(q_sym,v,eigenValues); //// //#ifdef VERBOS // LOG(5,<<"The eigen-vector matrix of the decomposition of the symetric matrix\n"); // for (int k1=0; k1 < v.size(); ++k1) { // for (int k2=0; k2& freq,const VVdouble & onePam) { fillFromRateMatrix(freq,onePam); for (int i=0; i < _eigenVector.size(); ++i) { assert(_eigenVector[i]>0); _eigenVector[i] = log(_eigenVector[i])* 100; } } bool q2pt::currectFloatingPointProblems(MDOUBLE& sum) const { if ((sum * (sum+err_allow_for_pijt_function))<0) sum=0; if (((sum-1) * (sum-1.0-err_allow_for_pijt_function))<0) sum=1; if (!((sum<=1) && (sum>=0))) return false; return true; } // Pij(t) = Sigma[k]{ [V]ik * [V^-1]kj * e^(Lamda_k*t) } const MDOUBLE q2pt::Pij_t(const int i, const int j, const MDOUBLE t) const { if (t<0) errorMsg::reportError("negative length in routine Pij_t"); // if ((_freq[i] == 0.0) || (_freq[j] == 0.0)) return 0.0; MDOUBLE sum=0; for (int k=0 ; k<_eigenVector.size() ; ++k) { sum+=( _leftEigen[i][k]*_rightEigen[k][j]*exp(_eigenVector[k]*t) ); } if (currectFloatingPointProblems(sum)) return sum; // LOG(1,<<"err Pij_t i="<>lll; } VVdouble get1PamFromCountMatrix(const vector& freq, const VVdouble & sub_matrix){ //---------------------------------------------------------------------------------- //input: pam1 : a pointer to the matrix where pam1 will be. // sub_matrix: the substitution matrix // freq vector: the amino acid's frequenceis. //output: non //doing: fill in 1 pam from sub matrix and freq vector //calculation: sub_matrix[a][b] is the substitution matrix, between a and b // (sub_matrix[a][b]=sub_matrix[b][a]) // we use f[a][b] insted of sub_matrix[a][b] to be the same as the book //(reference) "introduction to computational molecular biology by setubal and meidanis pg 80; // let f[a] be sigma f[a][b] on all b (we made f[a][a] = 0;) // i.e. f[a] is the number of mutation from a observed // let f be sigma f[a] on all a; (=the total mutations*2) // now, the mutaibility of a is defined as // // (1) m[a] = f[a] / (100*f*freq[a]) // // 100*f is a scaling factor for 1 pam. // then pam1[a][b] will be pr(a->b/a changed) * pr(a changed) // // (2) pam1[a][b] = (f[a][b]/f[a])*m[a] // // (3) f[a][a] = 1-m[a] (easy to show) // // notice that sigma 1pam[a][b] over all b is 1 and that // sigma freq[a]*1pam[a][a] over all a is 0.99 //---------------------------------------------------------------------------------- const int _alphabetSize=sub_matrix.size(); VVdouble pam1; pam1.resize(_alphabetSize); for (int z=0; z < _alphabetSize; ++z) { pam1[z].resize(_alphabetSize,0); } int i,j;//indices MDOUBLE total=0; // i.e.f in the above explanation for (i=0;i<_alphabetSize;++i) { for (j=0; j<_alphabetSize; ++j){ total+=sub_matrix[i][j]; } } MDOUBLE tmsum; for (i=0;i<_alphabetSize;++i) { tmsum = 0.0; for (j=i+1; j<_alphabetSize; ++j){ if ((freq[i] == 0.0) || (freq[j] == 0.0)) { pam1[i][j] = 0.0;pam1[j][i] = 0.0; } else { pam1[i][j] = sub_matrix[i][j]/(100.0*total*freq[i]); pam1[j][i] = sub_matrix[i][j]/(100.0*total*freq[j]); } } } for (i=0;i<_alphabetSize;++i) { tmsum = 0.0; for (j=0;j<_alphabetSize;++j) { if (j!=i) tmsum += pam1[i][j]; } if (freq[i] != 0.0) { pam1[i][i]=1.0-tmsum; } } #ifdef VERBOS LOG(5,<<" priting the 4*4 top-left corner of the 1pam matrix * 10^6 "<