residue probability vector is non-NULL, it gives * a 0..K-1 array of background frequencies, and the * returned match probability is an expectation (weighted * average) given those residue frequencies. * * and should only be residue codes. Any other * comparison, including comparisons involving gap or * missing data characters, or even comparisons involving * illegal digital codes, returns 0.0. * * Note that comparison of residues from "identical" * sequences (even a self-comparison) will not result in an * identity of 1.0, if the sequence(s) contain degenerate * residue codes. * * Args: abc - digtal alphabet to use * x,y - two symbols to compare * p - NULL, or background probabilities of the * canonical residues in this alphabet [0..K-1] * * Returns: the probability of an identity (match) between * residues and . */ double esl_abc_Match(const ESL_ALPHABET *abc, ESL_DSQ x, ESL_DSQ y, double *p) { int i; double prob; double sx, sy; /* Easy cases */ if (esl_abc_XIsCanonical(abc, x) && esl_abc_XIsCanonical(abc, y)) { if (x==y) return 1.0; else return 0.0; } if ( ! esl_abc_XIsResidue(abc, x) || ! esl_abc_XIsResidue(abc, x)) return 0.0; /* Else, we have at least one degenerate residue, so calc an average or expectation. */ if (p != NULL) { prob = sx = sy = 0.; for (i = 0; i < abc->K; i++) { if (abc->degen[(int)x][i]) sx += p[i]; if (abc->degen[(int)y][i]) sy += p[i]; if (abc->degen[(int)x][i] && abc->degen[(int)x][i]) prob += p[i] * p[i]; } prob = prob / (sx*sy); } else { double uniformp = 1. / (double) abc->K; prob = sx = sy = 0.; for (i = 0; i < abc->K; i++) { if (abc->degen[(int)x][i]) sx += uniformp; if (abc->degen[(int)y][i]) sy += uniformp; if (abc->degen[(int)x][i] && abc->degen[(int)x][i]) prob += uniformp * uniformp; } prob = prob / (sx*sy); } return prob; } /* Function: esl_abc_IAvgScore() * Synopsis: Returns average score for degenerate residue. * Incept: SRE, Tue Dec 21 10:53:57 2004 [Zaragoza] * * Purpose: Given a residue code in alphabet , and an array of * integer scores for the residues in the base * alphabet, calculate and return the average score * (rounded to nearest integer). * * would usually be a degeneracy code, but it * may also be a canonical residue. It must not * be a gap, missing data, or illegal symbol; if it * is, these functions return a score of 0 without * raising an error. * * and do the * same, but for float and double scores instead of integers * (and for real-valued scores, no rounding is done). * * Args: a - digital alphabet to use * x - a symbol to score * sc - score vector for canonical residues [0..K-1] * * Returns: average score for symbol */ int esl_abc_IAvgScore(const ESL_ALPHABET *a, ESL_DSQ x, const int *sc) { float result = 0.; int i; if (! esl_abc_XIsResidue(a, x)) return 0; for (i = 0; i < a->K; i++) if (a->degen[(int) x][i]) result += (float) sc[i]; result /= (float) a->ndegen[(int) x]; if (result < 0) return (int) (result - 0.5); else return (int) (result + 0.5); } float esl_abc_FAvgScore(const ESL_ALPHABET *a, ESL_DSQ x, const float *sc) { float result = 0.; int i; if (! esl_abc_XIsResidue(a, x)) return 0.; for (i = 0; i < a->K; i++) if (a->degen[(int) x][i]) result += sc[i]; result /= (float) a->ndegen[(int) x]; return result; } double esl_abc_DAvgScore(const ESL_ALPHABET *a, ESL_DSQ x, const double *sc) { double result = 0.; int i; if (! esl_abc_XIsResidue(a, x)) return 0.; for (i = 0; i < a->K; i++) if (a->degen[(int) x][i]) result += sc[i]; result /= (double) a->ndegen[(int) x]; return result; } /* Function: esl_abc_IExpectScore() * Synopsis: Returns expected score for degenerate residue. * Incept: SRE, Tue Dec 21 11:02:46 2004 [Zaragoza] * * Purpose: Given a residue code in alphabet , an * array of integer scores for the residues in the base * alphabet, and background frequencies

for the * occurrence frequencies of the residues in the base * alphabet, calculate and return the expected score * (weighted by the occurrence frequencies

). * * would usually be a degeneracy code, but it * may also be a canonical residue. It must not * be a gap, missing data, or illegal symbol; if it * is, these functions return a score of 0 without * raising an error. * * and do the * same, but for float and double scores instead of integers * (for real-valued scores, no rounding is done). * * Args: a - digital alphabet to use * x - a symbol to score * sc - score vector for canonical residues [0..K-1] * p - background prob's of canonicals [0..K-1] * * Returns: average score for symbol */ int esl_abc_IExpectScore(const ESL_ALPHABET *a, ESL_DSQ x, const int *sc, const float *p) { float result = 0.; float denom = 0.; int i; if (! esl_abc_XIsResidue(a, x)) return 0; for (i = 0; i < a->K; i++) if (a->degen[(int) x][i]) { result += (float) sc[i] * p[i]; denom += p[i]; } result /= denom; if (result < 0) return (int) (result - 0.5); else return (int) (result + 0.5); } float esl_abc_FExpectScore(const ESL_ALPHABET *a, ESL_DSQ x, const float *sc, const float *p) { float result = 0.; float denom = 0.; int i; if (! esl_abc_XIsResidue(a, x)) return 0.; for (i = 0; i < a->K; i++) if (a->degen[(int) x][i]) { result += sc[i] * p[i]; denom += p[i]; } result /= denom; return result; } double esl_abc_DExpectScore(const ESL_ALPHABET *a, ESL_DSQ x, const double *sc, const double *p) { double result = 0.; double denom = 0.; int i; if (! esl_abc_XIsResidue(a, x)) return 0.; for (i = 0; i < a->K; i++) if (a->degen[(int) x][i]) { result += sc[i] * p[i]; denom += p[i]; } result /= denom; return result; } /* Function: esl_abc_IAvgScVec() * Synopsis: Fill out score vector with average degenerate scores. * Incept: SRE, Thu Apr 6 12:12:25 2006 [AA890 enroute to Boston] * * Purpose: Given an alphabet and a score vector of length * Kp> that contains integer scores for the base * alphabet (<0..a->K-1>), fill out the rest of the score * vector, calculating average scores for * degenerate residues using . * * The score, if any, for a gap character , the * nonresidue , and the missing data character * are untouched by this function. Only the degenerate * scores are filled in. * * and do * the same, but for score vectors of floats or doubles, * respectively. * * Returns: on success. */ int esl_abc_IAvgScVec(const ESL_ALPHABET *a, int *sc) { ESL_DSQ x; for (x = a->K+1; x <= a->Kp-3; x++) sc[x] = esl_abc_IAvgScore(a, x, sc); return eslOK; } int esl_abc_FAvgScVec(const ESL_ALPHABET *a, float *sc) { ESL_DSQ x; for (x = a->K+1; x <= a->Kp-3; x++) sc[x] = esl_abc_FAvgScore(a, x, sc); return eslOK; } int esl_abc_DAvgScVec(const ESL_ALPHABET *a, double *sc) { ESL_DSQ x; for (x = a->K+1; x <= a->Kp-3; x++) sc[x] = esl_abc_DAvgScore(a, x, sc); return eslOK; } /* Function: esl_abc_IExpectScVec() * Synopsis: Fill out score vector with average expected scores. * Incept: SRE, Thu Apr 6 12:23:52 2006 [AA 890 enroute to Boston] * * Purpose: Given an alphabet , a score vector of length * Kp> that contains integer scores for the base * alphabet (<0..a->K-1>), and residue occurrence probabilities * K-1]>; fill in the scores for the * degenerate residues using . * * The score, if any, for a gap character , the * nonresidue , and the missing data character * are untouched by this function. Only the degenerate * scores are filled in. * * and do * the same, but for score vectors of floats or doubles, * respectively. The probabilities