/* I/O of multiple sequence alignments in A2M format (UCSC SAM) * * Contents: * 1. API for reading/writing A2M format * 2. Internal functions used by the A2M parser * 3. Unit tests. * 4. Test driver. * 5. Examples. * * Reference: * http://compbio.soe.ucsc.edu/a2m-desc.html */ #include "esl_config.h" #include #include #include "easel.h" #include "esl_alphabet.h" #include "esl_mem.h" #include "esl_msa.h" #include "esl_msafile.h" #include "esl_msafile_a2m.h" static int a2m_padding_digital(ESL_MSA *msa, char **csflag, int *nins, int ncons); static int a2m_padding_text (ESL_MSA *msa, char **csflag, int *nins, int ncons); /***************************************************************** *# 1. API for reading/writing A2M format *****************************************************************/ /* Function: esl_msafile_a2m_SetInmap() * Synopsis: Set input map specific for A2M format * * Purpose: Set the inmap> for A2M format. * * A2M ignores whitespace and periods (and ignoring * periods makes us agnostic whether the input is * "dotless" format or not). Make ' ', '\t', and * '.' ignored. * * A2M format only allows - for a gap, so make * all other Easel gap characters illegal on input. * * A2M format handles an 'O' specially: this indicates * a FIM (free insertion module) to the SAM software. * We ignore it. * A2M allows ACDEFGHIKLMNPQRSTVWY for aa, plus XBZ. * Unknown letters (including other ambig codes) are mapped * to X. A2M allows ACGTU for nucleic, plus YRN. Unknown * letters (including other ambig codes) are mapped to N. * However, Easel enforces its normal input restrictions on * residues: digital bioalphabets allow only valid residue * symbols, and text mode allows any isalpha() character * verbatim. */ int esl_msafile_a2m_SetInmap(ESL_MSAFILE *afp) { int sym; if (afp->abc) { for (sym = 0; sym < 128; sym++) afp->inmap[sym] = afp->abc->inmap[sym]; afp->inmap[0] = esl_abc_XGetUnknown(afp->abc); afp->inmap['_'] = eslDSQ_ILLEGAL; afp->inmap['*'] = eslDSQ_ILLEGAL; afp->inmap['~'] = eslDSQ_ILLEGAL; } if (! afp->abc) { for (sym = 1; sym < 128; sym++) afp->inmap[sym] = (isalpha(sym) ? sym : eslDSQ_ILLEGAL); afp->inmap[0] = '?'; afp->inmap['-'] = '-'; } afp->inmap[' '] = eslDSQ_IGNORED; afp->inmap['\t'] = eslDSQ_IGNORED; afp->inmap['.'] = eslDSQ_IGNORED; afp->inmap['O'] = eslDSQ_IGNORED; afp->inmap['o'] = eslDSQ_IGNORED; return eslOK; } /* Function: esl_msafile_a2m_GuessAlphabet() * Synopsis: Guess the alphabet of an open A2M MSA file. * * Purpose: Guess the alpbabet of the sequences in open * A2M format MSA file . * * On a normal return, <*ret_type> is set to , * , or , and is reset to its * original position. * * Args: afp - open A2M format MSA file * ret_type - RETURN: , , or * * Returns: on success. * if alphabet type can't be determined. * In either case, is rewound to the position it * started at. * * Throws: on allocation error. * on failures of fread() or other system calls */ int esl_msafile_a2m_GuessAlphabet(ESL_MSAFILE *afp, int *ret_type) { int alphatype = eslUNKNOWN; esl_pos_t anchor = -1; int threshold[3] = { 500, 5000, 50000 }; /* we check after 500, 5000, 50000 residues; else we go to EOF */ int nsteps = 3; int step = 0; int nres = 0; int x; int64_t ct[26]; char *p; esl_pos_t n, pos; int status; for (x = 0; x < 26; x++) ct[x] = 0; anchor = esl_buffer_GetOffset(afp->bf); if ((status = esl_buffer_SetAnchor(afp->bf, anchor)) != eslOK) { status = eslEINCONCEIVABLE; goto ERROR; } /* [eslINVAL] can't happen here */ while ( (status = esl_buffer_GetLine(afp->bf, &p, &n)) == eslOK) { while (n && isspace(*p)) { p++; n--; } if (!n || *p == '>') continue; for (pos = 0; pos < n; pos++) if (isalpha(p[pos])) { x = toupper(p[pos]) - 'A'; ct[x]++; nres++; } /* try to stop early, checking after 500, 5000, and 50000 residues: */ if (step < nsteps && nres > threshold[step]) { if ((status = esl_abc_GuessAlphabet(ct, &alphatype)) == eslOK) goto DONE; /* (eslENOALPHABET) */ step++; } } if (status != eslEOF) goto ERROR; /* [eslEMEM,eslESYS,eslEINCONCEIVABLE] */ status = esl_abc_GuessAlphabet(ct, &alphatype); /* (eslENOALPHABET) */ /* deliberate flowthrough...*/ DONE: esl_buffer_SetOffset(afp->bf, anchor); /* Rewind to where we were. */ esl_buffer_RaiseAnchor(afp->bf, anchor); *ret_type = alphatype; return status; ERROR: if (anchor != -1) { esl_buffer_SetOffset(afp->bf, anchor); esl_buffer_RaiseAnchor(afp->bf, anchor); } *ret_type = eslUNKNOWN; return status; } /* Function: esl_msafile_a2m_Read() * Synopsis: Read a UCSC A2M format alignment. * * Purpose: Read an MSA from an open , parsing * for UCSC A2M (SAM) format. Create a new MSA, * and return a ptr to it in <*ret_msa>. Caller is responsible * for freeing this . * * The has a reference line (rf[]>) that * corresponds to the uppercase/lowercase columns in the * alignment: consensus (uppercase) columns are marked 'X', * and insert (lowercase) columns are marked '.' in the RF * annotation line. * * This input parser can deal both with "dotless" A2M, and * full A2M format with dots. * * Args: afp - open * ret_msa - RETURN: newly parsed * * Returns: on success. <*ret_msa> is set to the newly * allocated MSA, and is at EOF. * * if no (more) alignment data are found in * , and is returned at EOF. * * on a parse error. <*ret_msa> is set to * . contains information sufficient for * constructing useful diagnostic output: * | errmsg> | user-directed error message | * | linenumber> | line # where error was detected | * | line> | offending line (not NUL-term) | * | n> | length of offending line | * | bf->filename> | name of the file | * and is poised at the start of the following line, * so (in principle) the caller could try to resume * parsing. * * Throws: - an allocation failed. * - a system call such as fread() failed * - "impossible" corruption * On these, <*ret_msa> is returned , and the state of * is undefined. */ int esl_msafile_a2m_Read(ESL_MSAFILE *afp, ESL_MSA **ret_msa) { ESL_MSA *msa = NULL; char **csflag = NULL; /* csflag[i][pos] is TRUE if aseq[i][pos] was uppercase consensus */ int *nins = NULL; /* # of inserted residues before each consensus col [0..ncons-1] */ int *this_nins = NULL; /* # of inserted residues before each consensus residue in this seq */ int nseq = 0; int ncons = 0; int idx; int64_t thislen; int64_t spos; int this_ncons; int cpos, bpos; char *p, *tok; esl_pos_t n, toklen; int status; ESL_DASSERT1( (afp->format == eslMSAFILE_A2M) ); afp->errmsg[0] = '\0'; if (afp->abc && (msa = esl_msa_CreateDigital(afp->abc, 16, -1)) == NULL) { status = eslEMEM; goto ERROR; } if (! afp->abc && (msa = esl_msa_Create( 16, -1)) == NULL) { status = eslEMEM; goto ERROR; } ESL_ALLOC(csflag, sizeof(char *) * msa->sqalloc); for (idx = 0; idx < msa->sqalloc; idx++) csflag[idx] = NULL; /* skip leading blank lines in file */ while ( (status = esl_msafile_GetLine(afp, &p, &n)) == eslOK && esl_memspn(afp->line, afp->n, " \t") == afp->n) ; if (status != eslOK) goto ERROR; /* includes normal EOF */ /* tolerate sloppy space at start of name/desc line */ while (n && isspace(*p)) { p++; n--; } if (*p != '>') ESL_XFAIL(eslEFORMAT, afp->errmsg, "expected A2M name/desc line starting with >"); do { /* for each record starting in '>': */ p++; n--; /* advance past > */ if ( (status = esl_memtok(&p, &n, " \t", &tok, &toklen)) != eslOK) ESL_XFAIL(eslEFORMAT, afp->errmsg, "no name found for A2M record"); if (nseq >= msa->sqalloc) { int old_sqalloc = msa->sqalloc; if ( (status = esl_msa_Expand(msa)) != eslOK) goto ERROR; ESL_REALLOC(csflag, sizeof(char *) * msa->sqalloc); for (idx = old_sqalloc; idx < msa->sqalloc; idx++) csflag[idx] = NULL; } if ( (status = esl_msa_SetSeqName (msa, nseq, tok, toklen)) != eslOK) goto ERROR; if (n && (status = esl_msa_SetSeqDescription(msa, nseq, p, n)) != eslOK) goto ERROR; /* now for each sequence line... */ thislen = 0; /* count of lowercase, uppercase, and '-': w/o dots, on first pass */ this_ncons = 0; /* count of uppercase + '-': number of consensus columns in alignment: must match for all seqs */ if (! this_nins) // Starting w/ first sequence, we need a wee initial alloc. Also nseq == 0, ncons == 0 ... ESL_ALLOC(this_nins, sizeof(int) * 1); for (cpos = 0; cpos <= ncons; cpos++) // A little tricksy. On the first sequence, ncons == 0, so this initializes just [0]. this_nins[cpos] = 0; while ( (status = esl_msafile_GetLine(afp, &p, &n)) == eslOK) { while (n && isspace(*p)) { p++; n--; } /* tolerate and skip leading whitespace on line */ if (n == 0) continue; /* tolerate and skip blank lines */ if (*p == '>') break; ESL_REALLOC(csflag[nseq], sizeof(char) * (thislen + n + 1)); /* might be an overalloc by a bit, depending on whitespace on line */ if (nseq == 0) { ESL_REALLOC(this_nins, sizeof(int) * (this_ncons + n + 1)); for (cpos = this_ncons+1; cpos <= this_ncons+n; cpos++) // DO NOT zero [this_ncons]; we may still be in an insert from prev line. this_nins[cpos] = 0; } for (spos = thislen, bpos = 0; bpos < n; bpos++) { if (p[bpos] == 'O') continue; else if (isupper(p[bpos])) { csflag[nseq][spos++] = TRUE; this_ncons++; } else if (islower(p[bpos])) { csflag[nseq][spos++] = FALSE; this_nins[this_ncons]++; } else if (p[bpos] == '-') { csflag[nseq][spos++] = TRUE; this_ncons++; } if (ncons && this_ncons > ncons) ESL_XFAIL(eslEFORMAT, afp->errmsg, "unexpected # of consensus residues, didn't match previous seq(s)"); } csflag[nseq][spos] = TRUE; /* need a sentinel, because of the way the padding functions work */ if (msa->abc) { status = esl_abc_dsqcat(afp->inmap, &(msa->ax[nseq]), &thislen, p, n); } if (! msa->abc) { status = esl_strmapcat (afp->inmap, &(msa->aseq[nseq]), &thislen, p, n); } if (status == eslEINVAL) ESL_XFAIL(eslEFORMAT, afp->errmsg, "one or more invalid sequence characters"); else if (status != eslOK) goto ERROR; ESL_DASSERT1( (spos == thislen) ); } if (status != eslOK && status != eslEOF) goto ERROR; /* exception thrown by esl_msafile_GetLine() */ /* status == OK: then *p == '>'. status == eslEOF: we're eof. status == anything else: error */ /* Finished reading a sequence record. */ /* Edge case: it's possible to have a sequence of length 0 residues in the alignment, * and that causes the loop structure above to exit before allocating csflag[] or ax[] */ if (thislen == 0) { ESL_ALLOC(csflag[nseq], sizeof(char) * 1); csflag[nseq][0] = TRUE; // csflag[] needs a sentinel even if there's no seq; make one. ESL_ALLOC(msa->ax[nseq], sizeof(ESL_DSQ) * 2); msa->ax[nseq][0] = msa->ax[nseq][1] = eslDSQ_SENTINEL; } if (nseq == 0) { ncons = this_ncons; ESL_ALLOC(nins, sizeof(int) * (ncons+1)); for (cpos = 0; cpos <= ncons; cpos++) nins[cpos] = this_nins[cpos]; } else { if (this_ncons != ncons) ESL_XFAIL(eslEFORMAT, afp->errmsg, "unexpected # of consensus residues, didn't match previous seq(s). (Do you have an O residue?)"); for (cpos = 0; cpos <= ncons; cpos++) nins[cpos] = ESL_MAX(nins[cpos], this_nins[cpos]); } nseq++; } while (status == eslOK); /* Now we have nseq *unaligned* sequences in ax/aseq[0..nseq-1]; call the length slen, though we don't explicitly store it * csflag[idx][spos] tells us whether each unaligned residue is an insertion or consensus, for spos==0..slen-1. * nins[0..ncons] tells us the max number of inserted residues before each consensus column * This is sufficient information to reconstruct each aligned sequence. */ msa->nseq = nseq; if (msa->abc) { if ((status = a2m_padding_digital(msa, csflag, nins, ncons)) != eslOK) goto ERROR; } if (!msa->abc) { if ((status = a2m_padding_text (msa, csflag, nins, ncons)) != eslOK) goto ERROR; } if (( status = esl_msa_SetDefaultWeights(msa)) != eslOK) goto ERROR; *ret_msa = msa; free(nins); free(this_nins); for (idx = 0; idx < msa->nseq; idx++) free(csflag[idx]); free(csflag); return eslOK; ERROR: if (nins) free(nins); if (this_nins) free(this_nins); if (csflag) { for (idx = 0; idx < msa->nseq; idx++) if (csflag[idx]) free(csflag[idx]); free(csflag); } if (msa) esl_msa_Destroy(msa); return status; } /* Function: esl_msafile_a2m_Write() * Synopsis: Write an A2M (UCSC SAM) dotless format alignment to a stream. * * Purpose: Write alignment in dotless UCSC A2M format to a * stream . * * The should have a valid reference line rf>, * with alphanumeric characters marking consensus (match) * columns, and non-alphanumeric characters marking * nonconsensus (insert) columns. If it does not, * then as a fallback, the first sequence in the alignment is * considered to be the consensus. * * In "dotless" A2M format, gap characters (.) in insert * columns are omitted; therefore sequences can be of * different lengths, but each sequence has the same number * of consensus columns (residue or -). * * A2M format cannot represent missing data symbols * (Easel's ~). Any missing data symbols are converted to * gaps. * * A2M format cannot represent pyrrolysine residues in * amino acid sequences, because it treats 'O' symbols * specially, as indicating a position at which a * free-insertion module (FIM) should be created. Any 'O' * in the is written instead as an unknown * residue ('X', in protein sequences). * * Args: fp - open output stream * msa - MSA to write * * Returns: on success. * * Throws: on allocation failure. * on any system write error, such as filled disk. */ int esl_msafile_a2m_Write(FILE *fp, const ESL_MSA *msa) { char *buf = NULL; int cpl = 60; int bpos; int pos; int is_consensus; int is_residue; int do_dotless = TRUE; /* just changing this to FALSE makes it write dots too */ int i; int sym; int status; ESL_ALLOC(buf, sizeof(char) * (cpl+1)); for (i = 0; i < msa->nseq; i++) { /* Construct the name/description line */ if (fprintf(fp, ">%s", msa->sqname[i]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); if (msa->sqacc != NULL && msa->sqacc[i] != NULL) { if (fprintf(fp, " %s", msa->sqacc[i]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); } if (msa->sqdesc != NULL && msa->sqdesc[i] != NULL) { if (fprintf(fp, " %s", msa->sqdesc[i]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); } if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); if (msa->abc) { pos = 0; while (pos < msa->alen) { for (bpos = 0; pos < msa->alen && bpos < cpl; pos++) { sym = msa->abc->sym[msa->ax[i][pos+1]]; /* note off-by-one in digitized aseq: 1..alen */ is_residue = esl_abc_XIsResidue(msa->abc, msa->ax[i][pos+1]); if (msa->rf) is_consensus = (isalnum(msa->rf[pos]) ? TRUE : FALSE); else is_consensus = (esl_abc_XIsResidue(msa->abc, msa->ax[0][pos+1]) ? TRUE : FALSE); if (sym == 'O') sym = esl_abc_CGetUnknown(msa->abc); /* watch out: O means "insert a FIM" in a2m format, not pyrrolysine */ if (is_consensus) { buf[bpos++] = (is_residue ? toupper(sym) : '-'); } else if (is_residue) { buf[bpos++] = tolower(sym); } else if (! do_dotless) { buf[bpos++] = '.'; } } buf[bpos] = '\0'; if (bpos) { if (fprintf(fp, "%s\n", buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed");} } } if (! msa->abc) { pos = 0; while (pos < msa->alen) { for (bpos = 0; pos < msa->alen && bpos < cpl; pos++) { sym = msa->aseq[i][pos]; is_residue = isalpha(msa->aseq[i][pos]); if (msa->rf) is_consensus = (isalnum(msa->rf[pos]) ? TRUE : FALSE); else is_consensus = (isalnum(msa->aseq[0][pos]) ? TRUE : FALSE); if (sym == 'O') sym = 'X'; if (is_consensus) { buf[bpos++] = ( is_residue ? toupper(sym) : '-'); } else if (is_residue) { buf[bpos++] = tolower(sym); } else if (! do_dotless) { buf[bpos++] = '.'; } } buf[bpos] = '\0'; if (bpos) { if (fprintf(fp, "%s\n", buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); } } } } /* end, loop over sequences in the MSA */ free(buf); return eslOK; ERROR: if (buf) free(buf); return status; } /*------------- end, API for i/o of a2m format ------------------*/ /***************************************************************** * 2. Internal functions used by the A2M parser *****************************************************************/ /* A2M parser has an input phase, followed by an alignment padding phase. * The a2m_padding_{digital,text} functions do the padding phase. * * Upon call: * msa->nseq is set; * msa->ax[0..nseq-1][1..slen] are unaligned seqs (consensus cols + * inserted residues); or msa->aseq[0..nseq-1][0..slen-1], for text mode * csflag[0..nseq-1][0..slen-1] is TRUE/FALSE for whether each pos * in msa->ax[][1..slen]/msa->aseq[][0..slen-1] is consensus or insert * nins[0..ncons] is the number of insert columns preceding each consensus column * * watch out, ax[] is a digital sequence, 1..alen not 0..alen-1: hence * the [apos+1], [spos+1] indexing * * these functions may not fail with any normal (eslEFORMAT) error, because * we wouldn't be able to tell the line/linenumber of the error * * Upon successful return: * msa->alen is set * all msa->ax[]/msa->aseq are now aligned digital sequences * msa->rf is set */ static int a2m_padding_digital(ESL_MSA *msa, char **csflag, int *nins, int ncons) { ESL_DSQ *ax = NULL; /* new aligned sequence - will be swapped into msa->ax[] */ ESL_DSQ gapsym = esl_abc_XGetGap(msa->abc); int apos, cpos, spos; /* position counters for alignment 0..alen, consensus cols 0..cpos-1, sequence position 0..slen-1 */ int alen; int icount; int idx; int status; alen = ncons; for (cpos = 0; cpos <= ncons; cpos++) alen += nins[cpos]; ESL_ALLOC(msa->rf, sizeof(char) * (alen+1)); for (apos = 0, cpos = 0; cpos <= ncons; cpos++) { for (icount = 0; icount < nins[cpos]; icount++) msa->rf[apos++] = '.'; if (cpos < ncons) msa->rf[apos++] = 'x'; } msa->rf[apos] = '\0'; for (idx = 0; idx < msa->nseq; idx++) { ESL_ALLOC(ax, sizeof(ESL_DSQ) * (alen + 2)); ax[0] = eslDSQ_SENTINEL; apos = spos = 0; for (cpos = 0; cpos <= ncons; cpos++) { icount = 0; while (csflag[idx][spos] == FALSE) { ax[apos+1] = msa->ax[idx][spos+1]; apos++; spos++; icount++; } while (icount < nins[cpos]) { ax[apos+1] = gapsym; apos++; icount++; } if (cpos < ncons) { ax[apos+1] = msa->ax[idx][spos+1]; apos++; spos++; } } ESL_DASSERT1( (msa->ax[idx][spos+1] == eslDSQ_SENTINEL) ); ESL_DASSERT1( (apos == alen) ); ax[alen+1] = eslDSQ_SENTINEL; free(msa->ax[idx]); msa->ax[idx] = ax; ax = NULL; } msa->alen = alen; return eslOK; ERROR: if (ax) free(ax); return status; } static int a2m_padding_text(ESL_MSA *msa, char **csflag, int *nins, int ncons) { char *aseq = NULL; /* new aligned sequence - will be swapped into msa->aseq[] */ int apos, cpos, spos; /* position counters for alignment 0..alen, consensus cols 0..cpos-1, sequence position 0..slen-1 */ int alen; int icount; int idx; int status; alen = ncons; for (cpos = 0; cpos <= ncons; cpos++) alen += nins[cpos]; ESL_ALLOC(msa->rf, sizeof(char) * (alen+1)); for (apos = 0, cpos = 0; cpos <= ncons; cpos++) { for (icount = 0; icount < nins[cpos]; icount++) msa->rf[apos++] = '.'; if (cpos < ncons) msa->rf[apos++] = 'x'; } msa->rf[apos] = '\0'; for (idx = 0; idx < msa->nseq; idx++) { ESL_ALLOC(aseq, sizeof(char) * (alen + 1)); apos = spos = 0; for (cpos = 0; cpos <= ncons; cpos++) { icount = 0; while (csflag[idx][spos] == FALSE) { aseq[apos] = msa->aseq[idx][spos]; apos++; spos++; icount++; } while (icount < nins[cpos]) { aseq[apos] = '.'; apos++; icount++; } if (cpos < ncons) { aseq[apos] = msa->aseq[idx][spos]; apos++; spos++; } } ESL_DASSERT1( (msa->aseq[idx][spos] == '\0') ); ESL_DASSERT1( (apos == alen) ); aseq[alen] = '\0'; free(msa->aseq[idx]); msa->aseq[idx] = aseq; aseq = NULL; } msa->alen = alen; return eslOK; ERROR: if (aseq) free(aseq); return status; } /*---------- end, internal functions for the parser -------------*/ /***************************************************************** * 3. Unit tests. *****************************************************************/ #ifdef eslMSAFILE_A2M_TESTDRIVE #include "esl_random.h" /* A2M is unable to write O (pyrrolysine) residues, because it uses O * to mean a free insertion module. For unit tests that generate * dirty/sampled MSAs, we have to avoid O's in those alignments. */ static void a2m_no_O(ESL_MSA *msa) { int idx; int apos; int symO; ESL_DASSERT1(( msa->flags & eslMSA_DIGITAL )); ESL_DASSERT1(( msa->abc != NULL )); symO = esl_abc_DigitizeSymbol(msa->abc, 'O'); for (idx = 0; idx < msa->nseq; idx++) for (apos = 1; apos <= msa->alen; apos++) if (msa->ax[idx][apos] == symO) msa->ax[idx][apos] = esl_abc_XGetUnknown(msa->abc); } static void utest_write_good1(FILE *ofp, int *ret_alphatype, int *ret_nseq, int *ret_alen) { fputs(">MYG_PHYCA\n", ofp); fputs("VLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHLKTEAEMKASED\n", ofp); fputs("LKKHGVTVLTALGAILKKKGHHEAELKPLAQSHATKHKIPIKYLEFISEAIIHVLHSRHP\n", ofp); fputs("GDFGADAQGAMNKALELFRKDIAAKYKELGYQG\n", ofp); fputs(">GLB5_PETMA\n", ofp); fputs("pivdtgsvApLSAAEKTKIRSAWAPVYSTYETSGVDILVKFFTSTPAAQEFFPKFKGLTT\n", ofp); fputs("ADQLKKSADVRWHAERIINAVNDAVASMDDtekMSMKLRDLSGKHAKSFQVDPQYFKVLA\n", ofp); fputs("AVI---------ADTVAAGDAGFEKLMSMICILLRSAY-------\n", ofp); fputs(">HBB_HUMAN\n", ofp); fputs("VhLTPEEKSAVTALWGKV--NVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNP\n", ofp); fputs("KVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHF\n", ofp); fputs("GKEFTPPVQAAYQKVVAGVANALAHKYH------\n", ofp); fputs(">HBA_HUMAN\n", ofp); fputs("VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHF------DLSHGSAQ\n", ofp); fputs("VKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLP\n", ofp); fputs("AEFTPAVHASLDKFLASVSTVLTSKYR------\n", ofp); *ret_alphatype = eslAMINO; *ret_nseq = 4; *ret_alen = 165; } static void utest_write_good2(FILE *ofp, int *ret_alphatype, int *ret_nseq, int *ret_alen) { fputs(">tRNA2\n", ofp); fputs("UCCGAUAUAGUGUAACGGCUAUCACAUCACGCUUUCACCGUGGAGACCGGGGUUCGACUC\n", ofp); fputs("CCCGUAUCGGAG\n", ofp); fputs(">tRNA3\n", ofp); fputs("UCCGUGAUAGUUUAAUGGUCAGAAUGG-GCGCUUGUCGCGUGCcAGAUCGGGGUUCAAUU\n", ofp); fputs("CCCCGUCGCGGAG\n", ofp); fputs(">tRNA5\n", ofp); fputs("GGGCACAUGGCGCAGUUGGUAGCGCGCUUCCCUUGCAAGGAAGaGGUCAUCGGUUCGAUU\n", ofp); fputs("CCGGUUGCGUCCA\n", ofp); fputs(">tRNA1\n", ofp); fputs("GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGaGGUCCUGUGUUCGAUC\n", ofp); fputs("CACAGAAUUCGCA\n", ofp); fputs(">tRNA4\n", ofp); fputs("GCUCGUAUGGCGCAGUGG-UAGCGCAGCAGAUUGCAAAUCUGUuGGUCCUUAGUUCGAUC\n", ofp); fputs("CUGAGUGCGAGCU\n", ofp); *ret_alphatype = eslRNA; *ret_nseq = 5; *ret_alen = 73; } static void utest_goodfile(char *filename, int testnumber, int expected_alphatype, int expected_nseq, int expected_alen) { ESL_ALPHABET *abc = NULL; ESL_MSAFILE *afp = NULL; ESL_MSA *msa1 = NULL; ESL_MSA *msa2 = NULL; char tmpfile1[32] = "esltmpXXXXXX"; char tmpfile2[32] = "esltmpXXXXXX"; FILE *ofp = NULL; int status; /* A2M must be specified (no format guessing, unless we use .a2m suffix) but guessing the alphabet should work: this is a digital open */ if ( (status = esl_msafile_Open(&abc, filename, NULL, eslMSAFILE_A2M, NULL, &afp)) != eslOK) esl_fatal("a2m good file test %d failed: digital open", testnumber); if (afp->format != eslMSAFILE_A2M) esl_fatal("a2m good file test %d failed: bad format", testnumber); if (abc->type != expected_alphatype) esl_fatal("a2m good file test %d failed: alphabet autodetection", testnumber); /* This is a digital read, using . */ if ( (status = esl_msafile_a2m_Read(afp, &msa1)) != eslOK) esl_fatal("a2m good file test %d failed: msa read, digital", testnumber); if (msa1->nseq != expected_nseq || msa1->alen != expected_alen) esl_fatal("a2m good file test %d failed: nseq/alen", testnumber); if (esl_msa_Validate(msa1, NULL) != eslOK) esl_fatal("a2m good file test %d failed: msa invalid", testnumber); esl_msafile_Close(afp); /* write it back out to a new tmpfile (digital write) */ if ( (status = esl_tmpfile_named(tmpfile1, &ofp)) != eslOK) esl_fatal("a2m good file test %d failed: tmpfile creation", testnumber); if ( (status = esl_msafile_a2m_Write(ofp, msa1)) != eslOK) esl_fatal("a2m good file test %d failed: msa write, digital", testnumber); fclose(ofp); /* now open and read it as text mode, in known format. */ if ( (status = esl_msafile_Open(NULL, tmpfile1, NULL, eslMSAFILE_A2M, NULL, &afp)) != eslOK) esl_fatal("a2m good file test %d failed: text mode open", testnumber); if ( (status = esl_msafile_a2m_Read(afp, &msa2)) != eslOK) esl_fatal("a2m good file test %d failed: msa read, text", testnumber); if (msa2->nseq != expected_nseq || msa2->alen != expected_alen) esl_fatal("a2m good file test %d failed: nseq/alen", testnumber); if (esl_msa_Validate(msa2, NULL) != eslOK) esl_fatal("a2m good file test %d failed: msa invalid", testnumber); esl_msafile_Close(afp); /* write it back out to a new tmpfile (text write) */ if ( (status = esl_tmpfile_named(tmpfile2, &ofp)) != eslOK) esl_fatal("a2m good file test %d failed: tmpfile creation", testnumber); if ( (status = esl_msafile_a2m_Write(ofp, msa2)) != eslOK) esl_fatal("a2m good file test %d failed: msa write, text", testnumber); fclose(ofp); esl_msa_Destroy(msa2); /* open and read it in digital mode */ if ( (status = esl_msafile_Open(&abc, tmpfile1, NULL, eslMSAFILE_A2M, NULL, &afp)) != eslOK) esl_fatal("a2m good file test %d failed: 2nd digital mode open", testnumber); if ( (status = esl_msafile_a2m_Read(afp, &msa2)) != eslOK) esl_fatal("a2m good file test %d failed: 2nd digital msa read", testnumber); if (esl_msa_Validate(msa2, NULL) != eslOK) esl_fatal("a2m good file test %d failed: msa invalid", testnumber); esl_msafile_Close(afp); /* this msa should be identical to */ if (esl_msa_Compare(msa1, msa2) != eslOK) esl_fatal("a2m good file test %d failed: msa compare", testnumber); remove(tmpfile1); remove(tmpfile2); esl_msa_Destroy(msa1); esl_msa_Destroy(msa2); esl_alphabet_Destroy(abc); } static void write_test_msas(FILE *ofp1, FILE *ofp2) { fprintf(ofp1, ">seq1 description line for seq1\n"); fprintf(ofp1, "ACDEFGHIKLMNPQRSTVWY\n"); fprintf(ofp1, "ACDEFGHIKLMNPQRSTVWY\n"); fprintf(ofp1, ">seq2 description line for seq2\n"); fprintf(ofp1, "ACDEFGHIKLMNPQRSTV--\n"); fprintf(ofp1, "ACDEFGHIKLMNPQRSTVWY\n"); fprintf(ofp1, "yy\n"); fprintf(ofp1, ">seq3\n"); fprintf(ofp1, "aaACDEFGHIKLMNPQRSTV\n"); fprintf(ofp1, "--ACDEFGHIKLMNPQRSTVWY\n"); fprintf(ofp1, ">seq4 \n"); fprintf(ofp1, "ACDEFGHIKLMNPQR\n"); fprintf(ofp1, "STVWYACDEFGHIKL\n"); fprintf(ofp1, "MNPQRSTVWY\n"); fprintf(ofp2, "# STOCKHOLM 1.0\n"); fprintf(ofp2, "\n"); fprintf(ofp2, "#=GS seq1 DE description line for seq1\n"); fprintf(ofp2, "#=GS seq2 DE description line for seq2\n"); fprintf(ofp2, "\n"); fprintf(ofp2, "#=GC RF ..xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx..\n"); fprintf(ofp2, "seq1 ..ACDEFGHIKLMNPQRSTVWYACDEFGHIKLMNPQRSTVWY..\n"); fprintf(ofp2, "seq2 ..ACDEFGHIKLMNPQRSTV--ACDEFGHIKLMNPQRSTVWYyy\n"); fprintf(ofp2, "seq3 aaACDEFGHIKLMNPQRSTV--ACDEFGHIKLMNPQRSTVWY..\n"); fprintf(ofp2, "seq4 ..ACDEFGHIKLMNPQRSTVWYACDEFGHIKLMNPQRSTVWY..\n"); fprintf(ofp2, "//\n"); } static void read_test_msas_digital(char *a2mfile, char *stkfile) { char msg[] = "A2M msa digital read unit test failed"; ESL_ALPHABET *abc = NULL; ESL_MSAFILE *afp1 = NULL; ESL_MSAFILE *afp2 = NULL; ESL_MSA *msa1, *msa2, *msa3, *msa4; FILE *a2mfp, *stkfp; char a2mfile2[32] = "esltmpa2m2XXXXXX"; char stkfile2[32] = "esltmpstk2XXXXXX"; if ( esl_msafile_Open(&abc, a2mfile, NULL, eslMSAFILE_A2M, NULL, &afp1) != eslOK) esl_fatal(msg); if ( !abc || abc->type != eslAMINO) esl_fatal(msg); if ( esl_msafile_Open(&abc, stkfile, NULL, eslMSAFILE_STOCKHOLM, NULL, &afp2) != eslOK) esl_fatal(msg); if ( esl_msafile_a2m_Read (afp1, &msa1) != eslOK) esl_fatal(msg); if ( esl_msafile_stockholm_Read(afp2, &msa2) != eslOK) esl_fatal(msg); if ( esl_msa_Compare(msa1, msa2) != eslOK) esl_fatal(msg); if ( esl_msafile_a2m_Read (afp1, &msa3) != eslEOF) esl_fatal(msg); if ( esl_msafile_stockholm_Read(afp2, &msa3) != eslEOF) esl_fatal(msg); esl_msafile_Close(afp2); esl_msafile_Close(afp1); /* Now write stk to a2m file, and vice versa; then retest */ if ( esl_tmpfile_named(a2mfile2, &a2mfp) != eslOK) esl_fatal(msg); if ( esl_tmpfile_named(stkfile2, &stkfp) != eslOK) esl_fatal(msg); if ( esl_msafile_a2m_Write (a2mfp, msa2) != eslOK) esl_fatal(msg); if ( esl_msafile_stockholm_Write(stkfp, msa1, eslMSAFILE_PFAM) != eslOK) esl_fatal(msg); fclose(a2mfp); fclose(stkfp); if ( esl_msafile_Open(&abc, a2mfile2, NULL, eslMSAFILE_A2M, NULL, &afp1) != eslOK) esl_fatal(msg); if ( esl_msafile_Open(&abc, stkfile2, NULL, eslMSAFILE_STOCKHOLM, NULL, &afp2) != eslOK) esl_fatal(msg); if ( esl_msafile_a2m_Read (afp1, &msa3) != eslOK) esl_fatal(msg); if ( esl_msafile_stockholm_Read(afp2, &msa4) != eslOK) esl_fatal(msg); if ( esl_msa_Compare(msa3, msa4) != eslOK) esl_fatal(msg); remove(a2mfile2); remove(stkfile2); esl_msafile_Close(afp2); esl_msafile_Close(afp1); esl_msa_Destroy(msa1); esl_msa_Destroy(msa2); esl_msa_Destroy(msa3); esl_msa_Destroy(msa4); esl_alphabet_Destroy(abc); } static void read_test_msas_text(char *a2mfile, char *stkfile) { char msg[] = "A2M msa text-mode read unit test failed"; ESL_MSAFILE *afp1 = NULL; ESL_MSAFILE *afp2 = NULL; ESL_MSA *msa1, *msa2, *msa3, *msa4; FILE *a2mfp, *stkfp; char a2mfile2[32] = "esltmpa2m2XXXXXX"; char stkfile2[32] = "esltmpstk2XXXXXX"; /* vvvv-- everything's the same as the digital utest except these NULLs */ if ( esl_msafile_Open(NULL, a2mfile, NULL, eslMSAFILE_A2M, NULL, &afp1) != eslOK) esl_fatal(msg); if ( esl_msafile_Open(NULL, stkfile, NULL, eslMSAFILE_STOCKHOLM, NULL, &afp2) != eslOK) esl_fatal(msg); if ( esl_msafile_a2m_Read (afp1, &msa1) != eslOK) esl_fatal(msg); if ( esl_msafile_stockholm_Read(afp2, &msa2) != eslOK) esl_fatal(msg); if ( esl_msa_Compare(msa1, msa2) != eslOK) esl_fatal(msg); if ( esl_msafile_a2m_Read (afp1, &msa3) != eslEOF) esl_fatal(msg); if ( esl_msafile_stockholm_Read(afp2, &msa3) != eslEOF) esl_fatal(msg); esl_msafile_Close(afp2); esl_msafile_Close(afp1); if ( esl_tmpfile_named(a2mfile2, &a2mfp) != eslOK) esl_fatal(msg); if ( esl_tmpfile_named(stkfile2, &stkfp) != eslOK) esl_fatal(msg); if ( esl_msafile_a2m_Write (a2mfp, msa2) != eslOK) esl_fatal(msg); if ( esl_msafile_stockholm_Write(stkfp, msa1, eslMSAFILE_PFAM) != eslOK) esl_fatal(msg); fclose(a2mfp); fclose(stkfp); if ( esl_msafile_Open(NULL, a2mfile2, NULL, eslMSAFILE_A2M, NULL, &afp1) != eslOK) esl_fatal(msg); if ( esl_msafile_Open(NULL, stkfile2, NULL, eslMSAFILE_STOCKHOLM, NULL, &afp2) != eslOK) esl_fatal(msg); if ( esl_msafile_a2m_Read (afp1, &msa3) != eslOK) esl_fatal(msg); if ( esl_msafile_stockholm_Read(afp2, &msa4) != eslOK) esl_fatal(msg); if ( esl_msa_Compare(msa3, msa4) != eslOK) esl_fatal(msg); remove(a2mfile2); remove(stkfile2); esl_msafile_Close(afp2); esl_msafile_Close(afp1); esl_msa_Destroy(msa1); esl_msa_Destroy(msa2); esl_msa_Destroy(msa3); esl_msa_Destroy(msa4); } static void utest_gibberish(ESL_RANDOMNESS *rng) { char msg[] = "esl_msafile_a2m gibberish test failed"; char a2mfile[32] = "esltmpXXXXXX"; FILE *fp = NULL; ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); ESL_MSA *msa = NULL; ESL_MSAFILE *msafp = NULL; ESL_MSA *msa2 = NULL; if ( esl_msa_Sample(rng, abc, 100, 100, &msa) != eslOK) esl_fatal(msg); if ( esl_msa_Validate(msa, NULL) != eslOK) esl_fatal(msg); if ( esl_msa_FlushLeftInserts(msa) != eslOK) esl_fatal(msg); // Reading A2M back in will flush inserts left. if ( esl_msa_MinimGaps(msa, NULL, NULL, TRUE) != eslOK) esl_fatal(msg); // Reading A2M back in will minimize gaps. a2m_no_O(msa); // A2M doesn't allow O residues. ESL_DASSERT1(( !(msa->flags & eslMSA_HASWGTS) )); // A2M can't store weights. ESL_DASSERT1(( msa->rf != NULL )); // A2M always implies consensus annotation. if ( esl_tmpfile_named(a2mfile, &fp) != eslOK) esl_fatal(msg); if ( esl_msafile_a2m_Write(fp, msa) != eslOK) esl_fatal(msg); fclose(fp); if ( esl_msafile_Open(&abc, a2mfile, NULL, eslMSAFILE_A2M, NULL, &msafp) != eslOK) esl_fatal(msg); if ( esl_msafile_a2m_Read(msafp, &msa2) != eslOK) esl_fatal(msg); if ( esl_msa_Validate(msa2, NULL) != eslOK) esl_fatal(msg); if ( esl_msa_Compare(msa, msa2) != eslOK) esl_fatal(msg); esl_msafile_Close(msafp); remove(a2mfile); esl_msa_Destroy(msa); esl_msa_Destroy(msa2); esl_alphabet_Destroy(abc); } #endif /*eslMSAFILE_A2M_TESTDRIVE*/ /*---------------------- end, unit tests ------------------------*/ /***************************************************************** * 4. Test driver. *****************************************************************/ #ifdef eslMSAFILE_A2M_TESTDRIVE /* compile: gcc -g -Wall -I. -L. -o esl_msafile_a2m_utest -DeslMSAFILE_A2M_TESTDRIVE esl_msafile_a2m.c -leasel -lm * (gcov): gcc -g -Wall -fprofile-arcs -ftest-coverage -I. -L. -o esl_msafile_a2m_utest -DeslMSAFILE_A2M_TESTDRIVE esl_msafile_a2m.c -leasel -lm * run: ./esl_msafile_a2m_utest */ #include "esl_config.h" #include #include "easel.h" #include "esl_getopts.h" #include "esl_random.h" #include "esl_msafile.h" #include "esl_msafile_a2m.h" static ESL_OPTIONS options[] = { /* name type default env range togs reqs incomp help docgrp */ {"-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show help and usage", 0}, {"-s", eslARG_INT, "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to ", 0}, { 0,0,0,0,0,0,0,0,0,0}, }; static char usage[] = "[-options]"; static char banner[] = "test driver for A2M MSA format module"; int main(int argc, char **argv) { char msg[] = "a2m MSA i/o module test driver failed"; ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); char a2mfile[32] = "esltmpa2mXXXXXX"; char stkfile[32] = "esltmpstkXXXXXX"; FILE *a2mfp, *stkfp; int testnumber; int ngoodtests = 2; char tmpfile[32]; FILE *ofp; int expected_alphatype; int expected_nseq; int expected_alen; fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); if ( esl_tmpfile_named(a2mfile, &a2mfp) != eslOK) esl_fatal(msg); if ( esl_tmpfile_named(stkfile, &stkfp) != eslOK) esl_fatal(msg); write_test_msas(a2mfp, stkfp); fclose(a2mfp); fclose(stkfp); read_test_msas_digital(a2mfile, stkfile); read_test_msas_text (a2mfile, stkfile); /* Various "good" files that should be parsed correctly */ for (testnumber = 1; testnumber <= ngoodtests; testnumber++) { strcpy(tmpfile, "esltmpXXXXXX"); if (esl_tmpfile_named(tmpfile, &ofp) != eslOK) esl_fatal(msg); switch (testnumber) { case 1: utest_write_good1 (ofp, &expected_alphatype, &expected_nseq, &expected_alen); break; case 2: utest_write_good2 (ofp, &expected_alphatype, &expected_nseq, &expected_alen); break; } fclose(ofp); utest_goodfile(tmpfile, testnumber, expected_alphatype, expected_nseq, expected_alen); remove(tmpfile); } utest_gibberish(rng); fprintf(stderr, "# status = ok\n"); remove(a2mfile); remove(stkfile); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); return 0; } #endif /*eslMSAFILE_A2M_TESTDRIVE*/ /*--------------------- end, test driver ------------------------*/ /***************************************************************** * 5. Examples. *****************************************************************/ #ifdef eslMSAFILE_A2M_EXAMPLE /* A full-featured example of reading/writing an MSA in A2M format. gcc -g -Wall -o esl_msafile_a2m_example -I. -L. -DeslMSAFILE_A2M_EXAMPLE esl_msafile_a2m.c -leasel -lm ./esl_msafile_a2m_example */ /*::cexcerpt::msafile_a2m_example::begin::*/ #include #include "easel.h" #include "esl_alphabet.h" #include "esl_getopts.h" #include "esl_msa.h" #include "esl_msafile.h" #include "esl_msafile_a2m.h" static ESL_OPTIONS options[] = { /* name type default env range toggles reqs incomp help docgroup*/ { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 }, { "-1", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "override autodetection; force A2M format", 0 }, { "-q", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "quieter: don't write msa back, just summary", 0 }, { "-t", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "use text mode: no digital alphabet", 0 }, { "--dna", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, "-t", "specify that alphabet is DNA", 0 }, { "--rna", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, "-t", "specify that alphabet is RNA", 0 }, { "--amino", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, "-t", "specify that alphabet is protein", 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, }; static char usage[] = "[-options] "; static char banner[] = "example of guessing, reading, writing A2M format"; int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *filename = esl_opt_GetArg(go, 1); int infmt = eslMSAFILE_UNKNOWN; ESL_ALPHABET *abc = NULL; ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; int status; if (esl_opt_GetBoolean(go, "-1")) infmt = eslMSAFILE_A2M; /* override format autodetection */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); /* Text mode: pass NULL for alphabet. * Digital mode: pass ptr to expected ESL_ALPHABET; and if abc=NULL, alphabet is guessed */ if (esl_opt_GetBoolean(go, "-t")) status = esl_msafile_Open(NULL, filename, NULL, infmt, NULL, &afp); else status = esl_msafile_Open(&abc, filename, NULL, infmt, NULL, &afp); if (status != eslOK) esl_msafile_OpenFailure(afp, status); if ((status = esl_msafile_a2m_Read(afp, &msa)) != eslOK) esl_msafile_ReadFailure(afp, status); printf("alphabet: %s\n", (abc ? esl_abc_DecodeType(abc->type) : "none (text mode)")); printf("# of seqs: %d\n", msa->nseq); printf("# of cols: %d\n", (int) msa->alen); printf("\n"); if (! esl_opt_GetBoolean(go, "-q")) esl_msafile_a2m_Write(stdout, msa); esl_msa_Destroy(msa); esl_msafile_Close(afp); if (abc) esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); exit(0); } /*::cexcerpt::msafile_a2m_example::end::*/ #endif /*eslMSAFILE_A2M_EXAMPLE*/ #ifdef eslMSAFILE_A2M_EXAMPLE2 /* A minimal example. Read A2M MSA, in text mode gcc -g -Wall -o esl_msafile_a2m_example2 -I. -L. -DeslMSAFILE_A2M_EXAMPLE2 esl_msafile_a2m.c -leasel -lm ./esl_msafile_a2m_example */ /*::cexcerpt::msafile_a2m_example2::begin::*/ #include #include "easel.h" #include "esl_msa.h" #include "esl_msafile.h" #include "esl_msafile_a2m.h" int main(int argc, char **argv) { char *filename = argv[1]; int fmt = eslMSAFILE_A2M; ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; int status; if ( (status = esl_msafile_Open(NULL, filename, NULL, fmt, NULL, &afp)) != eslOK) esl_msafile_OpenFailure(afp, status); if ( (status = esl_msafile_a2m_Read(afp, &msa)) != eslOK) esl_msafile_ReadFailure(afp, status); printf("%6d seqs, %5d columns\n", msa->nseq, (int) msa->alen); esl_msafile_a2m_Write(stdout, msa); esl_msa_Destroy(msa); esl_msafile_Close(afp); exit(0); } /*::cexcerpt::msafile_a2m_example2::end::*/ #endif /*eslMSAFILE_A2M_EXAMPLE2*/ /*--------------------- end of examples -------------------------*/