/* P7_TOPHITS: implementation of ranked list of top-scoring hits
 * 
 * Contents:
 *    1. The P7_TOPHITS object.
 *    2. Standard (human-readable) output of pipeline results.
 *    3. Tabular (parsable) output of pipeline results.
 *    4. Benchmark driver.
 *    5. Test driver.
 *    6. Copyright and license information.
 */
#include "p7_config.h"

#include <stdlib.h>
#include <string.h>
#include <limits.h>

#include "easel.h"
#include "hmmer.h"

/*****************************************************************
 *= 1. The P7_TOPHITS object
 *****************************************************************/

/* Function:  p7_tophits_Create()
 * Synopsis:  Allocate a hit list.
 *
 * Purpose:   Allocates a new <P7_TOPHITS> hit list and return a pointer
 *            to it.
 *
 * Throws:    <NULL> on allocation failure.
 */
P7_TOPHITS *
p7_tophits_Create(void)
{
  P7_TOPHITS *h = NULL;
  int         default_nalloc = 256;
  int         status;

  ESL_ALLOC(h, sizeof(P7_TOPHITS));
  h->hit    = NULL;
  h->unsrt  = NULL;

  ESL_ALLOC(h->hit,   sizeof(P7_HIT *) * default_nalloc);
  ESL_ALLOC(h->unsrt, sizeof(P7_HIT)   * default_nalloc);
  h->Nalloc    = default_nalloc;
  h->N         = 0;
  h->nreported = 0;
  h->nincluded = 0;
  h->is_sorted_by_sortkey = TRUE; /* but only because there's 0 hits */
  h->is_sorted_by_seqidx  = FALSE;
  h->hit[0]    = h->unsrt;        /* if you're going to call it "sorted" when it contains just one hit, you need this */
  return h;

 ERROR:
  p7_tophits_Destroy(h);
  return NULL;
}


/* Function:  p7_tophits_Grow()
 * Synopsis:  Reallocates a larger hit list, if needed.
 *
 * Purpose:   If list <h> cannot hold another hit, doubles
 *            the internal allocation.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure; in this case,
 *            the data in <h> are unchanged.
 */
int
p7_tophits_Grow(P7_TOPHITS *h)
{
  void   *p;
  P7_HIT *ori    = h->unsrt;
  int     Nalloc = h->Nalloc * 2;    /* grow by doubling */
  int     i;
  int     status;

  if (h->N < h->Nalloc) return eslOK; /* we have enough room for another hit */

  ESL_RALLOC(h->hit,   p, sizeof(P7_HIT *) * Nalloc);
  ESL_RALLOC(h->unsrt, p, sizeof(P7_HIT)   * Nalloc);

  /* If we grow a sorted list, we have to translate the pointers
   * in h->hit, because h->unsrt might have just moved in memory. 
   */
  if (h->is_sorted_by_seqidx || h->is_sorted_by_sortkey)
  {
      for (i = 0; i < h->N; i++)
        h->hit[i] = h->unsrt + (h->hit[i] - ori);
  }

  h->Nalloc = Nalloc;
  return eslOK;

 ERROR:
  return eslEMEM;
}


/* Function:  p7_tophits_CreateNextHit()
 * Synopsis:  Get pointer to new structure for recording a hit.
 *
 * Purpose:   Ask the top hits object <h> to do any necessary
 *            internal allocation and bookkeeping to add a new,
 *            empty hit to its list; return a pointer to 
 *            this new <P7_HIT> structure for data to be filled
 *            in by the caller.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation error.
 */
int
p7_tophits_CreateNextHit(P7_TOPHITS *h, P7_HIT **ret_hit)
{
  P7_HIT *hit = NULL;
  int     status;

  if ((status = p7_tophits_Grow(h)) != eslOK) goto ERROR;
  
  hit = &(h->unsrt[h->N]);
  h->N++;
  if (h->N >= 2) 
  {
      h->is_sorted_by_seqidx = FALSE;
      h->is_sorted_by_sortkey = FALSE;
  }

  hit->name         = NULL;
  hit->acc          = NULL;
  hit->desc         = NULL;
  hit->sortkey      = 0.0;

  hit->score        = 0.0;
  hit->pre_score    = 0.0;
  hit->sum_score    = 0.0;

  hit->lnP          = 0.0;
  hit->pre_lnP      = 0.0;
  hit->sum_lnP      = 0.0;

  hit->ndom         = 0;
  hit->nexpected    = 0.0;
  hit->nregions     = 0;
  hit->nclustered   = 0;
  hit->noverlaps    = 0;
  hit->nenvelopes   = 0;

  hit->flags        = p7_HITFLAGS_DEFAULT;
  hit->nreported    = 0;
  hit->nincluded    = 0;
  hit->best_domain  = -1;
  hit->dcl          = NULL;
  hit->offset       = 0;

  *ret_hit = hit;
  return eslOK;

 ERROR:
  *ret_hit = NULL;
  return status;
}



/* Function:  p7_tophits_Add()
 * Synopsis:  Add a hit to the top hits list.
 *
 * Purpose:   Adds a hit to the top hits list <h>. 
 * 
 *            <name>, <acc>, and <desc> are copied, so caller may free
 *            them if it likes.
 *            
 *            Only the pointer <ali> is kept. Caller turns over memory
 *            management of <ali> to the top hits object; <ali> will
 *            be free'd when the top hits structure is free'd.
 *
 * Args:      h        - active top hit list
 *            name     - name of target  
 *            acc      - accession of target (may be NULL)
 *            desc     - description of target (may be NULL) 
 *            sortkey  - value to sort by: bigger is better
 *            score    - score of this hit
 *            lnP      - log P-value of this hit 
 *            mothersc - score of parent whole sequence 
 *            mother_lnP - log P-value of parent whole sequence
 *            sqfrom   - 1..L pos in target seq  of start
 *            sqto     - 1..L pos; sqfrom > sqto if rev comp
 *            sqlen    - length of sequence, L
 *            hmmfrom  - 0..M+1 pos in HMM of start
 *            hmmto    - 0..M+1 pos in HMM of end
 *            hmmlen   - length of HMM, M
 *            domidx   - number of this domain 
 *            ndom     - total # of domains in sequence
 *            ali      - optional printable alignment info
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> if reallocation failed.
 * 
 * Note:      Is this actually used anywhere? (SRE, 10 Dec 08) 
 *            I think it's not up to date.
 *            
 *            That's right. This function is obsolete.
 *            But it is used in benchmark and test code, so you can't
 *            delete it yet; benchmarks and test code should be
 *            revised (SRE, 26 Oct 09)
 */
int
p7_tophits_Add(P7_TOPHITS *h,
         char *name, char *acc, char *desc,
         double sortkey,
         float score,    double lnP,
         float mothersc, double mother_lnP,
         int sqfrom, int sqto, int sqlen,
         int hmmfrom, int hmmto, int hmmlen,
         int domidx, int ndom,
         P7_ALIDISPLAY *ali)
{
  int status;

  if ((status = p7_tophits_Grow(h))                           != eslOK) return status;
  if ((status = esl_strdup(name, -1, &(h->unsrt[h->N].name))) != eslOK) return status;
  if ((status = esl_strdup(acc,  -1, &(h->unsrt[h->N].acc)))  != eslOK) return status;
  if ((status = esl_strdup(desc, -1, &(h->unsrt[h->N].desc))) != eslOK) return status;
  h->unsrt[h->N].sortkey    = sortkey;
  h->unsrt[h->N].score      = score;
  h->unsrt[h->N].pre_score  = 0.0;
  h->unsrt[h->N].sum_score  = 0.0;
  h->unsrt[h->N].lnP        = lnP;
  h->unsrt[h->N].pre_lnP    = 0.0;
  h->unsrt[h->N].sum_lnP    = 0.0;
  h->unsrt[h->N].nexpected  = 0;
  h->unsrt[h->N].nregions   = 0;
  h->unsrt[h->N].nclustered = 0;
  h->unsrt[h->N].noverlaps  = 0;
  h->unsrt[h->N].nenvelopes = 0;
  h->unsrt[h->N].ndom       = ndom;
  h->unsrt[h->N].flags      = 0;
  h->unsrt[h->N].nreported  = 0;
  h->unsrt[h->N].nincluded  = 0;
  h->unsrt[h->N].best_domain= 0;
  h->unsrt[h->N].dcl        = NULL;
  h->N++;

  if (h->N >= 2) {
    h->is_sorted_by_seqidx = FALSE;
    h->is_sorted_by_sortkey = FALSE;
  }
  return eslOK;
}

/* hit_sorter(): qsort's pawn, below */
static int
hit_sorter_by_sortkey(const void *vh1, const void *vh2)
{
  P7_HIT *h1 = *((P7_HIT **) vh1);  /* don't ask. don't change. Don't Panic. */
  P7_HIT *h2 = *((P7_HIT **) vh2);
  int     c;

  if      (h1->sortkey < h2->sortkey) return  1;
  else if (h1->sortkey > h2->sortkey) return -1;
  else {
    if ( (c = strcmp(h1->name, h2->name)) != 0) return c;

    /* if on different strand, the positive strand goes first, else use position */
    int dir1 = (h1->dcl[0].iali < h1->dcl[0].jali ? 1 : -1);
    int dir2 = (h2->dcl[0].iali < h2->dcl[0].jali ? 1 : -1);
    if (dir1 != dir2) return dir2; // so if dir1 is pos (1), and dir2 is neg (-1), this will return -1, placing h1 before h2;  otherwise, vice versa
    else              return (h1->dcl[0].iali > h2->dcl[0].iali ? 1 : -1 );

  }
}

static int
hit_sorter_by_seqidx(const void *vh1, const void *vh2)
{
  P7_HIT *h1 = *((P7_HIT **) vh1);  /* don't ask. don't change. Don't Panic. */
  P7_HIT *h2 = *((P7_HIT **) vh2);

  if      (h1->seqidx > h2->seqidx) return  1; /* first key, seq_idx (unique id for sequences), low to high */
  else if (h1->seqidx < h2->seqidx) return -1;
  // if on different strand, the positive strand goes first, else use position
  int dir1 = (h1->dcl[0].iali < h1->dcl[0].jali ? 1 : -1);
  int dir2 = (h2->dcl[0].iali < h2->dcl[0].jali ? 1 : -1);

  if (dir1 != dir2) return dir2; // so if dir1 is pos (1), and dir2 is neg (-1), this will return -1, placing h1 before h2;  otherwise, vice versa
  else              return  (h1->dcl[0].iali > h2->dcl[0].iali ? 1 : -1 );
}


/* Function:  p7_tophits_SortBySortkey()
 * Synopsis:  Sorts a hit list.
 *
 * Purpose:   Sorts a top hit list. After this call,
 *            <h->hit[i]> points to the i'th ranked 
 *            <P7_HIT> for all <h->N> hits.
 *
 * Returns:   <eslOK> on success.
 */
int
p7_tophits_SortBySortkey(P7_TOPHITS *h)
{
  int i;

  if (h->is_sorted_by_sortkey)  return eslOK;
  for (i = 0; i < h->N; i++) h->hit[i] = h->unsrt + i;
  if (h->N > 1)  qsort(h->hit, h->N, sizeof(P7_HIT *), hit_sorter_by_sortkey);
  h->is_sorted_by_seqidx  = FALSE;
  h->is_sorted_by_sortkey = TRUE;
  return eslOK;
}


/* Function:  p7_tophits_SortBySeqidx()
 * Synopsis:  Sorts a hit list by sequence index and position in that
 *            sequence at which the hit's first domain begins
 *
 * Purpose:   Sorts a top hit list. After this call,
 *            <h->hit[i]> points to the i'th ranked
 *            <P7_HIT> for all <h->N> hits.
 *
 * Returns:   <eslOK> on success.
 */
int
p7_tophits_SortBySeqidx(P7_TOPHITS *h)
{
  int i;

  if (h->is_sorted_by_seqidx)  return eslOK;
  for (i = 0; i < h->N; i++) h->hit[i] = h->unsrt + i;
  if (h->N > 1)  qsort(h->hit, h->N, sizeof(P7_HIT *), hit_sorter_by_seqidx);
  h->is_sorted_by_sortkey = FALSE;
  h->is_sorted_by_seqidx  = TRUE;
  return eslOK;
}



/* Function:  p7_tophits_Merge()
 * Synopsis:  Merge two top hits lists.
 *
 * Purpose:   Merge <h2> into <h1>. Upon return, <h1>
 *            contains the sorted, merged list. <h2>
 *            is effectively destroyed; caller should
 *            not access it further, and may as well free
 *            it immediately.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure, and
 *            both <h1> and <h2> remain valid.
 */
int
p7_tophits_Merge(P7_TOPHITS *h1, P7_TOPHITS *h2)
{
  void    *p;
  P7_HIT **new_hit = NULL;
  P7_HIT  *ori1    = h1->unsrt;    /* original base of h1's data */
  P7_HIT  *new2;
  int      i,j,k;
  int      Nalloc = h1->Nalloc + h2->Nalloc;
  int      status;

  /* Make sure the two lists are sorted */
  if ((status = p7_tophits_SortBySortkey(h1)) != eslOK) goto ERROR;
  if ((status = p7_tophits_SortBySortkey(h2)) != eslOK) goto ERROR;

  /* Attempt our allocations, so we fail early if we fail. 
   * Reallocating h1->unsrt screws up h1->hit, so fix it.
   */
  ESL_RALLOC(h1->unsrt, p, sizeof(P7_HIT) * Nalloc);
  ESL_ALLOC (new_hit, sizeof(P7_HIT *)    * Nalloc);
  for (i = 0; i < h1->N; i++)
    h1->hit[i] = h1->unsrt + (h1->hit[i] - ori1);

  /* Append h2's unsorted data array to h1. h2's data begin at <new2> */
  new2 = h1->unsrt + h1->N;
  memcpy(new2, h2->unsrt, sizeof(P7_HIT) * h2->N);

  /* Merge the sorted hit lists */
  for (i=0,j=0,k=0; i < h1->N && j < h2->N ; k++)
    new_hit[k] = (hit_sorter_by_sortkey(&h1->hit[i], &h2->hit[j]) > 0) ? new2 + (h2->hit[j++] - h2->unsrt) : h1->hit[i++];
  while (i < h1->N) new_hit[k++] = h1->hit[i++];
  while (j < h2->N) new_hit[k++] = new2 + (h2->hit[j++] - h2->unsrt);

  /* h2 now turns over management of name, acc, desc memory to h1;
   * nullify its pointers, to prevent double free.  */
  for (i = 0; i < h2->N; i++)
  {
      h2->unsrt[i].name = NULL;
      h2->unsrt[i].acc  = NULL;
      h2->unsrt[i].desc = NULL;
      h2->unsrt[i].dcl  = NULL;
  }

  /* Construct the new grown h1 */
  free(h1->hit);
  h1->hit    = new_hit;
  h1->Nalloc = Nalloc;
  h1->N     += h2->N;
  /* and is_sorted is TRUE, as a side effect of p7_tophits_Sort() above. */
  return eslOK;
  
 ERROR:
  if (new_hit != NULL) free(new_hit);
  return status;
}

/* Function:  p7_tophits_GetMaxPositionLength()
 * Synopsis:  Returns maximum position length in hit list (targets).
 *
 * Purpose:   Returns the length of the longest hit location (start/end)
 *               of all the registered hits, in chars. This is useful when
 *               deciding how to format output.
 *
 *            The maximum is taken over all registered hits. This
 *            opens a possible side effect: caller might print only
 *            the top hits, and the max name length in these top hits
 *            may be different than the max length over all the hits.
 *
 *            Used specifically for nhmmer output, so expects only one
 *            domain per hit
 *
 *            If there are no hits in <h>, or none of the
 *            hits have names, returns 0.
 */
int
p7_tophits_GetMaxPositionLength(P7_TOPHITS *h)
{
  int i, max, n;
  char buffer [11];

  for (max = 0, i = 0; i < h->N; i++) {
    if (h->unsrt[i].dcl[0].iali > 0) {
      n = sprintf (buffer, "%d", h->unsrt[i].dcl[0].iali);
      max = ESL_MAX(n, max);
      n = sprintf (buffer, "%d", h->unsrt[i].dcl[0].jali);
      max = ESL_MAX(n, max);
    }
  }
  return max;
}

/* Function:  p7_tophits_GetMaxNameLength()
 * Synopsis:  Returns maximum name length in hit list (targets).
 *
 * Purpose:   Returns the maximum name length of all the registered
 *            hits, in chars. This is useful when deciding how to
 *            format output.
 *            
 *            The maximum is taken over all registered hits. This
 *            opens a possible side effect: caller might print only
 *            the top hits, and the max name length in these top hits
 *            may be different than the max length over all the hits.
 *            
 *            If there are no hits in <h>, or none of the
 *            hits have names, returns 0.
 */
int
p7_tophits_GetMaxNameLength(P7_TOPHITS *h)
{
  int i, max, n;
  for (max = 0, i = 0; i < h->N; i++)
    if (h->unsrt[i].name != NULL) {
      n   = strlen(h->unsrt[i].name);
      max = ESL_MAX(n, max);
    }
  return max;
}

/* Function:  p7_tophits_GetMaxAccessionLength()
 * Synopsis:  Returns maximum accession length in hit list (targets).
 *
 * Purpose:   Same as <p7_tophits_GetMaxNameLength()>, but for
 *            accessions. If there are no hits in <h>, or none
 *            of the hits have accessions, returns 0.
 */
int
p7_tophits_GetMaxAccessionLength(P7_TOPHITS *h)
{
  int i, max, n;
  for (max = 0, i = 0; i < h->N; i++)
    if (h->unsrt[i].acc != NULL) {
      n   = strlen(h->unsrt[i].acc);
      max = ESL_MAX(n, max);
    }
  return max;
}

/* Function:  p7_tophits_GetMaxShownLength()
 * Synopsis:  Returns max shown name/accession length in hit list.
 *
 * Purpose:   Same as <p7_tophits_GetMaxNameLength()>, but 
 *            for the case when --acc is on, where
 *            we show accession if one is available, and 
 *            fall back to showing the name if it is not.
 *            Returns the max length of whatever is being
 *            shown as the reported "name".
 */
int
p7_tophits_GetMaxShownLength(P7_TOPHITS *h)
{
  int i, max, n;
  for (max = 0, i = 0; i < h->N; i++)
  {
    if (h->unsrt[i].acc != NULL && h->unsrt[i].acc[0] != '\0')
    {
      n   = strlen(h->unsrt[i].acc);
      max = ESL_MAX(n, max);
    }
    else if (h->unsrt[i].name != NULL)
    {
      n   = strlen(h->unsrt[i].name);
      max = ESL_MAX(n, max);
    }
  }
  return max;
}


/* Function:  p7_tophits_Reuse()
 * Synopsis:  Reuse a hit list, freeing internals.
 *
 * Purpose:   Reuse the tophits list <h>; save as 
 *            many malloc/free cycles as possible,
 *            as opposed to <Destroy()>'ing it and
 *            <Create>'ing a new one.
 */
int
p7_tophits_Reuse(P7_TOPHITS *h)
{
  int i, j;

  if (h == NULL) return eslOK;
  if (h->unsrt != NULL) 
  {
    for (i = 0; i < h->N; i++)
    {
      if (h->unsrt[i].name != NULL) free(h->unsrt[i].name);
      if (h->unsrt[i].acc  != NULL) free(h->unsrt[i].acc);
      if (h->unsrt[i].desc != NULL) free(h->unsrt[i].desc);
      if (h->unsrt[i].dcl  != NULL) {
        for (j = 0; j < h->unsrt[i].ndom; j++)
          if (h->unsrt[i].dcl[j].ad != NULL) p7_alidisplay_Destroy(h->unsrt[i].dcl[j].ad);
        free(h->unsrt[i].dcl);
      }
    }
  }
  h->N         = 0;
  h->is_sorted_by_seqidx = FALSE;
  h->is_sorted_by_sortkey = TRUE;  /* because there are 0 hits */
  h->hit[0]    = h->unsrt;
  return eslOK;
}

/* Function:  p7_tophits_Destroy()
 * Synopsis:  Frees a hit list.
 */
void
p7_tophits_Destroy(P7_TOPHITS *h)
{
  int i,j;
  if (h == NULL) return;
  if (h->hit   != NULL) free(h->hit);
  if (h->unsrt != NULL) 
  {
    for (i = 0; i < h->N; i++)
    {
      if (h->unsrt[i].name != NULL) free(h->unsrt[i].name);
      if (h->unsrt[i].acc  != NULL) free(h->unsrt[i].acc);
      if (h->unsrt[i].desc != NULL) free(h->unsrt[i].desc);
      if (h->unsrt[i].dcl  != NULL) {
        for (j = 0; j < h->unsrt[i].ndom; j++)
          if (h->unsrt[i].dcl[j].ad != NULL) p7_alidisplay_Destroy(h->unsrt[i].dcl[j].ad);
        free(h->unsrt[i].dcl);
      }
    }
    free(h->unsrt);
  }
  free(h);
  return;
}
/*---------------- end, P7_TOPHITS object -----------------------*/






/*****************************************************************
 * 2. Standard (human-readable) output of pipeline results
 *****************************************************************/

/* workaround_bug_h74(): 
 * Different envelopes, identical alignment
 * 
 * Bug #h74, though extremely rare, arises from a limitation in H3's
 * implementation of Forward/Backward, as follows:
 * 
 *  1. A multidomain region is analyzed by stochastic clustering
 *  2. Overlapping envelopes are found (w.r.t sequence coords), though
 *     trace clusters are distinct if HMM endpoints are also considered.
 *  3. We have no facility for limiting Forward/Backward to a specified
 *     range of profile coordinates, so each envelope is passed to
 *     rescore_isolated_domain() and analyzed independently.
 *  4. Optimal accuracy alignment may identify exactly the same alignment
 *     in the overlap region shared by the two envelopes.
 *     
 * The disturbing result is two different envelopes that have
 * identical alignments and alignment endpoints.
 * 
 * The correct fix is to define envelopes not only by sequence
 * endpoints but also by profile endpoints, passing them to
 * rescore_isolated_domain(), and limiting F/B calculations to this
 * pieces of the DP lattice. This requires a fair amount of work,
 * adding to the optimized API.
 * 
 * The workaround is to detect when there are duplicate alignments,
 * and only display one. We show the one with the best bit score.
 * 
 * If we ever implement envelope-limited versions of F/B, revisit this
 * fix.
 *
 * SRE, Tue Dec 22 16:27:04 2009
 * xref J5/130; notebook/2009/1222-hmmer-bug-h74
 */
static int
workaround_bug_h74(P7_TOPHITS *th)
{
  int h;
  int d1, d2;
  int dremoved;

  for (h = 0; h < th->N; h++)  
    if (th->hit[h]->noverlaps)
    {
        for (d1 = 0; d1 < th->hit[h]->ndom; d1++)
          for (d2 = d1+1; d2 < th->hit[h]->ndom; d2++)
            if (th->hit[h]->dcl[d1].iali == th->hit[h]->dcl[d2].iali &&
                th->hit[h]->dcl[d1].jali == th->hit[h]->dcl[d2].jali)
            {
                dremoved = (th->hit[h]->dcl[d1].bitscore >= th->hit[h]->dcl[d2].bitscore) ? d2 : d1;
                if (th->hit[h]->dcl[dremoved].is_reported) { th->hit[h]->dcl[dremoved].is_reported = FALSE; th->hit[h]->nreported--; }
                if (th->hit[h]->dcl[dremoved].is_included) { th->hit[h]->dcl[dremoved].is_included = FALSE; th->hit[h]->nincluded--; }
            }
    }
  return eslOK;
}



/* Function:  p7_tophits_ComputeNhmmerEvalues()
 * Synopsis:  Compute e-values based on pvalues and window sizes.
 *
 * Purpose:   After nhmmer pipeline has completed, the th object contains
 *               hits where the p-values haven't yet been converted to
 *               e-values. That modification depends on an established
 *               number of sequences. In nhmmer, this is computed as N/W,
 *               for a database of N residues, where W is some standardized
 *               window length (nhmmer passes om->max_length). E-values are
 *               set here based on that formula. We also set the sortkey so
 *               the output will be sorted correctly.
 *
 * Returns:   <eslOK> on success.
 */
int
p7_tophits_ComputeNhmmerEvalues(P7_TOPHITS *th, double N, int W)
{
  int i;    /* counters over hits */

  for (i = 0; i < th->N ; i++)
  {
    th->unsrt[i].lnP        += log((float)N / (float)W);
    th->unsrt[i].dcl[0].lnP  = th->unsrt[i].lnP;
    th->unsrt[i].sortkey     = -1.0 * th->unsrt[i].lnP;
  }
  return eslOK;
}


/* Function:  p7_tophits_RemoveDuplicates()
 * Synopsis:  Remove overlapping hits.
 *
 * Purpose:   After nhmmer pipeline has completed, the TopHits object may
 *               contain duplicates if the target was broken into overlapping
 *               windows. Scan through, and remove duplicates.  Since the
 *               duplicates may be incomplete (one sequence is a partial
 *               hit because it's window didn't cover the full length of
 *               the hit), keep the one with better p-value
 *
 * Returns:   <eslOK> on success.
 */
int
p7_tophits_RemoveDuplicates(P7_TOPHITS *th)
{
  int     i;    /* counter over hits */
  int     j;    /* previous un-duplicated hit */
  int     s_i, s_j, e_i, e_j, dir_i, dir_j, len_i, len_j;
  int64_t sub_i, sub_j;
  double  p_i, p_j;
  int remove;

  if (th->N<2) return eslOK;

  j=0;
  for (i = 1; i < th->N; i++)
  {

      sub_j = th->hit[j]->subseq_start;
      p_j = th->hit[j]->lnP;
      s_j = th->hit[j]->dcl[0].iali;
      e_j = th->hit[j]->dcl[0].jali;
      dir_j = (s_j < e_j ? 1 : -1);
      len_j = 1 + dir_j * (e_j - s_j) ;


      sub_i = th->hit[i]->subseq_start;
      p_i = th->hit[i]->lnP;
      s_i = th->hit[i]->dcl[0].iali;
      e_i = th->hit[i]->dcl[0].jali;
      dir_i = (s_i < e_i ? 1 : -1);
      len_i = 1 + dir_i * (e_i - s_i) ;

      if ( th->hit[i]->seqidx ==  th->hit[i-1]->seqidx  && //same source sequence
         //  sub_i != sub_j && // not from the same subsequence ... if they are, then domaindef already split them up
           dir_i == dir_j && // only bother removing if the overlapping hits are on the same strand
           (
               ( s_i >= s_j-3 && s_i <= s_j+3) ||  // at least one side is essentially flush (
               ( e_i >= e_j-3 && e_i <= e_j+3)
           )
      )
      {
        /* Force one to go unreported.  I prefer to keep the one with the
         * better e-value.  This addresses two issues
         * (1) longer hits sometimes encounter higher bias corrections,
         *     leading to lower scores; seems better to focus on the
         *     high-scoring heart of the alignment, if we have a
         *     choice
         * (2) it is possible that a lower-scoring longer hit (see #1)
         *     that is close to threshold will pass the pipeline in
         *     one condition and not the other (e.g. --toponly, or
         *     single vs multi threaded), and if longer hits obscure
         *     shorter higher-scoring ones, a shorter "hit" might be
         *     lost by being obscured by a longer one that is subsequently
         *     removed due to insufficient score.
         * see late notes in ~wheelert/notebook/2012/0518-dfam-scripts/00NOTES
        */
        //remove = 0; // 1 := keep i,  0 := keep i-1
        //if (s_i==s_j && e_i==e_j) // if same length, choose the one with lower p-value (they should be roughly the same)
          remove = p_i < p_j ? j : i;
        //else // otherwise remove the shorter one (assume that one was cut short by the end of a segment
        //  remove = len_i > len_j ? j : i;

        th->hit[remove]->flags |= p7_IS_DUPLICATE;

        j = remove == j ? i : j;
      } else {
        j = i;
      }
  }
  return eslOK;
}



/* Function:  p7_tophits_Threshold()
 * Synopsis:  Apply score and E-value thresholds to a hitlist before output.
 *
 * Purpose:   After a pipeline has completed, go through it and mark all
 *            the targets and domains that are "significant" (satisfying
 *            the reporting thresholds set for the pipeline). 
 *            
 *            Also sets the final total number of reported and
 *            included targets, the number of reported and included
 *            targets in each target, and the size of the search space
 *            for per-domain conditional E-value calculations,
 *            <pli->domZ>. By default, <pli->domZ> is the number of
 *            significant targets reported.
 *
 *            If model-specific thresholds were used in the pipeline,
 *            we cannot apply those thresholds now. They were already
 *            applied in the pipeline. In this case all we're
 *            responsible for here is counting them (setting
 *            nreported, nincluded counters).
 *            
 * Returns:   <eslOK> on success.
 */
int
p7_tophits_Threshold(P7_TOPHITS *th, P7_PIPELINE *pli)
{
  int h, d;    /* counters over sequence hits, domains in sequences */
  
  /* Flag reported, included targets (if we're using general thresholds) */
  if (! pli->use_bit_cutoffs) 
  {
    for (h = 0; h < th->N; h++)
    {

      if ( !(th->hit[h]->flags & p7_IS_DUPLICATE) &&
          p7_pli_TargetReportable(pli, th->hit[h]->score, th->hit[h]->lnP))
      {
          th->hit[h]->flags |= p7_IS_REPORTED;
          if (p7_pli_TargetIncludable(pli, th->hit[h]->score, th->hit[h]->lnP))
              th->hit[h]->flags |= p7_IS_INCLUDED;

          if (pli->long_targets) { // no domains in dna search, so:
            th->hit[h]->dcl[0].is_reported = th->hit[h]->flags & p7_IS_REPORTED;
            th->hit[h]->dcl[0].is_included = th->hit[h]->flags & p7_IS_INCLUDED;
          }
      }
    }
  }

  /* Count reported, included targets */
  th->nreported = 0;
  th->nincluded = 0;
  for (h = 0; h < th->N; h++)
  {
      if (th->hit[h]->flags & p7_IS_REPORTED)  th->nreported++;
      if (th->hit[h]->flags & p7_IS_INCLUDED)  th->nincluded++;
  }
  
  /* Now we can determined domZ, the effective search space in which additional domains are found */
  if (pli->domZ_setby == p7_ZSETBY_NTARGETS) pli->domZ = (double) th->nreported;


  /* Second pass is over domains, flagging reportable/includable ones. 
   * Depends on knowing the domZ we just set.
   * Note how this enforces a hierarchical logic of 
   * (sequence|domain) must be reported to be included, and
   * domain can only be (reported|included) if whole sequence is too.
   */
  if (! pli->use_bit_cutoffs && !pli->long_targets)
  {
    for (h = 0; h < th->N; h++)
    {
      if (th->hit[h]->flags & p7_IS_REPORTED)
      {
        for (d = 0; d < th->hit[h]->ndom; d++)
        {
          if (p7_pli_DomainReportable(pli, th->hit[h]->dcl[d].bitscore, th->hit[h]->dcl[d].lnP))
            th->hit[h]->dcl[d].is_reported = TRUE;
          if ((th->hit[h]->flags & p7_IS_INCLUDED) &&
              p7_pli_DomainIncludable(pli, th->hit[h]->dcl[d].bitscore, th->hit[h]->dcl[d].lnP))
            th->hit[h]->dcl[d].is_included = TRUE;
        }
      }
    }
  }

  /* Count the reported, included domains */
  for (h = 0; h < th->N; h++)  
    for (d = 0; d < th->hit[h]->ndom; d++)
    {
        if (th->hit[h]->dcl[d].is_reported) th->hit[h]->nreported++;
        if (th->hit[h]->dcl[d].is_included) th->hit[h]->nincluded++;
    }

  workaround_bug_h74(th);  /* blech. This function is defined above; see commentary and crossreferences there. */

  return eslOK;
}





/* Function:  p7_tophits_CompareRanking()
 * Synopsis:  Compare current top hits to previous top hits ranking.
 *
 * Purpose:   Using a keyhash <kh> of the previous top hits and the
 *            their ranks, look at the current top hits list <th>
 *            and flag new hits that are included for the first time
 *            (by setting <p7_IS_NEW> flag) and hits that were 
 *            included previously, but are now below the inclusion
 *            threshold in the list (<by setting <p7_IS_DROPPED>
 *            flag). 
 *
 *            The <th> must already have been processed by
 *            <p7_tophits_Threshold()>. We assume the <is_included>,
 *            <is_reported> flags are set on the appropriate hits.
 * 
 *            Upon return, the keyhash <kh> is updated to hash the
 *            current top hits list and their ranks. 
 *            
 *            Optionally, <*opt_nnew> is set to the number of 
 *            newly included hits. jackhmmer uses this as part of
 *            its convergence criteria, for example.
 *            
 *            These flags affect output of top target hits from
 *            <p7_tophits_Targets()>. 
 *            
 *            It only makes sense to call this function in context of
 *            an iterative search.
 *            
 *            The <p7_IS_NEW> flag is comprehensive: all new hits
 *            are flagged (and counted in <*opt_nnew>). The <p7_WAS_DROPPED> 
 *            flag is not comprehensive: only those hits that still 
 *            appear in the current top hits list are flagged. If a 
 *            hit dropped entirely off the list, it isn't counted
 *            as "dropped". (This could be done, but we would want
 *            to have two keyhashes, one old and one new, to do the
 *            necessary comparisons efficiently.)
 *            
 *            If the target names in <th> are not unique, results may
 *            be strange.
 *
 * Args:      th         - current top hits list
 *            kh         - hash of top hits' ranks (in: previous tophits; out: <th>'s tophits)
 *            opt_nnew   - optRETURN: number of new hits above inclusion threshold
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> if <kh> needed to be reallocated but this failed.
 */
int
p7_tophits_CompareRanking(P7_TOPHITS *th, ESL_KEYHASH *kh, int *opt_nnew)
{
  int nnew = 0;
  int oldrank;
  int h;
  int status;

  /* Flag the hits in the list with whether they're new in the included top hits,
   * and whether they've dropped off the included list.
   */
  for (h = 0; h < th->N; h++)
  {
    esl_keyhash_Lookup(kh, th->hit[h]->name, -1, &oldrank);
      
    if (th->hit[h]->flags & p7_IS_INCLUDED) 
    {
      if (oldrank == -1) { th->hit[h]->flags |= p7_IS_NEW; nnew++; }
    }
    else 
    {
      if (oldrank >=  0) th->hit[h]->flags |= p7_IS_DROPPED;
    }
  }

  /* Replace the old rank list with the new one */
  esl_keyhash_Reuse(kh);
  for (h = 0; h < th->N; h++)
  {
    if (th->hit[h]->flags & p7_IS_INCLUDED)
    {
      /* What happens when the same sequence name appears twice? It gets stored with higher rank */
      status = esl_keyhash_Store(kh, th->hit[h]->name, -1, NULL);
      if (status != eslOK && status != eslEDUP) goto ERROR;
    }
  }
  
  if (opt_nnew != NULL) *opt_nnew = nnew;
  return eslOK;

 ERROR:
  if (opt_nnew != NULL) *opt_nnew = 0;
  return status;
}


/* Function:  p7_tophits_Targets()
 * Synopsis:  Format and write a top target hits list to an output stream.
 *
 * Purpose:   Output a list of the reportable top target hits in <th> 
 *            in human-readable ASCII text format to stream <ofp>, using
 *            final pipeline accounting stored in <pli>. 
 * 
 *            The tophits list <th> should already be sorted (see
 *            <p7_tophits_Sort()> and thresholded (see
 *            <p7_tophits_Threshold>).
 *
 * Returns:   <eslOK> on success.
 * 
 * Throws:    <eslEWRITE> on write failure.
 */
int
p7_tophits_Targets(FILE *ofp, P7_TOPHITS *th, P7_PIPELINE *pli, int textw)
{
  char   newness;
  int    h;
  int    d;
  int    namew;
  int    posw;
  int    descw;
  char  *showname;

  int    have_printed_incthresh = FALSE;

  /* when --acc is on, we'll show accession if available, and fall back to name */
  if (pli->show_accessions) namew = ESL_MAX(8, p7_tophits_GetMaxShownLength(th));
  else                      namew = ESL_MAX(8, p7_tophits_GetMaxNameLength(th));


  if (pli->long_targets) 
  {
      posw = ESL_MAX(6, p7_tophits_GetMaxPositionLength(th));

      if (textw >  0)           descw = ESL_MAX(32, textw - namew - 2*posw - 32); /* 32 chars excluding desc and two posw's is from the format: 2 + 9+2 +6+2 +5+2 +<name>+1 +<startpos>+1 +<endpos>+1 +1 */
      else                      descw = 0;                               /* unlimited desc length is handled separately */

      if (fprintf(ofp, "Scores for complete hit%s:\n",     pli->mode == p7_SEARCH_SEQS ? "s" : "") < 0)
        ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
      if (fprintf(ofp, "  %9s %6s %5s  %-*s %*s %*s  %s\n",
      "E-value", " score", " bias", namew, (pli->mode == p7_SEARCH_SEQS ? "Sequence":"Model"), posw, "start", posw, "end", "Description") < 0)
        ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
      if (fprintf(ofp, "  %9s %6s %5s  %-*s %*s %*s  %s\n",
      "-------", "------", "-----", namew, "--------", posw, "-----", posw, "-----", "-----------") < 0)
        ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
  }
  else 
  {

      if (textw >  0)           descw = ESL_MAX(32, textw - namew - 61); /* 61 chars excluding desc is from the format: 2 + 22+2 +22+2 +8+2 +<name>+1 */
      else                      descw = 0;                               /* unlimited desc length is handled separately */


      /* The minimum width of the target table is 111 char: 47 from fields, 8 from min name, 32 from min desc, 13 spaces */
      if (fprintf(ofp, "Scores for complete sequence%s (score includes all domains):\n", pli->mode == p7_SEARCH_SEQS ? "s" : "") < 0) 
        ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
      if (fprintf(ofp, "  %22s  %22s  %8s\n",                              " --- full sequence ---",        " --- best 1 domain ---",   "-#dom-") < 0) 
        ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
      if (fprintf(ofp, "  %9s %6s %5s  %9s %6s %5s  %5s %2s  %-*s %s\n", 
      "E-value", " score", " bias", "E-value", " score", " bias", "  exp",  "N", namew, (pli->mode == p7_SEARCH_SEQS ? "Sequence":"Model"), "Description") < 0)
        ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
      if (fprintf(ofp, "  %9s %6s %5s  %9s %6s %5s  %5s %2s  %-*s %s\n", 
      "-------", "------", "-----", "-------", "------", "-----", " ----", "--", namew, "--------", "-----------") < 0)
        ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
  }

  for (h = 0; h < th->N; h++)
    if (th->hit[h]->flags & p7_IS_REPORTED)
    {
        d    = th->hit[h]->best_domain;

        if (! (th->hit[h]->flags & p7_IS_INCLUDED) && ! have_printed_incthresh) 
        {
          if (fprintf(ofp, "  ------ inclusion threshold ------\n") < 0)
            ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
          have_printed_incthresh = TRUE;
        }

        if (pli->show_accessions)
        {   /* the --acc option: report accessions rather than names if possible */
            if (th->hit[h]->acc != NULL && th->hit[h]->acc[0] != '\0') showname = th->hit[h]->acc;
            else                                                       showname = th->hit[h]->name;
        }
        else
          showname = th->hit[h]->name;

        if      (th->hit[h]->flags & p7_IS_NEW)     newness = '+';
        else if (th->hit[h]->flags & p7_IS_DROPPED) newness = '-';
        else                                        newness = ' ';

        if (pli->long_targets) 
        {
          if (fprintf(ofp, "%c %9.2g %6.1f %5.1f  %-*s %*d %*d ",
          newness,
          exp(th->hit[h]->lnP), // * pli->Z,
          th->hit[h]->score,
          eslCONST_LOG2R * th->hit[h]->dcl[d].dombias, // an nhmmer hit is really a domain, so this is the hit's bias correction
          namew, showname,
          posw, th->hit[h]->dcl[d].iali,
          posw, th->hit[h]->dcl[d].jali) < 0)
            ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
        }
        else
        {
          if (fprintf(ofp, "%c %9.2g %6.1f %5.1f  %9.2g %6.1f %5.1f  %5.1f %2d  %-*s ",
          newness,
          exp(th->hit[h]->lnP) * pli->Z,
          th->hit[h]->score,
          th->hit[h]->pre_score - th->hit[h]->score, /* bias correction */
          exp(th->hit[h]->dcl[d].lnP) * pli->Z,
          th->hit[h]->dcl[d].bitscore,
          eslCONST_LOG2R * th->hit[h]->dcl[d].dombias, /* convert NATS to BITS at last moment */
          th->hit[h]->nexpected,
          th->hit[h]->nreported,
          namew, showname) < 0)
            ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
        }

        if (textw > 0) 
        {
          if (fprintf(ofp, " %-.*s\n", descw, th->hit[h]->desc == NULL ? "" : th->hit[h]->desc) < 0)
            ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
        }
        else 
        {
          if (fprintf(ofp, " %s\n",           th->hit[h]->desc == NULL ? "" : th->hit[h]->desc) < 0)
            ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
        }
        /* do NOT use *s with unlimited (INT_MAX) line length. Some systems
         * have an fprintf() bug here (we found one on an Opteron/SUSE Linux
         * system (#h66)
         */
    }

    if (th->nreported == 0)
    { 
      if (fprintf(ofp, "\n   [No hits detected that satisfy reporting thresholds]\n") < 0)
        ESL_EXCEPTION_SYS(eslEWRITE, "per-sequence hit list: write failed");
    }
  return eslOK;
}


/* Function:  p7_tophits_Domains()
 * Synopsis:  Standard output format for top domain hits and alignments.
 *
 * Purpose:   For each reportable target sequence, output a tabular summary
 *            of reportable domains found in it, followed by alignments of
 *            each domain.
 * 
 *            Similar to <p7_tophits_Targets()>; see additional notes there.
 *
 * Returns:   <eslOK> on success.
 * 
 * Throws:    <eslEWRITE> if a write to <ofp> fails; for example, if
 *            the disk fills up.
 */
int
p7_tophits_Domains(FILE *ofp, P7_TOPHITS *th, P7_PIPELINE *pli, int textw)
{
  int   h, d;
  int   nd;
  int   namew, descw;
  char *showname;
  int   status;

  if (pli->long_targets) 
  {
      if (fprintf(ofp, "Annotation for each hit %s:\n",
      pli->show_alignments ? " (and alignments)" : "") < 0)
        ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
  }
  else 
  {
      if (fprintf(ofp, "Domain annotation for each %s%s:\n",
      pli->mode == p7_SEARCH_SEQS ? "sequence" : "model",
      pli->show_alignments ? " (and alignments)" : "") < 0)
        ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
  }

  for (h = 0; h < th->N; h++)
    if (th->hit[h]->flags & p7_IS_REPORTED)
    {
      if (pli->show_accessions && th->hit[h]->acc != NULL && th->hit[h]->acc[0] != '\0')
      {
        showname = th->hit[h]->acc;
        namew    = strlen(th->hit[h]->acc);
      }
      else
      {
        showname = th->hit[h]->name;
        namew = strlen(th->hit[h]->name);
      }

      if (textw > 0)
      {
        descw = ESL_MAX(32, textw - namew - 5);
        if (fprintf(ofp, ">> %s  %-.*s\n", showname, descw, (th->hit[h]->desc == NULL ? "" : th->hit[h]->desc)) < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
      }
      else
      {
        if (fprintf(ofp, ">> %s  %s\n",    showname,        (th->hit[h]->desc == NULL ? "" : th->hit[h]->desc)) < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
      }

      if (th->hit[h]->nreported == 0)
      {
        if (fprintf(ofp,"   [No individual domains that satisfy reporting thresholds (although complete target did)]\n\n") < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
        continue;
      }


      if (pli->long_targets) {
        /* The dna hit table is 119 char wide:
    score  bias    Evalue hmmfrom  hmm to     alifrom    ali to      envfrom    env to       hqfrom     hq to   sq len      acc
   ------ ----- --------- ------- -------    --------- ---------    --------- ---------    --------- --------- ---------    ----
 !   82.7 104.4   4.9e-22     782     998 .. 241981174 241980968 .. 241981174 241980966 .. 241981174 241980968 234234233   0.78
        */
        if (fprintf(ofp, "   %6s %5s %9s %9s %9s %2s %9s %9s %2s %9s %9s %2s %9s %9s %9s %2s %4s\n",  "score",  "bias",  "  Evalue", "hmmfrom",  "hmm to", "  ", " alifrom ",  " ali to ", "  ",  " envfrom ",  " env to ", "  ", " hqfrom ",  "  hq to  ", "  sq len ", "  ",  "acc")  < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
        if (fprintf(ofp, "   %6s %5s %9s %9s %9s %2s %9s %9s %2s %9s %9s %2s %9s %9s %9s %2s %4s\n",  "------", "-----", "---------", "-------", "-------", "  ", "---------", "---------", "  ", "---------", "---------", "  ", "---------", "---------", "---------", "  ", "----") < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
      } else {

        /* The domain table is 101 char wide:
     #     score  bias    Evalue hmmfrom   hmmto    alifrom  ali to    envfrom  env to     acc
     ---   ------ ----- --------- ------- -------    ------- -------    ------- -------    ----
     1 ?  123.4  23.1    6.8e-9       3    1230 ..       1     492 []       2     490 .] 0.90
     123 ! 1234.5 123.4 123456789 1234567 1234567 .. 1234567 1234567 [] 1234567 1234568 .] 0.12
        */

        if (fprintf(ofp, " %3s   %6s %5s %9s %9s %7s %7s %2s %7s %7s %2s %7s %7s %2s %4s\n",    "#",  "score",  "bias",  "c-Evalue",  "i-Evalue", "hmmfrom",  "hmm to", "  ", "alifrom",  "ali to", "  ", "envfrom",  "env to", "  ",  "acc")  < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
        if (fprintf(ofp, " %3s   %6s %5s %9s %9s %7s %7s %2s %7s %7s %2s %7s %7s %2s %4s\n",  "---", "------", "-----", "---------", "---------", "-------", "-------", "  ", "-------", "-------", "  ", "-------", "-------", "  ", "----")  < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
      }

      nd = 0;
      for (d = 0; d < th->hit[h]->ndom; d++)
          if (th->hit[h]->dcl[d].is_reported)
          {
            nd++;
            if (pli->long_targets)
            {

               if (fprintf(ofp, " %c %6.1f %5.1f %9.2g %9d %9d %c%c %9ld %9ld %c%c %9d %9d %c%c %9d %9d %9ld    %4.2f\n",
                    //nd,
                    th->hit[h]->dcl[d].is_included ? '!' : '?',
                    th->hit[h]->dcl[d].bitscore,
                    th->hit[h]->dcl[d].dombias * eslCONST_LOG2R, /* convert NATS to BITS at last moment */
                    exp(th->hit[h]->dcl[d].lnP),
                    th->hit[h]->dcl[d].ad->hmmfrom,
                    th->hit[h]->dcl[d].ad->hmmto,
                    (th->hit[h]->dcl[d].ad->hmmfrom == 1) ? '[' : '.',
                    (th->hit[h]->dcl[d].ad->hmmto   == th->hit[h]->dcl[d].ad->M) ? ']' : '.',
                    th->hit[h]->dcl[d].ad->sqfrom,
                    th->hit[h]->dcl[d].ad->sqto,
                    (th->hit[h]->dcl[d].ad->sqfrom == 1) ? '[' : '.',
                    (th->hit[h]->dcl[d].ad->sqto   == th->hit[h]->dcl[d].ad->L) ? ']' : '.',
                    th->hit[h]->dcl[d].ienv,
                    th->hit[h]->dcl[d].jenv,
                    (th->hit[h]->dcl[d].ienv == 1) ? '[' : '.',
                    (th->hit[h]->dcl[d].jenv == th->hit[h]->dcl[d].ad->L) ? ']' : '.',
                    th->hit[h]->dcl[d].ihq,
                    th->hit[h]->dcl[d].jhq,
                    th->hit[h]->dcl[d].ad->L,
                    (th->hit[h]->dcl[d].oasc / (1.0 + fabs((float) (th->hit[h]->dcl[d].jenv - th->hit[h]->dcl[d].ienv))))) < 0)
                         ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");

            }
            else
            {
              if (fprintf(ofp, " %3d %c %6.1f %5.1f %9.2g %9.2g %7d %7d %c%c %7ld %7ld %c%c %7d %7d %c%c %4.2f\n",
                    nd,
                    th->hit[h]->dcl[d].is_included ? '!' : '?',
                    th->hit[h]->dcl[d].bitscore,
                    th->hit[h]->dcl[d].dombias * eslCONST_LOG2R, /* convert NATS to BITS at last moment */
                    exp(th->hit[h]->dcl[d].lnP) * pli->domZ,
                    exp(th->hit[h]->dcl[d].lnP) * pli->Z,
                    th->hit[h]->dcl[d].ad->hmmfrom,
                    th->hit[h]->dcl[d].ad->hmmto,
                    (th->hit[h]->dcl[d].ad->hmmfrom == 1) ? '[' : '.',
                    (th->hit[h]->dcl[d].ad->hmmto   == th->hit[h]->dcl[d].ad->M) ? ']' : '.',
                    th->hit[h]->dcl[d].ad->sqfrom,
                    th->hit[h]->dcl[d].ad->sqto,
                    (th->hit[h]->dcl[d].ad->sqfrom == 1) ? '[' : '.',
                    (th->hit[h]->dcl[d].ad->sqto   == th->hit[h]->dcl[d].ad->L) ? ']' : '.',
                    th->hit[h]->dcl[d].ienv,
                    th->hit[h]->dcl[d].jenv,
                    (th->hit[h]->dcl[d].ienv == 1) ? '[' : '.',
                    (th->hit[h]->dcl[d].jenv == th->hit[h]->dcl[d].ad->L) ? ']' : '.',
                    (th->hit[h]->dcl[d].oasc / (1.0 + fabs((float) (th->hit[h]->dcl[d].jenv - th->hit[h]->dcl[d].ienv))))) < 0)
                        ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
            }
          }

          if (pli->show_alignments)
          {
            if (pli->long_targets)
            {
              if (fprintf(ofp, "\n  Alignment:\n") < 0)
                ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
            }
            else
            {
              if (fprintf(ofp, "\n  Alignments for each domain:\n") < 0)
                ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
              nd = 0;
            }

            for (d = 0; d < th->hit[h]->ndom; d++)
              if (th->hit[h]->dcl[d].is_reported)
              {
                nd++;
                if (!pli->long_targets)
                {
                  if (fprintf(ofp, "  == domain %d", nd ) < 0)
                    ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
                }
                if (fprintf(ofp, "  score: %.1f bits", th->hit[h]->dcl[d].bitscore) < 0)
                  ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
                if (!pli->long_targets)
                {
                  if (fprintf(ofp, ";  conditional E-value: %.2g\n",  exp(th->hit[h]->dcl[d].lnP) * pli->domZ) < 0)
                    ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
                }
                else
                {
                  if (fprintf(ofp, "\n") < 0)
                    ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
                }

                if ((status = p7_alidisplay_Print(ofp, th->hit[h]->dcl[d].ad, 40, textw, pli->show_accessions)) != eslOK) return status;

                if (fprintf(ofp, "\n") < 0)
                  ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
              }
          }
          else
          { 
            if (fprintf(ofp, "\n") < 0)
              ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
          }
    }

    if (th->nreported == 0)
    {
      if (fprintf(ofp, "\n   [No targets detected that satisfy reporting thresholds]\n") < 0) 
        ESL_EXCEPTION_SYS(eslEWRITE, "domain hit list: write failed");
    }
    return eslOK;
}


/* Function:  p7_tophits_Alignment()
 * Synopsis:  Create a multiple alignment of all the included domains.
 *
 * Purpose:   Create a multiple alignment of all domains marked
 *            "includable" in the top hits list <th>, and return it in
 *            <*ret_msa>.
 *            
 *            Use of <optflags> is identical to <optflags> in <p7_tracealign_Seqs()>.
 *            Possible flags include <p7_DIGITIZE>, <p7_ALL_CONSENSUS_COLS>,
 *            and <p7_TRIM>; they may be OR'ed together. Otherwise, pass
 *            <p7_DEFAULT> to set no flags.
 *
 *            Caller may optionally provide <inc_sqarr>, <inc_trarr>, and
 *            <inc_n> to include additional sequences in the alignment
 *            (the jackhmmer query, for example). Otherwise, pass <NULL, NULL, 0>.
 *
 * Returns:   <eslOK> on success, and <*ret_msa> points to a new MSA that
 *            the caller is responsible for freeing.
 *
 *            Returns <eslFAIL> if there are no reported domains that
 *            satisfy reporting thresholds, in which case <*ret_msa>
 *            is <NULL>.
 *
 * Throws:    <eslEMEM> on allocation failure; <eslECORRUPT> on 
 *            unexpected internal data corruption.
 *
 * Xref:      J4/29: incept.
 *            J4/76: added inc_sqarr, inc_trarr, inc_n, optflags 
 */
int
p7_tophits_Alignment(const P7_TOPHITS *th, const ESL_ALPHABET *abc, 
         ESL_SQ **inc_sqarr, P7_TRACE **inc_trarr, int inc_n,
         int optflags, ESL_MSA **ret_msa)
{
  ESL_SQ   **sqarr = NULL;
  P7_TRACE **trarr = NULL;
  ESL_MSA   *msa   = NULL;
  int        ndom  = 0;
  int        h, d, y;
  int        M;
  int        status;

  /* How many domains will be included in the new alignment? 
   * We also set model size M here; every alignment has a copy.
   */
  for (h = 0; h < th->N; h++)
    if (th->hit[h]->flags & p7_IS_INCLUDED)
    {
        for (d = 0; d < th->hit[h]->ndom; d++)
          if (th->hit[h]->dcl[d].is_included)
            ndom++;
    }

  if (inc_n+ndom == 0) { status = eslFAIL; goto ERROR; }

  if (inc_n)     M = inc_trarr[0]->M;          
  else           M = th->hit[0]->dcl[0].ad->M;
  
  /* Allocation */
  ESL_ALLOC(sqarr, sizeof(ESL_SQ *)   * (ndom + inc_n));
  ESL_ALLOC(trarr, sizeof(P7_TRACE *) * (ndom + inc_n));
  /* Inclusion of preexisting seqs, traces: make copy of pointers */
  for (y = 0; y < inc_n;        y++) { sqarr[y] = inc_sqarr[y];  trarr[y] = inc_trarr[y]; }
  for (;      y < (ndom+inc_n); y++) { sqarr[y] = NULL;          trarr[y] = NULL; }

  /* Make faux sequences, traces from hit list */
  y = inc_n;
  for (h = 0; h < th->N; h++)
    if (th->hit[h]->flags & p7_IS_INCLUDED)
    {
        for (d = 0; d < th->hit[h]->ndom; d++)
          if (th->hit[h]->dcl[d].is_included)
          {
              if ((status = p7_alidisplay_Backconvert(th->hit[h]->dcl[d].ad, abc, &(sqarr[y]), &(trarr[y]))) != eslOK) goto ERROR;
              y++;
          }
    }
  
  /* Make the multiple alignment */
  if ((status = p7_tracealign_Seqs(sqarr, trarr, inc_n+ndom, M, optflags, NULL, &msa)) != eslOK) goto ERROR;

  /* Clean up */
  for (y = inc_n; y < ndom+inc_n; y++) esl_sq_Destroy(sqarr[y]);
  for (y = inc_n; y < ndom+inc_n; y++) p7_trace_Destroy(trarr[y]);
  free(sqarr);
  free(trarr);
  *ret_msa = msa;
  return eslOK;
  
 ERROR:
  if (sqarr != NULL) { for (y = inc_n; y < ndom+inc_n; y++) if (sqarr[y] != NULL) esl_sq_Destroy(sqarr[y]);   free(sqarr); }
  if (trarr != NULL) { for (y = inc_n; y < ndom+inc_n; y++) if (trarr[y] != NULL) p7_trace_Destroy(trarr[y]); free(trarr); }
  if (msa   != NULL) esl_msa_Destroy(msa);
  *ret_msa = NULL;
  return status;
}
/*---------------- end, standard output format ------------------*/





/*****************************************************************
 * 3. Tabular (parsable) output of pipeline results.
 *****************************************************************/

/* Function:  p7_tophits_TabularTargets()
 * Synopsis:  Output parsable table of per-sequence hits.
 *
 * Purpose:   Output a parseable table of reportable per-sequence hits
 *            in sorted tophits list <th> in an easily parsed ASCII
 *            tabular form to stream <ofp>, using final pipeline
 *            accounting stored in <pli>.
 *            
 *            Designed to be concatenated for multiple queries and
 *            multiple top hits list.
 *
 * Returns:   <eslOK> on success.
 * 
 * Throws:    <eslEWRITE> if a write to <ofp> fails; for example, if
 *            the disk fills up.
 */
int
p7_tophits_TabularTargets(FILE *ofp, char *qname, char *qacc, P7_TOPHITS *th, P7_PIPELINE *pli, int show_header)
{
  int qnamew = ESL_MAX(20, strlen(qname));
  int tnamew = ESL_MAX(20, p7_tophits_GetMaxNameLength(th));
  int qaccw  = ((qacc != NULL) ? ESL_MAX(10, strlen(qacc)) : 10);
  int taccw  = ESL_MAX(10, p7_tophits_GetMaxAccessionLength(th));
  int posw   = (pli->long_targets ? ESL_MAX(7, p7_tophits_GetMaxPositionLength(th)) : 0);
  int h,d;

  if (show_header)
  {
      if (pli->long_targets) 
      {
        if (fprintf(ofp, "#%-*s %-*s %-*s %-*s %s %s %*s %*s %*s %*s %*s %*s %*s %6s %9s %6s %5s  %s\n",
          tnamew-1, " target name",        taccw, "accession",  qnamew, "query name",           qaccw, "accession", "hmmfrom", "hmm to", posw, "alifrom", posw, "ali to", posw, "envfrom", posw, "env to", posw, "hq from", posw, "hq to", posw, "sq len", "strand", "  E-value", " score", " bias", "description of target") < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "tabular per-sequence hit list: write failed");
        if (fprintf(ofp, "#%*s %*s %*s %*s %s %s %*s %*s %*s %*s %*s %*s %*s %6s %9s %6s %5s %s\n",
          tnamew-1, "-------------------", taccw, "----------", qnamew, "--------------------", qaccw, "----------", "-------", "-------", posw, "-------", posw, "-------",  posw, "-------", posw, "-------", posw, "-------", posw, "-------", posw, "-------", "------", "---------", "------", "-----", "---------------------") < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "tabular per-per-sequence hit list: write failed");
      }
      else
      {
        if (fprintf(ofp, "#%*s %22s %22s %33s\n", tnamew+qnamew+taccw+qaccw+2, "", "--- full sequence ----", "--- best 1 domain ----", "--- domain number estimation ----") < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "tabular per-sequence hit list: write failed");
        if (fprintf(ofp, "#%-*s %-*s %-*s %-*s %9s %6s %5s %9s %6s %5s %5s %3s %3s %3s %3s %3s %3s %3s %s\n",
          tnamew-1, " target name",        taccw, "accession",  qnamew, "query name",           qaccw, "accession",  "  E-value", " score", " bias", "  E-value", " score", " bias", "exp", "reg", "clu", " ov", "env", "dom", "rep", "inc", "description of target") < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "tabular per-sequence hit list: write failed");
        if (fprintf(ofp, "#%*s %*s %*s %*s %9s %6s %5s %9s %6s %5s %5s %3s %3s %3s %3s %3s %3s %3s %s\n",
          tnamew-1, "-------------------", taccw, "----------", qnamew, "--------------------", qaccw, "----------", "---------", "------", "-----", "---------", "------", "-----", "---", "---", "---", "---", "---", "---", "---", "---", "---------------------") < 0)
          ESL_EXCEPTION_SYS(eslEWRITE, "tabular per-sequence hit list: write failed");
      }
  }

  for (h = 0; h < th->N; h++)
    if (th->hit[h]->flags & p7_IS_REPORTED)    
    {
        d    = th->hit[h]->best_domain;
        if (pli->long_targets) 
        {
            if (fprintf(ofp, "%-*s %-*s %-*s %-*s %7d %7d %*d %*d %*d %*d %*d %*d %*ld %6s %9.2g %6.1f %5.1f  %s\n",
                tnamew, th->hit[h]->name,
                taccw,  th->hit[h]->acc ? th->hit[h]->acc : "-",
                qnamew, qname,
                qaccw,  ( (qacc != NULL && qacc[0] != '\0') ? qacc : "-"),
                th->hit[h]->dcl[d].ad->hmmfrom,
                th->hit[h]->dcl[d].ad->hmmto,
                posw, th->hit[h]->dcl[d].iali,
                posw, th->hit[h]->dcl[d].jali,
                posw, th->hit[h]->dcl[d].ienv,
                posw, th->hit[h]->dcl[d].jenv,
                posw, th->hit[h]->dcl[d].ihq,
                posw, th->hit[h]->dcl[d].jhq,
                posw, th->hit[h]->dcl[0].ad->L,
                (th->hit[h]->dcl[d].iali < th->hit[h]->dcl[d].jali ? "   +  "  :  "   -  "),
                exp(th->hit[h]->lnP),
                th->hit[h]->score,
                th->hit[h]->dcl[d].dombias * eslCONST_LOG2R, /* convert NATS to BITS at last moment */
                th->hit[h]->desc == NULL ? "-" :  th->hit[h]->desc ) < 0)
                  ESL_EXCEPTION_SYS(eslEWRITE, "tabular per-sequence hit list: write failed");
        }
        else
        {
                if (fprintf(ofp, "%-*s %-*s %-*s %-*s %9.2g %6.1f %5.1f %9.2g %6.1f %5.1f %5.1f %3d %3d %3d %3d %3d %3d %3d %s\n",
                tnamew, th->hit[h]->name,
                taccw,  th->hit[h]->acc ? th->hit[h]->acc : "-",
                qnamew, qname,
                qaccw,  ( (qacc != NULL && qacc[0] != '\0') ? qacc : "-"),
                exp(th->hit[h]->lnP) * pli->Z,
                th->hit[h]->score,
                th->hit[h]->pre_score - th->hit[h]->score, /* bias correction */
                exp(th->hit[h]->dcl[d].lnP) * pli->Z,
                th->hit[h]->dcl[d].bitscore,
                th->hit[h]->dcl[d].dombias * eslCONST_LOG2R, /* convert NATS to BITS at last moment */
                th->hit[h]->nexpected,
                th->hit[h]->nregions,
                th->hit[h]->nclustered,
                th->hit[h]->noverlaps,
                th->hit[h]->nenvelopes,
                th->hit[h]->ndom,
                th->hit[h]->nreported,
                th->hit[h]->nincluded,
                (th->hit[h]->desc == NULL ? "-" : th->hit[h]->desc)) < 0)
                  ESL_EXCEPTION_SYS(eslEWRITE, "tabular per-sequence hit list: write failed");
        }
    }
  return eslOK;
}


/* Function:  p7_tophits_TabularDomains()
 * Synopsis:  Output parseable table of per-domain hits
 *
 * Purpose:   Output a parseable table of reportable per-domain hits
 *            in sorted tophits list <th> in an easily parsed ASCII
 *            tabular form to stream <ofp>, using final pipeline
 *            accounting stored in <pli>.
 *            
 *            Designed to be concatenated for multiple queries and
 *            multiple top hits list.
 *
 * Returns:   <eslOK> on success.
 * 
 * Throws:    <eslEWRITE> if a write to <ofp> fails; for example, if
 *            the disk fills up.
 */
int
p7_tophits_TabularDomains(FILE *ofp, char *qname, char *qacc, P7_TOPHITS *th, P7_PIPELINE *pli, int show_header)
{

  int qnamew = ESL_MAX(20, strlen(qname));
  int tnamew = ESL_MAX(20, p7_tophits_GetMaxNameLength(th));
  int qaccw  = (qacc ? ESL_MAX(10, strlen(qacc)) : 10);
  int taccw  = ESL_MAX(10, p7_tophits_GetMaxAccessionLength(th));
  int tlen, qlen;
  int h,d,nd;

  if (show_header)
    {
      if (fprintf(ofp, "#%*s %22s %40s %11s %11s %11s\n", tnamew+qnamew-1+15+taccw+qaccw, "",                                   "--- full sequence ---",        "-------------- this domain -------------",                "hmm coord",      "ali coord",     "env coord") < 0)
        ESL_EXCEPTION_SYS(eslEWRITE, "tabular per-domain hit list: write failed");
      if (fprintf(ofp, "#%-*s %-*s %5s %-*s %-*s %5s %9s %6s %5s %3s %3s %9s %9s %6s %5s %5s %5s %5s %5s %5s %5s %4s %s\n",
      tnamew-1, " target name",        taccw, "accession",  "tlen",  qnamew, "query name",           qaccw, "accession",  "qlen",  "E-value",   "score",  "bias",  "#",   "of",  "c-Evalue",  "i-Evalue",  "score",  "bias",  "from",  "to",    "from",  "to",   "from",   "to",    "acc",  "description of target") < 0)
        ESL_EXCEPTION_SYS(eslEWRITE, "tabular per-domain hit list: write failed");
      if (fprintf(ofp, "#%*s %*s %5s %*s %*s %5s %9s %6s %5s %3s %3s %9s %9s %6s %5s %5s %5s %5s %5s %5s %5s %4s %s\n", 
      tnamew-1, "-------------------", taccw, "----------", "-----", qnamew, "--------------------", qaccw, "----------", "-----", "---------", "------", "-----", "---", "---", "---------", "---------", "------", "-----", "-----", "-----", "-----", "-----", "-----", "-----", "----", "---------------------") < 0)
        ESL_EXCEPTION_SYS(eslEWRITE, "tabular per-domain hit list: write failed");
    }

  for (h = 0; h < th->N; h++)
    if (th->hit[h]->flags & p7_IS_REPORTED)
    {
        nd = 0;
        for (d = 0; d < th->hit[h]->ndom; d++)
          if (th->hit[h]->dcl[d].is_reported)
          {
              nd++;

              /* in hmmsearch, targets are seqs and queries are HMMs;
               * in hmmscan, the reverse.  but in the ALIDISPLAY
               * structure, lengths L and M are for seq and HMMs, not
               * for query and target, so sort it out.
               */
              if (pli->mode == p7_SEARCH_SEQS) { qlen = th->hit[h]->dcl[d].ad->M; tlen = th->hit[h]->dcl[d].ad->L;  }
              else                             { qlen = th->hit[h]->dcl[d].ad->L; tlen = th->hit[h]->dcl[d].ad->M;  }

              if (fprintf(ofp, "%-*s %-*s %5d %-*s %-*s %5d %9.2g %6.1f %5.1f %3d %3d %9.2g %9.2g %6.1f %5.1f %5d %5d %5ld %5ld %5d %5d %4.2f %s\n",
                tnamew, th->hit[h]->name,
                taccw,  th->hit[h]->acc ? th->hit[h]->acc : "-",
                tlen,
                qnamew, qname,
                qaccw,  ( (qacc != NULL && qacc[0] != '\0') ? qacc : "-"),
                qlen,
                exp(th->hit[h]->lnP) * pli->Z,
                th->hit[h]->score,
                th->hit[h]->pre_score - th->hit[h]->score, /* bias correction */
                nd,
                th->hit[h]->nreported,
                exp(th->hit[h]->dcl[d].lnP) * pli->domZ,
                exp(th->hit[h]->dcl[d].lnP) * pli->Z,
                th->hit[h]->dcl[d].bitscore,
                th->hit[h]->dcl[d].dombias * eslCONST_LOG2R, /* NATS to BITS at last moment */
                th->hit[h]->dcl[d].ad->hmmfrom,
                th->hit[h]->dcl[d].ad->hmmto,
                th->hit[h]->dcl[d].ad->sqfrom,
                th->hit[h]->dcl[d].ad->sqto,
                th->hit[h]->dcl[d].ienv,
                th->hit[h]->dcl[d].jenv,
                (th->hit[h]->dcl[d].oasc / (1.0 + fabs((float) (th->hit[h]->dcl[d].jenv - th->hit[h]->dcl[d].ienv)))),
                (th->hit[h]->desc ?  th->hit[h]->desc : "-")) < 0)
                  ESL_EXCEPTION_SYS(eslEWRITE, "tabular per-domain hit list: write failed");
          }
      }
  return eslOK;
}


/* Function:  p7_tophits_TabularXfam()
 * Synopsis:  Output parsable table(s) of hits, in format desired by Xfam.
 *
 * Purpose:   Output a parseable table of reportable hits in sorted
 *            tophits list <th> in an easily parsed ASCII tabular
 *            form to stream <ofp>, using final pipeline accounting
 *            stored in <pli>.
 *
 *            For long-target nucleotide queries, this will print the
 *            same hits as p7_tophits_TabularTargets(), but with the
 *            smaller number of (reordered) fields required by Dfam
 *            scripts.
 *
 *            For protein queries, this will print two tables:
 *            (a) per-sequence hits as presented by
 *                p7_tophits_TabularTargets(), but formatted for
 *                Pfam scripts;
 *            (b) per-domain hits, similar to those presented by
 *                p7_tophits_TabularDomains(), but sorted by
 *                score/e-value, and formated for Pfam scripts.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 *            <eslEWRITE> if a write to <ofp> fails; for example, if
 *            the disk fills up.
 */
int
p7_tophits_TabularXfam(FILE *ofp, char *qname, char *qacc, P7_TOPHITS *th, P7_PIPELINE *pli)
{
  P7_TOPHITS *domHitlist = NULL;
  P7_HIT     *domhit     = NULL;
  int         tnamew     = ESL_MAX(20, p7_tophits_GetMaxNameLength(th));
  int         ndom       = 0;
  int         posw       = (pli->long_targets ? ESL_MAX(7, p7_tophits_GetMaxPositionLength(th)) : 0);
  int         h,d;
  int         status;


  if (pli->long_targets) 
  {
      if (fprintf(ofp, "# Hit scores\n# ----------\n#\n") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");
      if (fprintf(ofp, "# %-*s %6s %9s %5s  %s  %s %6s %*s %*s %*s %*s %*s %*s %*s   %s\n",
      tnamew-1, "name", " bits", "  E-value", " bias", "hmm-st", "hmm-en", "strand", posw, "ali-st", posw, "ali-en", posw, "env-st", posw, "env-en", posw, "hq-st", posw, "hq-en", posw, "sq-len", "description of target") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");
      if (fprintf(ofp, "# %*s %6s %9s %5s %s %s %6s %*s %*s %*s %*s %*s %*s %*s   %s\n",
      tnamew-1, "-------------------",  "------",  "---------", "-----", "-------", "-------", "------", posw, "-------", posw, "-------",  posw, "-------", posw, "-------", posw, "-------", posw, "-------", posw, "-------", "---------------------") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");

      for (h = 0; h < th->N; h++) 
        if (th->hit[h]->flags & p7_IS_REPORTED)
        {
            //d    = th->hit[h]->best_domain;
            if (fprintf(ofp, "%-*s  %6.1f %9.2g %5.1f %7d %7d %s %*d %*d %*d %*d %*d %*d %*ld   %s\n",
            tnamew, th->hit[h]->name,
            th->hit[h]->score,
            exp(th->hit[h]->lnP),
            th->hit[h]->dcl[0].dombias * eslCONST_LOG2R, /* convert NATS to BITS at last moment */
            th->hit[h]->dcl[0].ad->hmmfrom,
            th->hit[h]->dcl[0].ad->hmmto,
            (th->hit[h]->dcl[0].iali < th->hit[h]->dcl[0].jali ? "   +  "  :  "   -  "),
            posw, th->hit[h]->dcl[0].iali,
            posw, th->hit[h]->dcl[0].jali,
            posw, th->hit[h]->dcl[0].ienv,
            posw, th->hit[h]->dcl[0].jenv,
            posw, th->hit[h]->dcl[0].ihq,
            posw, th->hit[h]->dcl[0].jhq,
            posw, th->hit[h]->dcl[0].ad->L,
            th->hit[h]->desc == NULL ?  "-" : th->hit[h]->desc) < 0)
              ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");
        }
  }
  else 
  {
      if (fprintf(ofp, "# Sequence scores\n# ---------------\n#\n") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");
      if (fprintf(ofp, "# %-*s %6s %9s %3s %5s %5s    %s\n",
      tnamew-1, "name",  " bits", "  E-value", "n",  "exp", " bias", "description") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");
      if (fprintf(ofp, "# %*s %6s %9s %3s %5s %5s    %s\n",
      tnamew-1, "-------------------",  "------", "---------","---", "-----",  "-----", "---------------------") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");

      for (h = 0; h < th->N; h++) 
      {
        if (th->hit[h]->flags & p7_IS_REPORTED)
        {
          if (fprintf(ofp, "%-*s  %6.1f %9.2g %3d %5.1f %5.1f    %s\n",
          tnamew, th->hit[h]->name,
          th->hit[h]->score,
          exp(th->hit[h]->lnP) * pli->Z,
          th->hit[h]->ndom,
          th->hit[h]->nexpected,
          th->hit[h]->pre_score - th->hit[h]->score, /* bias correction */
          (th->hit[h]->desc == NULL ? "-" : th->hit[h]->desc)) < 0)
            ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");

          for (d = 0; d < th->hit[h]->ndom; d++)
            if (th->hit[h]->dcl[d].is_reported)
              ndom ++;
        }
      }
      if (fprintf(ofp, "\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");

      /* Need to sort the domains.  One way to do this is to re-use the hit sorting machinery,
       * so we create one "hit" for each domain, then hand it off to the sorter
       */
      if ((domHitlist  = p7_tophits_Create()) == NULL) return eslEMEM;
      for (h = 0; h < th->N; h++)
      {
        if (th->hit[h]->flags & p7_IS_REPORTED)
        {
          int ndomReported = 0;
          for (d = 0; d < th->hit[h]->ndom; d++)
          {
            if (th->hit[h]->dcl[d].is_reported)
            {
              p7_tophits_CreateNextHit(domHitlist, &domhit);
              ndomReported++;
              ESL_ALLOC(domhit->dcl, sizeof(P7_DOMAIN) );

              domhit->ndom       = ndomReported;  // re-using this variable to track the ordinal value of the domain in the original hit list that generated this pseudo-hit
              domhit->name       = th->hit[h]->name;
              domhit->desc       = th->hit[h]->desc;
              domhit->dcl[0]     = th->hit[h]->dcl[d];
              domhit->sortkey    = pli->inc_by_E ? -1.0 * th->hit[h]->dcl[d].lnP : th->hit[h]->dcl[d].bitscore;
            }
          }
        }
      }
      p7_tophits_SortBySortkey(domHitlist);

      // Now with this list of sorted "hits" (really domains)
      if (fprintf(ofp, "# Domain scores\n# -------------\n#\n") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");
      if (fprintf(ofp, "# %-*s %6s %9s %5s %5s %6s %6s %6s %6s %6s %6s     %s\n",
      tnamew-1, " name",  "bits", "E-value", "hit", "bias",      "env-st",  "env-en",  "ali-st",  "ali-en",  "hmm-st",  "hmm-en",   "description") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");
      if (fprintf(ofp, "# %*s %6s %9s %5s %5s %6s %6s %6s %6s %6s %6s      %s\n",
      tnamew-1, "-------------------",  "------", "---------", "-----", "-----", "------", "------", "------", "------", "------", "------", "---------------------") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");

      for (h = 0; h < domHitlist->N; h++)
      {
        domhit = domHitlist->hit[h];

        if (fprintf(ofp, "%-*s  %6.1f %9.2g %5d %5.1f %6d %6d %6ld %6ld %6d %6d     %s\n",
              tnamew, domHitlist->hit[h]->name,
              domhit->dcl[0].bitscore,
              exp(domhit->dcl[0].lnP) * pli->Z, //i-Evalue
              domhit->ndom,
              domhit->dcl[0].dombias * eslCONST_LOG2R, // NATS to BITS at last moment
              domhit->dcl[0].ienv,
              domhit->dcl[0].jenv,
              domhit->dcl[0].ad->sqfrom,
              domhit->dcl[0].ad->sqto,
              domhit->dcl[0].ad->hmmfrom,
              domhit->dcl[0].ad->hmmto,
              (domhit->desc ?  domhit->desc : "-")) < 0)
                ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");
      }
      free (domHitlist->unsrt);
      free (domHitlist->hit);
      free (domHitlist);
  }
  return eslOK;

 ERROR:
  if (domHitlist) 
  {
      free (domHitlist->unsrt);
      free (domHitlist->hit);
      free (domHitlist);
  }
  return status;
}


/* Function:  p7_tophits_LongInserts()
 * Synopsis:  Output list of long inserts for each query/hit pair
 *
 * Purpose:
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 *            <eslEWRITE> if a write to <ofp> fails; for example, if
 *            the disk fills up.
 */
int
p7_tophits_LongInserts(FILE *ofp, char *qname, char *qacc, P7_TOPHITS *th, P7_PIPELINE *pli, int min_length)
{
  int         h,i,j,k;
  int         insert_len;
  int         status;
  int qnamew = ESL_MAX(20, strlen(qname));
  int tnamew = ESL_MAX(20, p7_tophits_GetMaxNameLength(th));
  int qaccw  = ((qacc != NULL) ? ESL_MAX(10, strlen(qacc)) : 10);
  int taccw  = ESL_MAX(10, p7_tophits_GetMaxAccessionLength(th));

  P7_HIT *hit;

  if (fprintf(ofp, "# Long inserts (at least length %d)\n# ------------\n#\n", min_length) < 0)
    ESL_XEXCEPTION_SYS(eslEWRITE, "long insert output: write failed");

  if (fprintf(ofp, "#%-*s %-*s %-*s %-*s %s %s\n",
    tnamew-1, " target name",        taccw, "accession",  qnamew, "query name",           qaccw, "accession", "insert from", "insert to") < 0)
    ESL_EXCEPTION_SYS(eslEWRITE, "long insert output: write failed");

  if (fprintf(ofp, "#%*s %*s %*s %*s %s %s\n",
    tnamew-1, "-------------------", taccw, "----------", qnamew, "--------------------", qaccw, "----------", "----------", "----------") < 0)
    ESL_EXCEPTION_SYS(eslEWRITE, "long insert output: write failed");


  for (h = 0; h < th->N; h++) {
    hit = th->hit[h];
    if (hit->flags & p7_IS_REPORTED)
    {
      j = hit->dcl[0].ad->sqfrom;
      k = hit->dcl[0].ad->hmmfrom;
      insert_len = 0;
      for (i=0; k<=hit->dcl[0].ad->hmmto && i < hit->dcl[0].ad->N; i++) {
//        printf("%c",hit->dcl[0].ad->model[i]);

        if (hit->dcl[0].ad->model[i] == '.') {
          insert_len++;
        } else {
          if (insert_len >= min_length) {
            int start = j;
            start -= (insert_len-1) * (hit->dcl[0].ad->sqfrom < hit->dcl[0].ad->sqto ? 1 : -1 );
            if (fprintf(ofp, "%-*s %-*s %-*s %-*s %7d %7d\n",
                tnamew, hit->name,
                taccw, hit->acc,
                qnamew, qname,
                qaccw, qacc,
                start,
                j
                 ) < 0)
              ESL_XEXCEPTION_SYS(eslEWRITE, "xfam tabular output: write failed");
          }
          k++;
          insert_len = 0;
        }

        if (hit->dcl[0].ad->aseq[i] != '-') {
          j +=   (hit->dcl[0].ad->sqfrom < hit->dcl[0].ad->sqto ? 1 : -1 );
        }
      }

    }

  }
  return eslOK;

 ERROR:
  return status;
}



/* Function:  p7_tophits_TabularTail()
 * Synopsis:  Print a trailer on a tabular output file.
 *
 * Purpose:   Print some metadata as a trailer on a tabular output file:
 *            date/time, the program, HMMER3 version info, the pipeline mode (SCAN or SEARCH), 
 *            the query and target filenames, a spoof commandline
 *            recording the entire program configuration, and
 *            a "fini!" that's useful for detecting successful
 *            output completion.
 *
 * Args:      ofp       - open tabular output file (either --tblout or --domtblout)
 *            progname  - "hmmscan", for example
 *            pipemode  - p7_SEARCH_SEQS | p7_SCAN_MODELS
 *            qfile     - name of query file, or '-' for stdin, or '[none]' if NULL
 *            tfile     - name of target file, or '-' for stdin, or '[none]' if NULL
 *            go        - program configuration; used to generate spoofed command line
 *
 * Returns:   <eslOK>.
 *
 * Throws:    <eslEMEM> on allocation failure.
 *            <eslESYS> if time() or ctime_r() system calls fail.
 *            <eslEWRITE> on write failure.
 *                        
 * Xref:      SRE:J7/54
 */
int
p7_tophits_TabularTail(FILE *ofp, const char *progname, enum p7_pipemodes_e pipemode, const char *qfile, const char *tfile, const ESL_GETOPTS *go)
{
   time_t date           = time(NULL);
   char  *spoof_cmd      = NULL;
   char  *cwd            = NULL;
   char   timestamp[32];
   char   modestamp[16];
   int    status;


  if ((status = esl_opt_SpoofCmdline(go, &spoof_cmd)) != eslOK) goto ERROR;
  if (date == -1)                                               ESL_XEXCEPTION(eslESYS, "time() failed");
  if ((ctime_r(&date, timestamp)) == NULL)                      ESL_XEXCEPTION(eslESYS, "ctime_r() failed");
  switch (pipemode) {
    case p7_SEARCH_SEQS: strcpy(modestamp, "SEARCH"); break;
    case p7_SCAN_MODELS: strcpy(modestamp, "SCAN");   break;
    default:             ESL_EXCEPTION(eslEINCONCEIVABLE, "wait, what? no such pipemode");
  }
  esl_getcwd(&cwd);

  if (fprintf(ofp, "#\n") < 0)                                                                    ESL_XEXCEPTION_SYS(eslEWRITE, "tabular output tail, write failed");
  if (fprintf(ofp, "# Program:         %s\n",      (progname == NULL) ? "[none]" : progname) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "tabular output tail, write failed");
  if (fprintf(ofp, "# Version:         %s (%s)\n", HMMER_VERSION, HMMER_DATE) < 0)                ESL_XEXCEPTION_SYS(eslEWRITE, "tabular output tail, write failed");
  if (fprintf(ofp, "# Pipeline mode:   %s\n",      modestamp) < 0)                                ESL_XEXCEPTION_SYS(eslEWRITE, "tabular output tail, write failed");
  if (fprintf(ofp, "# Query file:      %s\n",      (qfile    == NULL) ? "[none]" : qfile) < 0)    ESL_XEXCEPTION_SYS(eslEWRITE, "tabular output tail, write failed");
  if (fprintf(ofp, "# Target file:     %s\n",      (tfile    == NULL) ? "[none]" : tfile) < 0)    ESL_XEXCEPTION_SYS(eslEWRITE, "tabular output tail, write failed");
  if (fprintf(ofp, "# Option settings: %s\n",      spoof_cmd) < 0)                                ESL_XEXCEPTION_SYS(eslEWRITE, "tabular output tail, write failed");
  if (fprintf(ofp, "# Current dir:     %s\n",      (cwd      == NULL) ? "[unknown]" : cwd) < 0)   ESL_XEXCEPTION_SYS(eslEWRITE, "tabular output tail, write failed");
  if (fprintf(ofp, "# Date:            %s",        timestamp) < 0) /* timestamp ends in \n */     ESL_XEXCEPTION_SYS(eslEWRITE, "tabular output tail, write failed");
  if (fprintf(ofp, "# [ok]\n") < 0)                                                               ESL_XEXCEPTION_SYS(eslEWRITE, "tabular output tail, write failed");

  free(spoof_cmd);
  if (cwd) free(cwd);
  return eslOK;

 ERROR:
  if (spoof_cmd) free(spoof_cmd);
  if (cwd)       free(cwd);
  return status;
}
/*------------------- end, tabular output -----------------------*/




/*****************************************************************
 * 4. Benchmark driver
 *****************************************************************/
#ifdef p7TOPHITS_BENCHMARK
/* 
  gcc -o benchmark-tophits -std=gnu99 -g -O2 -I. -L. -I../easel -L../easel -Dp7TOPHITS_BENCHMARK p7_tophits.c -lhmmer -leasel -lm 
  ./benchmark-tophits

  As of 28 Dec 07, shows 0.20u for 10 lists of 10,000 hits each (at least ~100x normal expectation),
  so we expect top hits list time to be negligible for typical hmmsearch/hmmscan runs.
  
  If needed, we do have opportunity for optimization, however - especially in memory handling.
 */
#include "p7_config.h"

#include "easel.h"
#include "esl_getopts.h"
#include "esl_stopwatch.h"
#include "esl_random.h"

#include "hmmer.h"

static ESL_OPTIONS options[] = {
  /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
  { "-h",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
  { "-s",        eslARG_INT,     "42", NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                    0 },
  { "-M",        eslARG_INT,     "10", NULL, NULL,  NULL,  NULL, NULL, "number of top hits lists to simulate and merge",   0 },
  { "-N",        eslARG_INT,  "10000", NULL, NULL,  NULL,  NULL, NULL, "number of top hits to simulate",                   0 },
  {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
};
static char usage[]  = "[-options]";
static char banner[] = "benchmark driver for P7_TOPHITS";

int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go       = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_STOPWATCH  *w        = esl_stopwatch_Create();
  ESL_RANDOMNESS *r        = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  int             N        = esl_opt_GetInteger(go, "-N");
  int             M        = esl_opt_GetInteger(go, "-M");
  P7_TOPHITS    **h        = NULL;
  double         *sortkeys = NULL;
  char            name[]   = "not_unique_name";
  char            acc[]    = "not_unique_acc";
  char            desc[]   = "Test description for the purposes of making the benchmark allocate space";
  int             i,j;
  int             status;

  /* prep work: generate our sort keys before starting to time anything    */
  ESL_ALLOC(h,        sizeof(P7_TOPHITS *) * M); /* allocate pointers for M lists */
  ESL_ALLOC(sortkeys, sizeof(double) * N * M);   
  for (i = 0; i < N*M; i++) sortkeys[i] = esl_random(r);

  esl_stopwatch_Start(w);

  /* generate M "random" lists and sort them */
  for (j = 0; j < M; j++)
  {
      h[j] = p7_tophits_Create();
      for (i = 0; i < N; i++)
        p7_tophits_Add(h[j], name, acc, desc, sortkeys[j*N + i],
            (float) sortkeys[j*N+i], sortkeys[j*N+i],
            (float) sortkeys[j*N+i], sortkeys[j*N+i],
            i, i, N,
            i, i, N,
            i, N, NULL);
      p7_tophits_SortBySortkey(h[j]);
  }
  /* then merge them into one big list in h[0] */
  for (j = 1; j < M; j++)
  {
      p7_tophits_Merge(h[0], h[j]);
      p7_tophits_Destroy(h[j]);
  }

  esl_stopwatch_Stop(w);

  p7_tophits_Destroy(h[0]);
  status = eslOK;
 ERROR:
  esl_getopts_Destroy(go);
  esl_stopwatch_Destroy(w);
  esl_randomness_Destroy(r);
  if (sortkeys != NULL) free(sortkeys);
  if (h != NULL) free(h);
  return status;
}
#endif /*p7TOPHITS_BENCHMARK*/


/*****************************************************************
 * 5. Test driver
 *****************************************************************/

#ifdef p7TOPHITS_TESTDRIVE
/*
  gcc -o tophits_utest -std=gnu99 -g -O2 -I. -L. -I../easel -L../easel -Dp7TOPHITS_TESTDRIVE p7_tophits.c -lhmmer -leasel -lm 
  ./tophits_test
*/
#include "p7_config.h"

#include "easel.h"
#include "esl_getopts.h"
#include "esl_stopwatch.h"
#include "esl_random.h"

#include "hmmer.h"

static ESL_OPTIONS options[] = {
  /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
  { "-h",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
  { "-s",        eslARG_INT,     "42", NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                    0 },
  { "-N",        eslARG_INT,    "100", NULL, NULL,  NULL,  NULL, NULL, "number of top hits to simulate",                   0 },
  {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
};

static char usage[]  = "[-options]";
static char banner[] = "test driver for P7_TOPHITS";

int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go       = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *r        = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  int             N        = esl_opt_GetInteger(go, "-N");
  P7_TOPHITS     *h1       = NULL;
  P7_TOPHITS     *h2       = NULL;
  P7_TOPHITS     *h3       = NULL;
  char            name[]   = "not_unique_name";
  char            acc[]    = "not_unique_acc";
  char            desc[]   = "Test description for the purposes of making the test driver allocate space";
  double          key;
  int             i;

  h1 = p7_tophits_Create();
  h2 = p7_tophits_Create();
  h3 = p7_tophits_Create();
  
  for (i = 0; i < N; i++) 
  {
      key = esl_random(r);
      p7_tophits_Add(h1, name, acc, desc, key, (float) key, key, (float) key, key, i, i, N, i, i, N, 1, 1, NULL);
      key = 10.0 * esl_random(r);
      p7_tophits_Add(h2, name, acc, desc, key, (float) key, key, (float) key, key, i, i, N, i, i, N, 2, 2, NULL);
      key = 0.1 * esl_random(r);
      p7_tophits_Add(h3, name, acc, desc, key, (float) key, key, (float) key, key, i, i, N, i, i, N, 3, 3, NULL);
  }
  p7_tophits_Add(h1, "last",  NULL, NULL, -1.0, (float) key, key, (float) key, key, i, i, N, i, i, N, 1, 1, NULL);
  p7_tophits_Add(h1, "first", NULL, NULL, 20.0, (float) key, key, (float) key, key, i, i, N, i, i, N, 1, 1, NULL);

  p7_tophits_SortBySortkey(h1);
  if (strcmp(h1->hit[0]->name,   "first") != 0) esl_fatal("sort failed (top is %s = %f)", h1->hit[0]->name,   h1->hit[0]->sortkey);
  if (strcmp(h1->hit[N+1]->name, "last")  != 0) esl_fatal("sort failed (last is %s = %f)", h1->hit[N+1]->name, h1->hit[N+1]->sortkey);

  p7_tophits_Merge(h1, h2);
  if (strcmp(h1->hit[0]->name,     "first") != 0) esl_fatal("after merge 1, sort failed (top is %s = %f)", h1->hit[0]->name,     h1->hit[0]->sortkey);
  if (strcmp(h1->hit[2*N+1]->name, "last")  != 0) esl_fatal("after merge 1, sort failed (last is %s = %f)", h1->hit[2*N+1]->name, h1->hit[2*N+1]->sortkey);

  p7_tophits_Merge(h3, h1);
  if (strcmp(h3->hit[0]->name,     "first") != 0) esl_fatal("after merge 2, sort failed (top is %s = %f)", h3->hit[0]->name,     h3->hit[0]->sortkey);
  if (strcmp(h3->hit[3*N+1]->name, "last")  != 0) esl_fatal("after merge 2, sort failed (last is %s = %f)", h3->hit[3*N+1]->name,     h3->hit[3*N+1]->sortkey);
  
  if (p7_tophits_GetMaxNameLength(h3) != strlen(name)) esl_fatal("GetMaxNameLength() failed");

  p7_tophits_Destroy(h1);
  p7_tophits_Destroy(h2);
  p7_tophits_Destroy(h3);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return eslOK;
}
#endif /*p7TOPHITS_TESTDRIVE*/


/*****************************************************************
 * HMMER - Biological sequence analysis with profile HMMs
 * Version i1.1.1; July 2014
 * Copyright (C) 2014 Howard Hughes Medical Institute.
 * Other copyrights also apply. See the COPYRIGHT file for a full list.
 * 
 * HMMER is distributed under the terms of the GNU General Public License
 * (GPLv3). See the LICENSE file for details.
 *
 * SVN $Id: p7_tophits.c 4055 2012-05-30 20:32:49Z wheelert $
 * SVN $URL: https://svn.janelia.org/eddylab/eddys/src/hmmer/branches/infernal/1.1/src/p7_tophits.c $
 *****************************************************************/





