#!@WHICHPERL@ -w
# AUTHOR: Haoyuan Zhu and William Stafford Noble and Timothy L. Bailey
# CREATE DATE: 1/29/2002
# PROJECT: MHMM
# DESCRIPTION: Convert Meta-MEME files to HTML format. 
use strict;

# To do list:
#  o Automatically determine alphabet.
#  o Format header better.
#  o Handling of mhmma files is untested.
#  o Determine correct parameters for format_diagrams().
#  o The block diagrams are too wide.
#  o Update explanation.  (Done for mhmmscan.)

### GLOBAL VARIABLES.

# Usage message.
my $usage = "USAGE: mhmm2html [options] <file>

  Options: -alphabet dna|protein (default=dna)
           -diagram-scale <value> (default=1)\n";


# Get the documentation directory path by finding the
my $docdir = "@DOCDIR@";

# process_request.cgi is used to print plain text file
#my $process_request = "http://meme-suite.org/cgi-bin/process_request.cgi";
my $process_request = "../cgi-bin/mhmm_process_request.cgi";

# Dimensions of motif occurrence diagrams.
my $SCALE = 0.25;          # sequence_positions/pixel
my $MAX_DIAGRAM = 100000;  # maximum number of pixels per diagram

# Global list of motif widths
my %WIDTHS;

# Section header names.
my @mhmm_sections = ("HMM STATES", "HMM TRANSITIONS", 
		  "EXPLANATION OF THE METAMEME MODEL FILE",
		  "PROGRAM PARAMETERS");
my @mhmma_sections = ("MULTIPLE ALIGNMENT");
my @mhmms_sections = ("DATABASE SEARCH RESULTS", "ALIGNMENTS", 
		   "MOTIF DIAGRAMS", "EXPLANATION OF OUTPUT", 
		   "PROGRAM PARAMETERS");
my @colors = ("#DDDDFF", "#00FFFF", "#DDFFDD", "#FFFF00", "#DDAA00");

# Define buttons for navigation within document
my %buttons = (
	       "top", 
	       "<A HREF='\#top_buttons'><B>Go to top</B></A>!#DDDDFF!#000000"
	       );


my $text_diagram;
my $num_seqs;


##############################################################################
#  Functions originally from convert2html...
#  as this is the only remaining script that uses convert2html the
#  functions have been merged to save clutter...
#  START convert2html.pl
##############################################################################
#
# subroutines and globals used by mhmm2html
#

my $DIVIDER = "^\\*\\*\\*\\*\\*";		# section divider in output
my $SUBDIV = "^--------------------------------------------------------------------------------";				# subsection divider in output
my $ELIPSIS = "<B> &middot <BR> &middot <BR> &middot </B>";
my $BODY = "#D5F0FF";	# the background color of the page (light blue by default)
my $WEAK_FONT = "50% sans-serif";	# font size for weak motifs
my $THIN_LINE = 4;		# thickness of thin spacer lines
my $FAT_LINE = 8;		# thickness of fat spacer lines (for too long seqs)
my $MIN_WIDTH = 30;	# minimum width (in pixels) for motifs
my $MAX_NAME_LEN = 34;	# maximum length of truncated sequence name

# Colors for the motifs and their labels (motif numbers).
my @MOTIF_COLORS=( "aqua", "blue", "red", "fuchsia",
        "yellow", "lime", "teal", '#444444',
        "green", "silver", "purple", "olive",
        "navy", "maroon", "black", "white" );
my @MOTIF_LABEL_COLORS=( "black", "white", "white", "black",
        "black", "black", "white", "white",
        "white", "black", "white", "black",
        "white", "white", "white", "black" );
my @IC_COLORS=( "red", "blue", "orange", "green", "black",
        "magenta", "pink", "yellow", "turquoise" );

# Added this global here--must be the same as the global in all the calling
# programs (meme2html, mast2html), but they will obsolete soon!
my $MBPSUB = "XXX---XXX";

#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
#-*  SUBROUTINES
#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*

#------------------------------------------------------------------------------
# print the HTML header (including style sheet) and set the body color
#	$title		title of HTML page
#	$body_color	background color; light blue if omitted
#------------------------------------------------------------------------------
sub print_header {
  my (
	$title,			# title of HTML page
	$body_color		# background color; light blue if omitted
  ) = @_;
  my ($i);

  if (defined($body_color)) { $BODY = $body_color; }

  print <<END;
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<HTML>
<HEAD>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<TITLE>$title</TITLE>
END

  # print the style sheet
  print '<STYLE type="text/css">', "\n";

  # these save space compared with <FONT></FONT>

  # color for invisible lines
  print "  TD.invisible { color: '$BODY'; }\n";

  # TD classes for motifs
  for ($i=0; $i<@MOTIF_COLORS; $i++) {
    # normal motifs
    print "  TD.c$i { background: $MOTIF_COLORS[$i]; color: $MOTIF_LABEL_COLORS[$i]; }\n";
    # weak motifs
    print "  TD.cw$i { background: $MOTIF_COLORS[$i]; color: $MOTIF_LABEL_COLORS[$i]; font: $WEAK_FONT; }\n";
  }

  # B, TD and TH classes for IC diagrams and scale
  foreach my $color (@IC_COLORS) {
    print "  B.$color { color: $color; }\n";
    print "  TD.$color { color: $color; }\n";
    print "  TH.$color { color: $color; }\n";
  }

  print "</STYLE>\n";

  #
  # end the header and start the body
  #
  print "</HEAD>\n";
  print "<BODY BGCOLOR='$BODY'>\n";
} # print_header
 
#------------------------------------------------------------------------------
# find a section marker with the specified keyword (case sensitive)
#------------------------------------------------------------------------------
sub find_section {

	my( $key ) = @_;
	my( $line );
	LINE:
	while ( <STDIN> ) {
		unless ( /\*\*\*\*\*/ ) { next LINE; }

		$line = <STDIN>;
		unless ( $line =~ /$key/ ) {
			<STDIN>;
			next LINE;
		}
		last;
	}
	return( $line );
} # find_section

#------------------------------------------------------------------------------
# next_section:
# Find a section marker and return the title line.  The following line of
# stars is removed.
#
# USAGE: $text = &next_section();
#------------------------------------------------------------------------------
sub next_section {
	my( $div ) = @_;
	my( $line );

	unless ( defined($div) ) {
		$div = $DIVIDER;
	} 

	while ( <STDIN> ) {
		unless ( /^$div/ ) { next; }
		$line = <STDIN>;
		<STDIN>;
		last;
	}
	chop $line;
	return( $line );
} # next_section

#------------------------------------------------------------------------------
# read_block:
# Read a block of text until terminated by a blank line.
#------------------------------------------------------------------------------
sub read_block {
	my( $line );

	LINE:
	while ( <STDIN> ) {
		if ( /^\s*$/ ) { last LINE; }
		$line .= $_;
	}
	return( $line );
} # read_block

#------------------------------------------------------------------------------
# next_block:
# Read the next block of text until terminated by a divider line.  The divider
# by default, is specificed in $DIVIDER (normally a line of *).  If a 
# parameter is passed in, it is used as the divider.
# Removes \r from input.
#
# USAGE: $text = &next_block();
#        $text = &next_block( divider );
#------------------------------------------------------------------------------
sub next_block {
	my( $div ) = @_;
	my( $line );

	unless ( defined($div) ) {
		$div = $DIVIDER;
	} 

	while ( <STDIN> ) {
		s/\r//g;
		if ( /$div/ ) { last; }
		$line .= $_;
	}
	return( $line );
} # next_block

#------------------------------------------------------------------------------
# format_section:
# Add HTML formatting for a section or subsection head.  
# The specified heading appears as
# the section title with the specified name as an internal link.
#
# USAGE: $text = &format_section( pre, link, rest, name, ext)
#------------------------------------------------------------------------------
sub format_section {
  my(
    $pre,					# just print this part
    $link,	 				# add link to this part
    $rest, 					# just print this part
    $name, 					# tag is "$name$ext"
    $ext 					# ref is to "$name_doc"
  ) = @_;
  my( $out, $ref, $tag );

  if ( $name =~ /^\s*$/ ) {			# all blank name
    $out = "<HR><CENTER> $pre <BIG><B>\n $link $rest\n</B></BIG></CENTER><HR>";
  } else {					# name given
    $ref = $name . "_doc";
    $tag = $name . $ext;
    $link = "<A HREF=\"#$ref\">$link</A>";
    $out = "<HR><CENTER><A NAME=$tag></A>\n $pre <BIG><B> $link $rest\n</B></BIG></CENTER><HR>";
  }
  return($out);
} # format_section

#------------------------------------------------------------------------------
# format_para:
# Add HTML formatting for a paragraph.  New lines are ignored, blank lines are
# converted to paragraphs.
#
# USAGE: $new_text = &format_para( text );
#------------------------------------------------------------------------------
sub format_para {
	my ( $line ) = @_;
	my( $out );

	$out = "<P>\n$line";
	$out =~ s/\n\s*\n/\n<P>\n/g;

	return( $out );
} # format_para

#------------------------------------------------------------------------------
# format_pre:
# Add HTML formatting for a preformatted block.  
#
# USAGE: $new_text = &format_pre( text );
#------------------------------------------------------------------------------
sub format_pre {
	my ( $line ) = @_;
	my( $out );

	$out = ($line=~ /\w/) ? "<PRE>\n$line</PRE>\n" : "";

	return( $out );
} # format_pre

#------------------------------------------------------------------------------
# format_diagrams:
# Convert the text diagrams to colored diagrams in HTML.  The scale is 1/$scale
# pixels per sequence position for sequences of up to $max_diagram*$scale pixel.
# Diagrams for longer sequences are scaled to fit in $max_diagram pixels 
# and the spacer lines are made thicker (and motif boxes may shrink).
# Weak motifs are labeled with font size $WEAK_FONT.
# When the motifs are protein and the database DNA, the motif widths are
# multiplied by 3 since they are in codon units to start with.
# Uses global variables @MOTIF_COLORS, $WEAK_FONT, $FAT_LINE, $THIN_LINE.
# 
#
# Sets global $META to contain information from SUMMARY of MOTIFS for 
# Meta-MEME.
#
#------------------------------------------------------------------------------
sub format_diagrams {
  my( $scale, $max_diagram, $text, $db, $stype, $xlate, $make_buttons,
    $col2hdr, $no_gi_names, $field_delim, $skip, $width_ref) = @_;
  my( $i, $out, @line, $l, $w, $wid_sum, $nmotifs, $nspacers);
  my( $max_spacers, $max_motifs, $name, $evalue, $diagram);
  my( @field, $f, $motif, $link, $seqno );
  my( @scale1, $col, $color, $font, $fsize, $wide, $fill, $mscale );
  my( $ncol, $dist, $w2, $loc, @nocc, @seqlen, $lno, $ncolors );

  my $re_en = "\\([+-]?\\d*\\.?\\d*e[+-]?\\d+\\)";  # an e-format in parens

  # start a table and header row
  $out = "<TABLE SUMMARY='motif diagrams' BORDER=1>\n<TR>";
  $ncol = 0;				# number of columns in table
  $ncolors = scalar(@MOTIF_LABEL_COLORS);	# number of distinct motif colors

  # put buttons linking to score and annotation?
  if ($make_buttons) { $out .= "<TH>Links"; $ncol += 1; }

  if ($stype eq "s") {			# scoring DNA strands separately
    $out .= "<TH>Name<TH>Strand<TH>$col2hdr<TH ALIGN=LEFT>&nbsp;&nbsp; Motifs\n";
    $ncol += 4;
  } else {				# PROTEIN
    $out .="<TH>Name<TH>$col2hdr<TH ALIGN=LEFT>&nbsp;&nbsp; Motifs\n";
    $ncol += 3;
  }

  # split the text into lines
  $text =~ s/\n\s+//g;			# concat continued lines
  @line = split /\n/, $text;

  # find the width of each diagram
  if ($xlate) {				# translating DNA
    $mscale = 3;
  } else {
    $mscale = 1;
  }
  my $max_width = 0;

  # remove header lines
  for ($i=0; $i<=$#line; $i++) {
    $l = $line[$i];
    last if ($l =~ /-------------/);
  }
  while ($i>=0) { shift @line; $i--; }

  #
  # calculate the approximate width of diagram
  #
  $lno = 0;				# line number
  $max_motifs = $max_spacers = 0;	# no diagrams read yet
  $seqno = 0;
  foreach $l (@line) {
	  if ($stype eq "s") {
            my $strand;
	    ($name,$strand,$evalue,$diagram) = ($field_delim eq " ") ?
              split( " ", $l ) :
              split( $field_delim, $l );
	  } else {
	    ($name,$evalue,$diagram) = ($field_delim eq " ") ?
	      split( " ", $l ) :
	      split( $field_delim, $l );
	  }

	  # get sequence number
            $seqno++;

	  # skip this sequence?
	  if (defined($skip) && defined($skip->[$seqno])) { next; }

	  # split the diagram into fields
	  @field = split( '[ _]', $diagram );
	  # calculate the approximate width
	  $wid_sum = $nocc[$lno] = 0;
	  foreach $f (@field) {
	    if ( $f =~ /[<>\[\]]/ ) {		# motif occurrence
	      ($motif) = $f =~ /[<\[][+-]?(\d+)[abc]?($re_en)?[>\]]/;
	      $wid_sum += $$width_ref{$motif} * $mscale;
              $seqlen[$lno] += $$width_ref{$motif};	# length of sequence
	      $nocc[$lno]++;			# number of motif occurrences
	    } elsif ($f ne "") {			# spacer
              $seqlen[$lno] += $f;		# length of sequence
	      $wid_sum += $f;
	    }
	  }
	  # calculate a scale so that diagram fits in $max_diagram pixels
	  $scale1[$seqno] = $scale;
	  if ($wid_sum/$scale > $max_diagram) { 
	    $scale1[$seqno] = $wid_sum/$max_diagram;
	  }
	  # calculate the exact scaled diagram width
	  $wid_sum = 0;
	  $nmotifs = 0;
	  $nspacers = 0;
	  foreach $f (@field) {
	    if ( $f =~ /[<>\[\]]/ ) {
	      ($motif) = $f =~ /[<\[][+-]?(\d+)[abc]?($re_en)?[>\]]/;
	      $wide = int($mscale*$$width_ref{$motif}/$scale1[$seqno]+0.5);
	      if ($wide < $MIN_WIDTH) { $wide = $MIN_WIDTH; }
	      $nmotifs++;
	    } elsif ($f ne "") {
	      $wide = $f/$scale1[$seqno];
	      $wide = int($wide + 0.5);		# round to integer
	      $nspacers++;
	    } else {
	      $wide = 0;
            } 
	    $wid_sum += $wide;
	  }
	  # save the length of the longest diagram
	  if ($wid_sum > $max_width) { 
            $max_width = $wid_sum; 
	    $max_motifs = $nmotifs;
	    $max_spacers = $nspacers;
	  }
    $lno++;					# line number
  } # line

  # kludge for Netscape 4.0; make width larger
  $max_width += 14 + (2*$max_motifs) + (3*$max_spacers);
  
  # set max_width to at least
  my $min_width = int(50.0/$scale + 0.5); 
  if ($max_width < $min_width) { $max_width = $min_width; }
  
  # make the diagrams
  my $META = "";
  $lno = 0;					# line number
  $seqno = 0;
  foreach $l (@line) {
    my $strand;
    # length of space holder at end of diagram
    if ($stype eq "s") {
      ($name,$strand,$evalue,$diagram) = ($field_delim eq " ") ?
        split( " ", $l ) :
        split( $field_delim, $l );
    } else {
      ($name,$evalue,$diagram) = ($field_delim eq " ") ?
	split( " ", $l ) :
	split( $field_delim, $l );
    }
    $seqno++;

    if (defined($skip) && defined($skip->[$seqno])) {
      if (!defined($skip->[$seqno-1])) { $out .= "<TR><TD COLSPAN=5>$ELIPSIS\n"; }
      next;				# skip this sequence
    } 

    # start row of table
    $out .= "<TR>\n ";

    if ($make_buttons) {
      $evalue = "$evalue";
      my $button;
      if ( $name =~ /\|/ ) {
	$button = make_button_panel("!", \%buttons, 
	  $db."entrez!$name", "score!$seqno", "align!$seqno", "help" );
      } else { 
	$button = make_button_panel("!", \%buttons, 
	  "score!$seqno", "align!$seqno", "help" );
      }
      $out .= "<TD>$button\n";
    } # make_buttons

    # write name of sequence 
    if ($no_gi_names) {
      $name =~ s/^gi\|\d+\|//;			# leading gi|123 removed
    }
    $link = "<A NAME=d$seqno></A>$name";
    $out .= " <TD>$link\n";
    $META .= "$name";

    if ($stype eq "s") {
      $out .= " <TD ALIGN=CENTER>$strand\n";
    }
    $out .= " <TD ALIGN=RIGHT NOWRAP>$evalue\n";
    $META .= " $evalue $nocc[$lno] $seqlen[$lno]";
    $out .= " <TD><TABLE SUMMARY='diagram $seqno' WIDTH=$max_width BORDER=0 ALIGN=LEFT CELLSPACING=0 CELLPADDING=0><TR ALIGN=CENTER>\n";
    @field = split( '[ _]', $diagram );
    $fsize = $THIN_LINE;		# font size for spacer line
    if ($scale1[$seqno] != $scale) { $fsize = $FAT_LINE; }
    my $tail = $max_width;
    my $position = 0;				# position in sequence
    foreach $f (@field) {
      my ($st, $frame, $pv);
      if ($f eq "") {
        next;
      } elsif ( $f =~ /[<>\[\]]/ ) {		# motif occurence
	($st, $motif, $frame, $pv) = $f =~ 
          /[<\[]([+-]?)(\d+)([abc]?)($re_en)?[>\]]/;
        if (!defined $pv) {
          $pv = "(?)";
        }
        ($pv) = $pv =~ /\(([^)]+)\)/;
	$wide = int($mscale*$$width_ref{$motif}/$scale1[$seqno]+0.5);
	if ($wide < $MIN_WIDTH) { $wide = $MIN_WIDTH; }
        if ($motif eq ""){
          $color = "gray";
          $motif = "&nbsp";
        } else {
          $col = ( $motif - 1 ) % $ncolors; 
          $color = $MOTIF_COLORS[$col];
        }
	# set color and size of motif label;
	# weak motifs have font size $WEAK_FONT
	$font = "";
        my $endfont = "";
        my $class = ($f =~ /</) ? "cw$col" : "c$col"; 
	$out .= "  <TD CLASS='$class' WIDTH=$wide>$st$motif$frame\n";
        $META .= "  $st$motif $position $pv";	# for meta-meme
        $position += $$width_ref{$motif};	# letter position in sequence
      } elsif ($f ne "") {			# spacer
	$wide = $f/$scale1[$seqno];
	$wide = int($wide + 0.5);		# round to integer
	$out .= 
	 "  <TD WIDTH=$wide><HR SIZE=$fsize NOSHADE>\n";
        $position += $f;			# letter position in sequence
      }
      $tail -= $wide;
    }
    if ($tail > 0) { $out .= "  <TD WIDTH=$tail>\n"; }
    $out .= " </TABLE>\n";
    $META .= "\n";				# metameme data
    $lno++;					# line number
  } # line

  #
  # print a scale
  #
  $dist = 50;					# pixel distance between rules
  $w2 = $dist - 1.0/$scale;			# distance to second rule
  $ncol--;					# number of columns scale spans
  $out .= "<TR><TH CLASS='blue' COLSPAN=$ncol ROWSPAN=2 ALIGN=LEFT>SCALE\n";
  $out .= "  <TD><TABLE SUMMARY='scale' WIDTH=$max_width BORDER=0 ALIGN=LEFT CELLSPACING=0 CELLPADDING=0><TR ALIGN=CENTER>\n";
  $out .= "    <TD CLASS='blue' WIDTH=$w2 ALIGN=LEFT>|</TD>\n";
  for ($i=$dist; $i<$max_width; $i+=$dist) {
    $loc = $i * $scale;
    last if ($i > $max_width-50);	# make sure number will fit
    $out .= "    <TD CLASS='blue' WIDTH=$dist ALIGN=LEFT>|</TD>\n";
  }
  $out .= "  <TR><TD CLASS='blue' WIDTH=$w2 ALIGN=LEFT>1</TD>\n";
  for ($i=$dist; $i<$max_width; $i+=$dist) {
    $loc = $i * $scale;
    last if ($i > $max_width-50);	# make sure number will fit
    $out .= "    <TD CLASS='blue' WIDTH=$dist ALIGN=LEFT>$loc</TD>\n";
  }
  $out .= "  </TABLE>\n";

  # end the table
  $out .= "</TABLE>\n";

  return( $out );
} # format_diagrams

#------------------------------------------------------------------------------
#	find_line
#------------------------------------------------------------------------------
sub find_line {

    my( $key ) = @_;
    my( $line );

    LINE:
    while ( $line = <STDIN> ) {

        unless ( $line =~ /$key/ ) {
            next LINE;
        }
        last;
    }
    return( $line );
} # find_line

#------------------------------------------------------------------------------
# make_button_panel:
#
# Use the input associative array, and the specified keys to create a HTML
# table with labeled buttons.
#
# The buttons are defined in the associative array passed in the second
# parameter.  This array is indexed by the key, and contains three fields
# separated by the divider specified in the 1st parameter.  The fields 
# are the tag template, the background color, and the font color.  Note that
# a tag template that is a link will override the font color.
#
# The tag template is a string that, optionally, can have the key substituted
# into it at all positions marked by $MBPSUB (this is a symbol to avoid 
# collisions with arbitrary strings).
#
# Note that each line containing a button panel should be followed with a
# <BR CLEAR=LEFT> to prevent the next line from being indented to the position
# following the panel.  This is because, even though the panel uses a -1 font,
# it is still taller than the text and therefore creates an apparent indent in
# the same way a drop capital would.
#
# USAGE:
#	<string>  = &make_button_panel( <div>, <button def>, [<key1>,<key2>...] );
#
# button def:
# button defs have three elements separated by the divider (div) :
#	Element 1: template for text that will appear on the button.  This 
#	can be static text, or contain a place hold where key information will
#	be inserted ($MBPSUB).
#	Element 2: background color for the button.
#	Element 3: foreground color for the button.  Note if button is a link, 
#	this color will be overridden by the link color.
#
# key:
# keys have one or two elements separated by the divider:
#	Element 1: mandatory.  this is the button type and is used to look up
#			   the button definition.
#	Element 2: optional.  If present, this is the key information that will
#	be substituted for $MBPSUB.  If absent, the button type is used.
#
# default behavior:
# if a button type is undefined, a black and white button will be displayed 
# using the key information as the label on the button. 
#
# EXAMPLE
#	$MBPSUB = "XXXXXXX";
#	
#	%p1 = ( 
#         'aaa', 
#         '<A HREF=http://www.sdsc.edu/~gribskov>$MBPSUB</A>&#FFAAAA&white', 
#	  'b', 'B&#DDDDFF&white', 
#	  'c', 'C&white&black' 
#	);
#	
#	%p2 = ( 'fff', 'F green white',
#	        'ggg', 'G blue white' );
#	
#	$bpanel = &make_button_panel( "&", \%def, "a&?", "c" );
#	print "$bpanel text following the buttons goes here<BR CLEAR=LEFT>\n";
#	$bpanel =  &make_button_panel( " ",  \%p2, "fff", "ggg", "hhh" );
#	print "$bpanel A second panel with text<BR CLEAR=LEFT>\n";
#
#	19 November 1997     Michael Gribskov
#------------------------------------------------------------------------------
sub make_button_panel {

        my ($div, $def, @parm) = @_;
        my ($p, $n, $type, $text, $tag, $bkg, $bcolor, $font, $fcolor, $out, $key);

        $out = "<TABLE SUMMARY='buttons' ALIGN=LEFT CELLSPACING=0><TR>";
        foreach $p ( @parm ) {

            # split the key into the tag (type) and text
            $n = ($type, $text) = split /$div/, $p;
            if ( $n > 1 ) {
                $key = $text;
            } else { $type=$p; $key = $p; }

            #look up the button definition using the type as key
            ($tag, $bcolor, $fcolor) = split /$div/, $def->{$type};

            unless ( defined($tag) ) { $tag = $key; }
            unless ( defined($bcolor) ) { $bcolor = "white"; }
            unless ( defined($fcolor) ) { $fcolor = "black"; }

            # replace all occurrences of $MBPSUB with the key
            while ( $tag =~ s/$MBPSUB/$key/ ){};

            $out .= "\n  <TD BGCOLOR='$bcolor'>$tag";
        }
        $out .= "</TABLE>";

        return( $out );
} # make_button_panel

################################################################################
#	get_color
#
#	Get the name of the color for the given DNA or Protein letter. 
#	Colors are similar to those used by CLUSTAL.
#
################################################################################
sub get_color {
  my ($type, $letter) = @_;
  my $color;
  $_ = $letter;

  if ($type =~ /DNA|dna/) {
    if (/[Aa]/) {
      $color = "red";
    } elsif (/[Cc]/) {
      $color = "blue";
    } elsif (/[Gg]/) {
      $color = "orange";
    } elsif (/[Tt]/) {
      $color = "green";
    } else {
      $color = "black";
    }
  } elsif ($type =~ /PROTEIN|protein/) {
    if (/[ACFILMVWacfilmvw]/) {
      $color = "blue";
    } elsif (/[NQSTnqst]/) {
      $color = "green";
    } elsif (/[DEde]/) {
      $color = "magenta";
    } elsif (/[KRkr]/) {
      $color = "red";
    } elsif (/[Hh]/) {
      $color = "pink";
    } elsif (/[Gg]/) {
      $color = "orange";
    } elsif (/[Pp]/) {
      $color = "yellow";
    } elsif (/[Yy]/) {
      $color = "turquoise";
    } else {
      $color = "black";
    }
  }
  return $color;
} # get_color 

#------------------------------------------------------------------------------
# Cleanup any temporary files
#------------------------------------------------------------------------------
sub cleanup {
  #system "rm $pgm.$$.*.tmp";
  if ($_[0] eq "INT") { exit(1); } else { exit($_[0]); }
} # cleanup

#
# Haoyuan added some functions here in order to convert mhmm result from 
# plain text to html (mhmm2html)
#

#-----------------------------------------------------------------------------
# format_subtile
# Add HTML sub_title, <hr><b><center> .... </center></b><hr>.
# 
# USAGE: $new_text = &format_subtitle(text)
#-----------------------------------------------------------------------------
sub format_subtitle {
    my ($line) = @_;
    my ($out);
    $out = "<HR>\n<CENTER><B>$line</B></CENTER>\n<HR>\n";
    return($out);
} #format_subtitle

#----------------------------------------------------------------------------
# format_hidden
# Add HTML Hidden Field
#
# USAGE: $new_text = &format_hidden(textname,textfield)
#----------------------------------------------------------------------------
sub format_hidden {
  my ($name, $field) = @_;
  my ($out);

  # replace confounding characters (">) that won't work in HIDDEN values
  $field =~ s/\"/&quot;/g;
  $field =~ s/\>/&gt;/g;

  $out = "<INPUT TYPE = \"HIDDEN\" NAME = \"$name\"".
    " VALUE = \"\n".
    "$field".
    " \">\n";
} #format_hidden

#----------------------------------------------------------------------------
# delete_first_letter
# Delete the first letter of a sentence
# In mhmm2html file, it is used to chop "#"
# USAGE: $new_text = &delete_first_letter(text)
#----------------------------------------------------------------------------
sub delete_first_letter{
    my ($line) = @_;
    my ($out);
    if (length($line) == 0){
	print (STDERR "Error: The length of your input is 0\n");
	exit(0);
    }
    else{
	my($length) = length($line);
	$out = substr($line,1,$length-1);
    }
    return($out);
}

#--------------------------------------------------------------------------
# make_end
# Add some "last" info. to the html output page
#
# USAGE: $end = &make_end(\%buttons)
#--------------------------------------------------------------------------
sub make_end{
    my ($buttons_ref) = @_;
    my ($out, $button);

    # button to take user to top of form
    $button = make_button_panel("!", $buttons_ref, "top");
    # finish form, body and documument
    $out = "<HR>$button<BR>\n</FORM>\n</BODY>\n</HTML>\n";
    return ($out);
}

#-------------------------------------------------------------------------
# format_table
# format html table from input plain text
#
# USAGE: $new_text = &format_table($text,$num_col)
#-------------------------------------------------------------------------
sub format_table{
    my ($out,@temp1,$i,@temp2,$end_mark,$j);
    my ($line,$num_col) = @_;
    print "<table border = \"1\">\n";
    @temp1 = split("\n",$line);
    for ($i=0;$i<=$#temp1;$i++){
	@temp2 = split(" ",$temp1[$i]);
	if ($num_col eq "0"){
	    $end_mark = $#temp2;
	}
	else{
	    $end_mark = $num_col-1;
	    for ($j=$num_col;$j<=$#temp2;$j++){
		$temp2[$num_col-1] = $temp2[$num_col-1]." ".$temp2[$j];
	    }
	}
	print "<tr>\n";
	for ($j=0;$j<=$end_mark;$j++){
	    print "<td>";
	    print "$temp2[$j]";
	    print "</td>";
	}
	print "\n</tr>\n";
    }
    print "</table>\n";
}

#######################################################################
# Add Entrez link
#######################################################################
sub add_entrez_link{
  my ($line, $alphabet) = @_;
  my($return_value);

  $return_value = 
    "<A HREF=\'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=";
  if ($alphabet eq "dna"){
    $return_value .= "Nucleotide";
  } else {
    $return_value .= "Protein";
  }
  $return_value .= "&amp;cmd=Search&amp;term=$line&amp;doptcmdl=GenBank\' TARGET =\'_blank\'>$line</a>";

  return($return_value);
} # add_entrez_link

##############################################################################
#  END convert2html.pl
##############################################################################

##############################################################################
# Parse the command line.
if (scalar(@ARGV) < 1){
  print (STDERR $usage);
  exit(0);  
}
my $alphabet = "dna";
my $diagram_scale = 1;
my $mhmmscan;
while (scalar(@ARGV) > 1) {
  my $next_arg = shift(@ARGV);
  if ($next_arg eq "-alphabet") {
    $alphabet = shift(@ARGV);
  } elsif ($next_arg eq "-diagram-scale") {
    $diagram_scale = shift(@ARGV);
  } elsif ($next_arg eq "-mhmmscan") {
    $mhmmscan = 1;
  } elsif ($next_arg eq "-test") {
    # This is an undocumented flag used by mhmmscan to locate this script.
    print("mhmm2html\n");
    exit(0);
  } else {
    die("Illegal argument ($next_arg).\n$usage");
  }
}
my($infile_name) = @ARGV;

# Open the input file.
open(INFILE, "<$infile_name") || die("Can't open $infile_name.");

# Print the introductory section.
my $input_type = &make_introduction(\*INFILE);

#
# Print the sections unique to each program. 
#
if ($input_type eq "mhmm") {
  &make_mhmm(\*INFILE);
}
elsif ($input_type eq "mhmma") {
  &make_mhmma($alphabet, \*INFILE);
}
elsif (($input_type eq "mhmms") ||
       ($input_type eq "mhmmscan")) {
  &make_mhmms($input_type, $alphabet, \*INFILE, $input_type);
}
else {
  die("Unrecognized program ($input_type).\n");
  exit(1);      
}

# Print the explanation of output section.
&make_explanation($input_type);

# Print the parameters section.
&make_parameters(\*INFILE);

# Print the whole file as a hidden field.
&make_hidden_copy(\*INFILE);

# Print the end of the HTML file.
print(&make_end(\%buttons));

close(\*INFILE);

########################################################################
# Print out some introductory information.
# 
# FIXME: This could be formatted better.
########################################################################
sub make_introduction {
  my($infile, @sections) = @_;
  my($program);

  # Read the first line.
  my $line = <$infile>;
  chomp($line);
  
  # Remove the hash.
  $line = substr($line, 2);

  # Make sure it says what it should.
  if ($line ne "The MEME Suite of motif-based sequence analysis tools.") {
    die("Cannot HTML-ize a file without a header.\n$line\n");
  }

  # HTML-ize the first line.
  my $header = &format_subtitle($line);  

  # Read the rest of the header.
  while ($line = <$infile>) {
    chomp($line);

    # Remove the hash.
    $line = substr($line, 2);

    # Get the program name.
    my @words = split(' ', $line);
    if (defined($words[0])) {
      if ($words[0] eq "Program:") {
	$program = $words[1];
	$header .= "<PRE>";
      } elsif ($words[0] eq "For") {
	$header .= "</PRE>";
      }
    }

    # We've reached the end of the header.
    if ($line eq "*****************************************************************************") {
      last;
    }

    if ($line eq "") {
      $header .= "<BR><BR>\n";
    } else {
      $header .= "$line\n";
    }
  }
  
  # Print the HTML header.
  print_header($program, "white");

  # Get the appropriate list of section names.
  if ($program eq "mhmm") {
    @sections = @mhmm_sections;
  } elsif ($program eq "mhmma") {
    @sections = @mhmma_sections;
  } elsif (($program eq "mhmms") || ($program eq "mhmmscan")) {
    @sections = @mhmms_sections;
  } else {
    die("Unrecognized program ($program).\n");
  }
  
  # Print the section cross-references.
  print "<A NAME='top_buttons'></A><HR>\n";
  print("<TABLE SUMMARY='' ALIGN=LEFT BORDER=0><TR>\n");
  foreach my $section (@sections) {

    # Replace spaces with underscores.
    my $new_section = $section;
    $new_section =~ s/ /_/g;

    # Print the link.
    my $color = shift(@colors);
    print("<TD BGCOLOR=#$color>");
    print("<A HREF=\"#$new_section\"><B>$section</B></A></TD>\n");
  }
  print("</TR></TABLE><BR CLEAR=LEFT>\n");

  # Print the header.
  print($header);

  # Add a "View plain text" button.
  #print("<FORM METHOD = POST ACTION = \"$process_request\">
  #<INPUT TYPE = SUBMIT NAME = action VALUE = 'VIEW PLAIN TEXT'>\n");

  return($program);
} #sub make_introduction

#########################################################################
# Store the entire input file as a hidden field.
#########################################################################
sub make_hidden_copy {
  my($infile) = @_;
  
  # Store the contents of the file as a single variable.
  my $text = "";
  while (my $line = <$infile>){
    $text .= $line;
  }
  print(&format_hidden("plain text", $text));
} # make_hidden_copy

########################################################################
# Print a subtitle to stdout, including a section reference.
########################################################################
sub print_subtitle {
  my ($line) = @_;

  my $new_line = $line;
  $new_line =~ s/ /_/g;
  print("<A NAME=$new_line></A>");
  print(&format_subtitle($line));
  "";				# so you can use it like format_subtitle
} #print_subtitle


########################################################################
# Generate html output for mhmm.
########################################################################
sub make_mhmm {
  my($infile) = @_;

  # Print the state table header.
  &print_subtitle("HMM STATES");

  # Read and print the state information.
  my $text = "";
  my $line = <$infile>;
  while (!($line =~ /Transition probability matrix/)){
    $text .= $line;
    $line = <$infile>;
  }
  print(&format_pre($text));

  # Print the transition table header.
  &print_subtitle("HMM TRANSITIONS");

  # Add state indices to the transition table.
  $text = "state";
  my $num_transition = &num_transition($line);
  for (my $i = 0; $i < $num_transition; $i++) {
    $text .= " $i";
  }
  $text .= "\n";

  # Add transitions to the transition table.
  my $num_lines = 0;
  $line = <$infile>;
  while (!($line =~ /End of MHMM/)){
    $text .= "$num_lines $line";
    $num_lines++;
    $line = <$infile>;
  }
  
  # Print the transition table.
  &format_table($text, 0);

} # make_mhmm

################################################################### 
# num_transition
# How many transition states in MHMM model file
#
# USAGE: $num = &num_transition($line)
###################################################################
sub num_transition{
  my ($line) = @_;
  my @temp1 = split("x",$line);
  my @temp2 = split(":",$temp1[$#temp1]);
  my $out = substr($temp2[0],0,length($temp2[0])-1);
  return($out);
} # num_transition

#################################################################
# make_mhmma
# to generate html output page for mhmma
#################################################################
sub make_mhmma {
  my($alphabet, $infile) = @_;

  my $text = "";
  my $i_line = 0;
  my $num_space = 0;
  my $label = 0;

  while (my $line = <$infile>){
    $i_line ++;
    #skip the first line
    if ($i_line == 1){
    }
    elsif (!($line =~ /\/\//)){
      $text = $text.$line;
    }
    elsif ($line =~ /\/\//){
      $text = "";
      $i_line = 0;
      $line = &format_subtitle("MULTIPLE ALIGNMENT");
      print $line;
      while (!($line =~ /Program parameters/)){
	$line = <$infile>;
	$i_line ++;
	if (!($line =~ /Program parameters/)){
	  # if meet "\n", the next line is label line
	  # add these two lines to text
	  if ($line eq "\n"){
	    
	    $text = $text.$line;
	    $line = <$infile>;
	    $i_line ++;
	    if ($i_line == 2){
	      $num_space = &cal_space($line);
	    }
	    if (!($line =~ /Program parameters/)){ 
	      $text = $text.$line; 
	    }
	  }
	  #
	  # otherwise, add color the the upper case letter in sequence
	  # 
	  else{
	    my $new_line = &add_color_uppercase($line, $alphabet);
	    $text = $text.$new_line;
	  }
	  
	}
      }
      $line = &format_pre($text);
      print $line;
      last;
    }
  }

} #make_mhmma

#############################################################################
# Calculate how many white spaces before the first character of a line.
# This function is called by make_mhmma.
#
# USAGE: $num_space = &cal_space($line);
############################################################################
sub cal_space{
  my ($line) = @_;
  my ($num_space) = 0;

  for (my $i = 0; $i < length($line); $i++) {
    if (substr($line,$i,1) eq " "){
      $num_space++;
    }
    else{
      last;
    }
  }
  return ($num_space);
} #cal_space

############################################################################
# Generate HTML output from mhmms and mhmmscan input.
############################################################################
sub make_mhmms {
  my($program, $alphabet, $infile, $input_type) = @_;

  # Print the section header.
  &print_subtitle("DATABASE SEARCH RESULTS");

  # Read, store and print the search results header line.
  my $line = <$infile>;
  chomp($line);
  my @titles = split(' ', $line);
  print("<TABLE SUMMARY='' BORDER=\"1\">\n");
  print("<TR>\n");
  print(&add_table_delimiters($line));
  print("</TR>\n");
  
  # Skip the line of hyphens.
  $line = <$infile>;

  # Read the search results.
  print(STDERR "Reading sequence scores.\n");
  $line = <$infile>;
  my $i_seq = 0;
  my @ids;
  while (!($line =~ /-------/)) {
    chomp($line);

    # Parse and print the line.
    print("<TR>");
    my $i_word = 0;
    foreach my $word (split(' ', $line)) {
      if ($i_word == 0) {
	print("<TD>");
	print(&add_entrez_link($word, $alphabet));
	print("</TD>");
	# Store the ID for use in parsing alignments later.
	$ids[$i_seq] = $word;
      } elsif ($i_word < scalar(@titles) - 1) {
	print("<TD>$word</TD>");
      } elsif ($i_word == scalar(@titles) - 1) {
	print("<TD>$word");
      } else {
	print(" $word");
      }
      $i_word++;
    }
    print("</TD></TR>\n");

    $line = <$infile>;
    $i_seq++;
  }
  print("</TABLE><BR><BR>\n");
  $num_seqs = $i_seq;
  $ids[$num_seqs] = "";
  print(STDERR "Read $num_seqs sequence scores.\n");

  # skip blank lines
  my $id_line = <$infile>;
  while ($id_line =~ /^\s*$/) {$id_line = <$infile>};

  if ($id_line =~ /^$ids[0]/) {	# alignments present?
    &print_pairwise_alignments($program, $alphabet, $infile, $id_line);
    &print_motif_diagrams($program, $alphabet, $infile, $id_line);
  }

} #make_mhmms

############################################################################
# Print pairwise alignments from mhmms and mhmmscan input.
#
# Assumes that the first id line has been read and is in $id_line.
# Uses globals:
#	@ids
############################################################################
sub print_pairwise_alignments {
  my($program, $alphabet, $infile, $id_line) = @_;

  my $got_scores = 0;

  &print_subtitle("ALIGNMENTS");
  print("<table summary='' border=\"1\">\n");
  print("<tr>\n");
  print("<td><CENTER>ID<BR>E-value<BR>Score<BR></CENTER></td>");
  print("<td><center>Alignment</center></td>\n");
  print("</tr>\n");

  # Initialize the text diagram.
  $text_diagram = "SEQUENCE NAME\tE-VALUE\tMOTIF DIAGRAM\n";
  $text_diagram .= "-------------\t-------\t-------------\n";

  # Read the alignment for each sequence.
  print(STDERR "Reading alignments for $num_seqs sequences.\n");
  for (my $id_seq = 0; $id_seq < $num_seqs; $id_seq++) {

    # Store the sequence information.
    my($id, $evalue, $score) = split(' ', $id_line);
    printf("<TR><TD><CENTER>%s<BR>%s<BR>%s<BR></CENTER></TD>",
	   &add_entrez_link($id, $alphabet),
	   $evalue, $score);

    # Read the entire alignment for this sequence.
    my $raw_alignment = "";
    my $this_alignment = "";
    my $colored_alignment = "";
    my $line;
    $line = <$infile>;
    while (!&is_id_line($line) && !&is_divider_line($line)) {
      $raw_alignment .= $line;
      $line = <$infile>;
    }
    my(@lines) = split("\n", $raw_alignment);
    my($num_lines)= scalar(@lines);
    my($lines_per_block)= 0;

    # The number of lines in an alignment should be a multiple of 6 if
    # a score is provided and a multiple of 5 if no score provided
    if ($num_lines != 0 && $num_lines % 6 == 0) {
      $got_scores = 1;
      $lines_per_block = 6;
    } elsif ($num_lines != 0 && $num_lines % 5 == 0) {
      $got_scores = 0;
      $lines_per_block = 5;
    } else {
      print(STDERR "The alignment for sequence $id had an unexpected number of lines.\n");
      print(STDERR "$num_lines were found. Expected a multiple of 5 or 6\n");
      exit(1);
    }

    # Parse each block in the raw alignment
    for (my $i = 0; $i < $num_lines; $i += $lines_per_block) {
      my $pvalue_sequence;
      my $motif_sequence;
      my $model_sequence;
      my $match_sequence;
      my $sequence;
      if ($got_scores) {
	$pvalue_sequence = $lines[$i + 1];
	$motif_sequence = $lines[$i + 2];
	$model_sequence = $lines[$i + 3];
	$match_sequence = $lines[$i + 4];
	$sequence = $lines[$i + 5];
      } else {
	$motif_sequence = $lines[$i + 1];
	$model_sequence = $lines[$i + 2];
	$match_sequence = $lines[$i + 3];
	$sequence = $lines[$i + 4];
      }
      # Test for valid sequence as a sanity check
      if (!($sequence =~ /^\s*\d+\s+[a-zA-Z]+\s+\d+\s*$/)) { 
       	die("File is not in proper format at sequence $id.\n");
      }

      # Add color to the model sequence and the sequence.
      my($colored_model_sequence, $colored_sequence) 
	  = &add_color_in_parallel($model_sequence, $sequence, $alphabet);

      # Store everything in this alignment, with and without color.
      $this_alignment .= "\n";
      if ($got_scores) {
	$this_alignment .= $pvalue_sequence . "\n";
      }
      $this_alignment .= $motif_sequence . "\n";
      $this_alignment .= $model_sequence . "\n";
      $this_alignment .= $match_sequence . "\n";
      $this_alignment .= $sequence . "\n";

      $colored_alignment .= "\n";
      if ($got_scores) {
	$colored_alignment .= $pvalue_sequence . "\n";
      }
      $colored_alignment .= $motif_sequence . "\n";
      $colored_alignment .= $colored_model_sequence . "\n";
      $colored_alignment .= $match_sequence . "\n";
      $colored_alignment .= $colored_sequence . "\n";
    }

    # Format and print this alignment.
    printf("<td>%s</td>\n", &format_pre($colored_alignment));
    print("</tr>\n");

    # Store this alignment for later.
    $text_diagram .= &format_text_diagram($this_alignment, $id, $evalue,
					     $program);
    if (&is_id_line($line)) {
      $id_line = $line;
    }
  }
  print("</TABLE>\n");
} # print_pairwise_alignments

############################################################################
# Print motif diagrams from mhmms and mhmmscan input.
############################################################################
sub print_motif_diagrams {
  my($program, $alphabet, $infile) = @_;

  # Print the motif diagrams graphically.	
  print(STDERR "Creating the motif occurrence diagram.\n");

  #print(STDERR "text_diagram=$text_diagram\n");

  &print_subtitle("MOTIF DIAGRAMS");
  print(&format_diagrams($SCALE  / $diagram_scale, # sequence positions/pixel
			 $MAX_DIAGRAM * $diagram_scale, # maximum number of pixels per diagram
			 $text_diagram,
			 $alphabet, # Database
			 $alphabet eq "dna" ? "c" : "p", # stype
			 0,  # xlate (not translating DNA to protein)
			 0,  # no buttons on left
			 "E-value",
			 0, # leave gi names.
			 '\t',	# field delimiter
			 undef,	# skip list
			 \%WIDTHS	# width list
			 ));

  print(&format_hidden("motif diagrams plain text", $text_diagram));
} # print_motif_diagrams


###############################################################
# Add table delimiters to a white-space delimited line.
###############################################################
sub add_table_delimiters {
  my ($line) = @_;
  chomp ($line);
  my @words = split(' ', $line);
  $line = join("</TD><TD>", @words);
  return("<TD>$line</TD>");
}

##############################################################
# Add color to a letter.
##############################################################
sub colorize {
  my($letter, $alphabet) = @_;
  my($color);

  $color = &get_color($alphabet, $letter);
  if ($color eq "BLACK") {
    return($letter);
  } else {
    return("<B CLASS=\"" . &get_color($alphabet, $letter)
           . "\">$letter</B>");
  }
}

##############################################################
# Add color to uppercase letters in a text string.
##############################################################
sub add_color_uppercase {
  my($text, $lowercase, $alphabet) = @_;
  my($return_text, $num_letters, $i_letter, $this_char);

  $return_text = "";
  $num_letters = length($text);
  for ($i_letter = 0; $i_letter < $num_letters; $i_letter++) {
    $this_char = substr($text, $i_letter, 1);

    if ($this_char =~ /[A-Z]/) {
      $return_text .= &colorize($this_char, $alphabet);
    } else {
      $return_text .= $this_char;
    }
  }
  return($return_text);
}

##############################################################
# Add color to two strings of equal length.  Only add color in
# positions where the first string has an alphabetic character.
##############################################################
sub add_color_in_parallel {
  my($first_text, $second_text, $alphabet) = @_;

  my $return_first_text = "";
  my $return_second_text = "";
  my $num_letters = length($first_text);
  for (my $i_letter = 0; $i_letter < $num_letters; $i_letter++) {

    # Get the corresponding characters from each string.
    my $first_char = substr($first_text, $i_letter, 1);
    my $second_char = substr($second_text, $i_letter, 1);

    # Only add color to alphabetic characters.
    if ($first_char =~ /[A-Z]/) {
      $return_first_text .= &colorize($first_char, $alphabet);
      $return_second_text .= &colorize($second_char, $alphabet);
    } else {
      $return_first_text .= $first_char;
      $return_second_text .= $second_char;
    }
  }

  return($return_first_text, $return_second_text);
}

#####################################################################
# add hyperlink to the introduction part
#####################################################################
sub add_hyperlink{
  my($line) = @_;
  chomp($line);
  my(@temp) = split(': ',$line);
  if ($temp[0] =~ /HMM file/){
    $line = $temp[0].": ";
    $temp[1] = "<a href = \"$temp[1]\">$temp[1]</a>";
    $line .= "$temp[1] ";
    $line .= "This file will be stored online for three days";
    return $line;
  }
  else{
    return $line;
  }
}

#############################################################################
# make_explanation
#
# USAGE: &make_explanation($program)
#############################################################################
sub make_explanation(){
  my ($program) = @_;

  my $subtitle;
  if ($program eq "mhmmscan" || $program eq "mhmms") {
    $subtitle = "EXPLANATION OF OUTPUT";
  } elsif($program eq "mhmma") {
    $subtitle = "EXPLANATION OF MULTIPLE ALIGNMENT";
  } elsif($program eq "mhmm") {
    $subtitle = "EXPLANATION OF THE METAMEME MODEL FILE";
  }

  my $file = "$docdir/$program.html";

  my $explanation = &print_subtitle($subtitle);

  # copy the html file contents minus the header and tailer
  $explanation .= &copy_html_file($file, "START OUTPUT", "END OUTPUT");

  print($explanation);
} #make_explanation

#############################################################################
# copy_html_file
#
# USAGE $text = &copy_html_file($infile, $start, $stop)
#
# Returns all lines between the $start and $stop markers in an HTML file.
#
#############################################################################
sub copy_html_file(){
  my ($infile_name, $start, $stop) = @_;
  my($infile);

  open($infile, "<$infile_name") || die("Can't open $infile_name.");
  
  my $out = "";
  my $copying = 0;
  while (<$infile>) {
    last if (/$stop/);
    if (!$copying) {$copying = /$start/; next;}
    if ($copying) { $out .= $_; }
  }
  return($out);
} #copy_html_file

#######################################################################
# Format Text Diagram
#
# Convert a Meta-MEME alignment into a motif occurrence diagram.
# Each diagram looks like this: [1]_5_[4]_43_[3], where numbers 
# in brackets are motif indices, and other numbers are spacer lengths.
# 
# N.B. This function assumes that the motif occurences are complete.
#######################################################################
sub format_text_diagram{
  my($line, $id, $e_value, $program) = @_;

  my($white_space) = 8;
  my $new_line = "";

  # Split the diagram into lines.
  my @lines = split("\n", $line);

  # If the number of lines is a multiple of 5 then there is no score line.
  # If the number of lines is a multiple of 6 then there is a score line.
  # If the number of lines is not a multiple of 5 or 6 something is wrong.
  my $numlines = scalar(@lines);
  my $numlines_per_group = 0;
  my $motifline;
  if ($numlines > 0 && (($numlines % 5) == 0)) {
    $motifline = 1;
    $numlines_per_group = 5;
  } elsif ($numlines > 0 && (($numlines % 6) == 0)) {
    $motifline = 2;
    $numlines_per_group = 6;
  } else {
    print(STDERR "The number of lines in the alignment $id is $numlines, expected a multiple of 5 or 6\n");
    exit(1);
  }

  # Get the starting position of the alignment.
  my @words = split(' ', $lines[$numlines_per_group - 1]);
  my($start_point);

  if ($program eq "mhmmscan") {
    $start_point = 0;
  }
  # Otherwise, begin at the start of the sequence.
  else {
    $start_point = $words[0];
  }

  # Read every fourth or fifth line of the alignment and remove
  # whitespace on ends.  This concatenates the motif line from all the
  # alignment blocks
  my $label_line = "";
  for (my $i_line = $motifline; $i_line < $numlines; 
       $i_line += $numlines_per_group) {
    my $substring = $lines[$i_line];
    $substring =~ s/^\s+//;
    $substring =~ s/\s+$//;
    $label_line .= $substring;
  }

  # Store the ID and E-value.
  $id = &add_entrez_link($id, $alphabet);
  my $return_value = "$id\t$e_value\t";

  # Find all motifs in the line. 
  # Motifs are marked by strings of the form "*----6----*"
  my $end_motif = 0;
  while ($label_line =~ /\*_*([+-]?\d+)_*\*/g) {
    my $start = index $label_line, $&, $end_motif;
    my $w = length($&);
    my $end = $start + $w;
    my $space = $start - $end_motif;
    my $name = $1;
    # save the width of the motif in a global for use in making diagrams
    $name =~ /[+-]?(\d+)/;
    my $motif_id = $1;
    $WIDTHS{$motif_id} = $w;
    $return_value .= "_" . $space . "_[" . $name . "]";
    $end_motif = $end;
  }

  # Special case: no motifs.
  if ($end_motif == 0) {
    $return_value .=  $start_point +  length($label_line);
  } else {
    $return_value .= "_";
    $return_value .= length($label_line) - $end_motif;
  }
  $return_value .= "\n";

  #print(STDERR "return_value=$return_value\n");
  return($return_value);
}

sub make_parameters {
  my($infile) = @_;

  &print_subtitle("PROGRAM PARAMETERS");

  while (my $line = <$infile>) {
    print("$line<BR>\n");
  }
  #close($infile);
}

sub is_id_line() {
  my($line)= @_;
  my $result = 0;

  # N.B. This line needs a comment!
  if ($line =~ /^[\S]+.*$/) {
    #print(STDERR "ID line: $line");
    $result = 1;
  }
  return($result);
}

sub is_divider_line() {
  my($line) = @_;
  my $result = 0;

  # N.B. This line needs a comment!
  if ($line =~ /^ -+$/) {
    #print(STDERR "Divider line: $line");
    $result = 1;
  }
  return($result);
}
