$title

#!@WHICHPERL@ -w # AUTHOR: Haoyuan Zhu and William Stafford Noble and Timothy L. Bailey # CREATE DATE: 1/29/2002 # PROJECT: MHMM # DESCRIPTION: Convert Meta-MEME files to HTML format. use strict; # To do list: # o Automatically determine alphabet. # o Format header better. # o Handling of mhmma files is untested. # o Determine correct parameters for format_diagrams(). # o The block diagrams are too wide. # o Update explanation. (Done for mhmmscan.) ### GLOBAL VARIABLES. # Usage message. my $usage = "USAGE: mhmm2html [options] Options: -alphabet dna|protein (default=dna) -diagram-scale (default=1)\n"; # Get the documentation directory path by finding the my $docdir = "@DOCDIR@"; # process_request.cgi is used to print plain text file #my $process_request = "http://meme-suite.org/cgi-bin/process_request.cgi"; my $process_request = "../cgi-bin/mhmm_process_request.cgi"; # Dimensions of motif occurrence diagrams. my $SCALE = 0.25; # sequence_positions/pixel my $MAX_DIAGRAM = 100000; # maximum number of pixels per diagram # Global list of motif widths my %WIDTHS; # Section header names. my @mhmm_sections = ("HMM STATES", "HMM TRANSITIONS", "EXPLANATION OF THE METAMEME MODEL FILE", "PROGRAM PARAMETERS"); my @mhmma_sections = ("MULTIPLE ALIGNMENT"); my @mhmms_sections = ("DATABASE SEARCH RESULTS", "ALIGNMENTS", "MOTIF DIAGRAMS", "EXPLANATION OF OUTPUT", "PROGRAM PARAMETERS"); my @colors = ("#DDDDFF", "#00FFFF", "#DDFFDD", "#FFFF00", "#DDAA00"); # Define buttons for navigation within document my %buttons = ( "top", "Go to top!#DDDDFF!#000000" ); my $text_diagram; my $num_seqs; ############################################################################## # Functions originally from convert2html... # as this is the only remaining script that uses convert2html the # functions have been merged to save clutter... # START convert2html.pl ############################################################################## # # subroutines and globals used by mhmm2html # my $DIVIDER = "^\\*\\*\\*\\*\\*"; # section divider in output my $SUBDIV = "^--------------------------------------------------------------------------------"; # subsection divider in output my $ELIPSIS = " ·
·
· "; my $BODY = "#D5F0FF"; # the background color of the page (light blue by default) my $WEAK_FONT = "50% sans-serif"; # font size for weak motifs my $THIN_LINE = 4; # thickness of thin spacer lines my $FAT_LINE = 8; # thickness of fat spacer lines (for too long seqs) my $MIN_WIDTH = 30; # minimum width (in pixels) for motifs my $MAX_NAME_LEN = 34; # maximum length of truncated sequence name # Colors for the motifs and their labels (motif numbers). my @MOTIF_COLORS=( "aqua", "blue", "red", "fuchsia", "yellow", "lime", "teal", '#444444', "green", "silver", "purple", "olive", "navy", "maroon", "black", "white" ); my @MOTIF_LABEL_COLORS=( "black", "white", "white", "black", "black", "black", "white", "white", "white", "black", "white", "black", "white", "white", "white", "black" ); my @IC_COLORS=( "red", "blue", "orange", "green", "black", "magenta", "pink", "yellow", "turquoise" ); # Added this global here--must be the same as the global in all the calling # programs (meme2html, mast2html), but they will obsolete soon! my $MBPSUB = "XXX---XXX"; #-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* #-* SUBROUTINES #-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* #------------------------------------------------------------------------------ # print the HTML header (including style sheet) and set the body color # $title title of HTML page # $body_color background color; light blue if omitted #------------------------------------------------------------------------------ sub print_header { my ( $title, # title of HTML page $body_color # background color; light blue if omitted ) = @_; my ($i); if (defined($body_color)) { $BODY = $body_color; } print < $title END # print the style sheet print '\n"; # # end the header and start the body # print "\n"; print "\n"; } # print_header #------------------------------------------------------------------------------ # find a section marker with the specified keyword (case sensitive) #------------------------------------------------------------------------------ sub find_section { my( $key ) = @_; my( $line ); LINE: while ( ) { unless ( /\*\*\*\*\*/ ) { next LINE; } $line = ; unless ( $line =~ /$key/ ) { ; next LINE; } last; } return( $line ); } # find_section #------------------------------------------------------------------------------ # next_section: # Find a section marker and return the title line. The following line of # stars is removed. # # USAGE: $text = &next_section(); #------------------------------------------------------------------------------ sub next_section { my( $div ) = @_; my( $line ); unless ( defined($div) ) { $div = $DIVIDER; } while ( ) { unless ( /^$div/ ) { next; } $line = ; ; last; } chop $line; return( $line ); } # next_section #------------------------------------------------------------------------------ # read_block: # Read a block of text until terminated by a blank line. #------------------------------------------------------------------------------ sub read_block { my( $line ); LINE: while ( ) { if ( /^\s*$/ ) { last LINE; } $line .= $_; } return( $line ); } # read_block #------------------------------------------------------------------------------ # next_block: # Read the next block of text until terminated by a divider line. The divider # by default, is specificed in $DIVIDER (normally a line of *). If a # parameter is passed in, it is used as the divider. # Removes \r from input. # # USAGE: $text = &next_block(); # $text = &next_block( divider ); #------------------------------------------------------------------------------ sub next_block { my( $div ) = @_; my( $line ); unless ( defined($div) ) { $div = $DIVIDER; } while ( ) { s/\r//g; if ( /$div/ ) { last; } $line .= $_; } return( $line ); } # next_block #------------------------------------------------------------------------------ # format_section: # Add HTML formatting for a section or subsection head. # The specified heading appears as # the section title with the specified name as an internal link. # # USAGE: $text = &format_section( pre, link, rest, name, ext) #------------------------------------------------------------------------------ sub format_section { my( $pre, # just print this part $link, # add link to this part $rest, # just print this part $name, # tag is "$name$ext" $ext # ref is to "$name_doc" ) = @_; my( $out, $ref, $tag ); if ( $name =~ /^\s*$/ ) { # all blank name $out = "

$pre \n $link $rest\n

"; } else { # name given $ref = $name . "_doc"; $tag = $name . $ext; $link = "$link"; $out = "

\n $pre $link $rest\n

"; } return($out); } # format_section #------------------------------------------------------------------------------ # format_para: # Add HTML formatting for a paragraph. New lines are ignored, blank lines are # converted to paragraphs. # # USAGE: $new_text = &format_para( text ); #------------------------------------------------------------------------------ sub format_para { my ( $line ) = @_; my( $out ); $out = "

\n$line"; $out =~ s/\n\s*\n/\n

\n/g; return( $out ); } # format_para #------------------------------------------------------------------------------ # format_pre: # Add HTML formatting for a preformatted block. # # USAGE: $new_text = &format_pre( text ); #------------------------------------------------------------------------------ sub format_pre { my ( $line ) = @_; my( $out ); $out = ($line=~ /\w/) ? "

\n$line

\n" : ""; return( $out ); } # format_pre #------------------------------------------------------------------------------ # format_diagrams: # Convert the text diagrams to colored diagrams in HTML. The scale is 1/$scale # pixels per sequence position for sequences of up to $max_diagram*$scale pixel. # Diagrams for longer sequences are scaled to fit in $max_diagram pixels # and the spacer lines are made thicker (and motif boxes may shrink). # Weak motifs are labeled with font size $WEAK_FONT. # When the motifs are protein and the database DNA, the motif widths are # multiplied by 3 since they are in codon units to start with. # Uses global variables @MOTIF_COLORS, $WEAK_FONT, $FAT_LINE, $THIN_LINE. # # # Sets global $META to contain information from SUMMARY of MOTIFS for # Meta-MEME. # #------------------------------------------------------------------------------ sub format_diagrams { my( $scale, $max_diagram, $text, $db, $stype, $xlate, $make_buttons, $col2hdr, $no_gi_names, $field_delim, $skip, $width_ref) = @_; my( $i, $out, @line, $l, $w, $wid_sum, $nmotifs, $nspacers); my( $max_spacers, $max_motifs, $name, $evalue, $diagram); my( @field, $f, $motif, $link, $seqno ); my( @scale1, $col, $color, $font, $fsize, $wide, $fill, $mscale ); my( $ncol, $dist, $w2, $loc, @nocc, @seqlen, $lno, $ncolors ); my $re_en = "\$[+-]?\\d*\\.?\\d*e[+-]?\\d+\$"; # an e-format in parens # start a table and header row $out = "\n"; $ncol = 0; # number of columns in table $ncolors = scalar(@MOTIF_LABEL_COLORS); # number of distinct motif colors # put buttons linking to score and annotation? if ($make_buttons) { $out .= "\n "; if ($make_buttons) { $evalue = "$evalue"; my $button; if ( $name =~ /\|/ ) { $button = make_button_panel("!", \%buttons, $db."entrez!$name", "score!$seqno", "align!$seqno", "help" ); } else { $button = make_button_panel("!", \%buttons, "score!$seqno", "align!$seqno", "help" ); } $out .= "

Links"; $ncol += 1; } if ($stype eq "s") { # scoring DNA strands separately $out .= "

Name

Strand

$col2hdr

Motifs\n"; $ncol += 4; } else { # PROTEIN $out .="

Name

$col2hdr

Motifs\n"; $ncol += 3; } # split the text into lines $text =~ s/\n\s+//g; # concat continued lines @line = split /\n/, $text; # find the width of each diagram if ($xlate) { # translating DNA $mscale = 3; } else { $mscale = 1; } my $max_width = 0; # remove header lines for ($i=0; $i<=$#line; $i++) { $l = $line[$i]; last if ($l =~ /-------------/); } while ($i>=0) { shift @line; $i--; } # # calculate the approximate width of diagram # $lno = 0; # line number $max_motifs = $max_spacers = 0; # no diagrams read yet $seqno = 0; foreach $l (@line) { if ($stype eq "s") { my $strand; ($name,$strand,$evalue,$diagram) = ($field_delim eq " ") ? split( " ", $l ) : split( $field_delim, $l ); } else { ($name,$evalue,$diagram) = ($field_delim eq " ") ? split( " ", $l ) : split( $field_delim, $l ); } # get sequence number $seqno++; # skip this sequence? if (defined($skip) && defined($skip->[$seqno])) { next; } # split the diagram into fields @field = split( '[ _]', $diagram ); # calculate the approximate width $wid_sum = $nocc[$lno] = 0; foreach $f (@field) { if ( $f =~ /[<>\[\]]/ ) { # motif occurrence ($motif) = $f =~ /[<\[][+-]?(\d+)[abc]?($re_en)?[>\]]/; $wid_sum += $$width_ref{$motif} * $mscale; $seqlen[$lno] += $$width_ref{$motif}; # length of sequence $nocc[$lno]++; # number of motif occurrences } elsif ($f ne "") { # spacer $seqlen[$lno] += $f; # length of sequence $wid_sum += $f; } } # calculate a scale so that diagram fits in $max_diagram pixels $scale1[$seqno] = $scale; if ($wid_sum/$scale > $max_diagram) { $scale1[$seqno] = $wid_sum/$max_diagram; } # calculate the exact scaled diagram width $wid_sum = 0; $nmotifs = 0; $nspacers = 0; foreach $f (@field) { if ( $f =~ /[<>\[\]]/ ) { ($motif) = $f =~ /[<\[][+-]?(\d+)[abc]?($re_en)?[>\]]/; $wide = int($mscale*$$width_ref{$motif}/$scale1[$seqno]+0.5); if ($wide < $MIN_WIDTH) { $wide = $MIN_WIDTH; } $nmotifs++; } elsif ($f ne "") { $wide = $f/$scale1[$seqno]; $wide = int($wide + 0.5); # round to integer $nspacers++; } else { $wide = 0; } $wid_sum += $wide; } # save the length of the longest diagram if ($wid_sum > $max_width) { $max_width = $wid_sum; $max_motifs = $nmotifs; $max_spacers = $nspacers; } $lno++; # line number } # line # kludge for Netscape 4.0; make width larger $max_width += 14 + (2*$max_motifs) + (3*$max_spacers); # set max_width to at least my $min_width = int(50.0/$scale + 0.5); if ($max_width < $min_width) { $max_width = $min_width; } # make the diagrams my $META = ""; $lno = 0; # line number $seqno = 0; foreach $l (@line) { my $strand; # length of space holder at end of diagram if ($stype eq "s") { ($name,$strand,$evalue,$diagram) = ($field_delim eq " ") ? split( " ", $l ) : split( $field_delim, $l ); } else { ($name,$evalue,$diagram) = ($field_delim eq " ") ? split( " ", $l ) : split( $field_delim, $l ); } $seqno++; if (defined($skip) && defined($skip->[$seqno])) { if (!defined($skip->[$seqno-1])) { $out .= "

$ELIPSIS\n"; } next; # skip this sequence } # start row of table $out .= "

$button\n"; } # make_buttons # write name of sequence if ($no_gi_names) { $name =~ s/^gi\|\d+\|//; # leading gi|123 removed } $link = "$name"; $out .= "

$link\n"; $META .= "$name"; if ($stype eq "s") { $out .= "

$strand\n"; } $out .= "

$evalue\n"; $META .= " $evalue $nocc[$lno] $seqlen[$lno]"; $out .= "

\n"; @field = split( '[ _]', $diagram ); $fsize = $THIN_LINE; # font size for spacer line if ($scale1[$seqno] != $scale) { $fsize = $FAT_LINE; } my $tail = $max_width; my $position = 0; # position in sequence foreach $f (@field) { my ($st, $frame, $pv); if ($f eq "") { next; } elsif ( $f =~ /[<>\[\]]/ ) { # motif occurence ($st, $motif, $frame, $pv) = $f =~ /[<\[]([+-]?)(\d+)([abc]?)($re_en)?[>\]]/; if (!defined $pv) { $pv = "(?)"; } ($pv) = $pv =~ /$([^)]+)$/; $wide = int($mscale*$$width_ref{$motif}/$scale1[$seqno]+0.5); if ($wide < $MIN_WIDTH) { $wide = $MIN_WIDTH; } if ($motif eq ""){ $color = "gray"; $motif = " "; } else { $col = ( $motif - 1 ) % $ncolors; $color = $MOTIF_COLORS[$col]; } # set color and size of motif label; # weak motifs have font size $WEAK_FONT $font = ""; my $endfont = ""; my $class = ($f =~ /$st$motif$frame\n"; $META .= " $st$motif $position $pv"; # for meta-meme $position += $$width_ref{$motif}; # letter position in sequence } elsif ($f ne "") { # spacer $wide = $f/$scale1[$seqno]; $wide = int($wide + 0.5); # round to integer $out .= "

\n"; $position += $f; # letter position in sequence } $tail -= $wide; } if ($tail > 0) { $out .= "

\n"; } $out .= "

\n"; $META .= "\n"; # metameme data $lno++; # line number } # line # # print a scale # $dist = 50; # pixel distance between rules $w2 = $dist - 1.0/$scale; # distance to second rule $ncol--; # number of columns scale spans $out .= "

SCALE\n"; $out .= "

\n"; $out .= " \n"; for ($i=$dist; $i<$max_width; $i+=$dist) { $loc = $i * $scale; last if ($i > $max_width-50); # make sure number will fit $out .= " \n"; } $out .= " \n"; for ($i=$dist; $i<$max_width; $i+=$dist) { $loc = $i * $scale; last if ($i > $max_width-50); # make sure number will fit $out .= " \n"; } $out .= "

\|	\|
1	$loc

\n"; # end the table $out .= "

\n"; return( $out ); } # format_diagrams #------------------------------------------------------------------------------ # find_line #------------------------------------------------------------------------------ sub find_line { my( $key ) = @_; my( $line ); LINE: while ( $line = ) { unless ( $line =~ /$key/ ) { next LINE; } last; } return( $line ); } # find_line #------------------------------------------------------------------------------ # make_button_panel: # # Use the input associative array, and the specified keys to create a HTML # table with labeled buttons. # # The buttons are defined in the associative array passed in the second # parameter. This array is indexed by the key, and contains three fields # separated by the divider specified in the 1st parameter. The fields # are the tag template, the background color, and the font color. Note that # a tag template that is a link will override the font color. # # The tag template is a string that, optionally, can have the key substituted # into it at all positions marked by $MBPSUB (this is a symbol to avoid # collisions with arbitrary strings). # # Note that each line containing a button panel should be followed with a #
to prevent the next line from being indented to the position # following the panel. This is because, even though the panel uses a -1 font, # it is still taller than the text and therefore creates an apparent indent in # the same way a drop capital would. # # USAGE: # = &make_button_panel(

ID E-value Score	Alignment
%s %s %s	%s