#!@WHICHPERL@

my $DEFAULT_EVT = 0.05;
my $DEFAULT_MINW = 6;
my $DEFAULT_MAXW = 15;
my $DEFAULT_CTRIM = 0;
my $DEFAULT_ALIGN = "center";
my $DEFAULT_GROUP_THRESH = 0.05;
my $DEFAULT_VERBOSITY = 1;
my $DEFAULT_MOTIF_PSEUDO = 0.01;
my $DEFAULT_MIN_STREME_MOTIFS = 3;

=head1 NAME

XSTREME (Motif Discovery and Enrichment Analysis)
  1) Performs de novo motif discovery using MEME and STREME. 
  2) Performs motif enrichment analysis using SEA to rank 
     the de novo motifs and motifs in user provided database(s).  
  3) Clusters similar motifs using Tomtom.

=cut

my $USAGE = <<"END_USAGE";
xstreme [options] --p <primary sequences> [--m <motifs>]*

 Options:
					***INPUT***
  --p                <path>  : primary sequence file name (required) 
  --m                <path>  : file of known motifs in MEME format; (optional, may be repeated)

					***OUTPUT***
  --o                <dir>   : output to the specified directory, failing if the directory exists
  --oc               <dir>   : output to the specified directory, overwriting if the directory exists

			***CONTROL SEQUENCES AND BACKGROUND MODEL***
  --n                <path>  : negative (control) sequence file name;
  			       the control sequences will be input to STREME and SEA, and
			       used to create a background file for MEME, STREME and SEA.
                               default: control sequences will be created by shuffling
			       the primary sequences for STREME and SEA, and a background
			       file for MEME will be created from the primary sequences
  --order            <m>     : the Markov order of the shuffling for creating negative sequences
			       (preserve k-mers where k=m+1), and the order of the
			       Markov background model to create if none given;
			       <m> must be in the range: [0,..,4];
                               default: 2 (DNA and RNA), 0 (Protein and Custom alphabets)
  --bfile            <path>  : background file to use with MEME, STREME and SEA; overrides creating of the
			       background file described under --n
  --seed             <seed>  : Random seed to be passed to MEME, STREME and SEA; default: 0

					***ALPHABET***
  --dna                      : set the alphabet to DNA; this is the default
  --rna                      : set the alphabet to RNA; default: DNA
  --protein                  : set the alphabet to PROTEIN; default: DNA
  --[x]alph          <path>  : alphabet file containing (possibly non-standard) alphabet;
                               if the input motifs (see --m) are in an alphabet that is
                               a subset of the input sequences, --xalph will also cause them
                               to be converted to the alphabet in the file; default: DNA
  --dna2rna                  : input DNA sequences will be treated as RNA and discoverd motifs
                               will use the RNA alphabet; known motifs be in the RNA alphabet
                               unless you also specify --xalph with an RNA alphabet file

			***OUTPUT FILTERING AND NUMBER OF MOTIFS***
  --evt               <evt>  : E-value threshold for including motifs; default: $DEFAULT_EVT
  --time          <minutes>  : maximum time (in seconds) that XSTREME has to run; default: no limit

					***MOTIF WIDTH***
  --minw             <num>   : minimum motif width; default: $DEFAULT_MINW
  --maxw             <num>   : maximum motif width; default: $DEFAULT_MAXW
  --w                <num>   : maximum motif width; overrides --minw and --maxw

					***MISC***
  --mea-only                 : just do Motif Enrichment Analysis (MEA) using the known motifs, 
                               no motif discovery; default: do motif discovery, too
  --ctrim             <num>  : centrally trim sequences to this size; default: $DEFAULT_CTRIM (no trim)
  --align left|center|right  : align sequences left/center/right for site positional 
                               distribution plots; default: $DEFAULT_ALIGN
  --group-thresh     <gthr>  : primary threshold for clustering motifs; default: $DEFAULT_GROUP_THRESH
  --group-weak       <gwk>   : secondary threshold for clustering motifs; default: 2*gthr
  --desc             <text>  : description of the job
  --dfile            <file>  : file containing plain text description of the job
  --help                     : display this help message
  --version                  : print the version and exit
  --verbosity [0|1|2|3|4|5]  : amount of outout information messages; default: $DEFAULT_VERBOSITY

 				***STREME SPECIFIC OPTIONS*** 
  --streme-evt        <evt>  : STREME stops if 3 consecutive motifs have E-value greater than <num>; 
                               default: value specified for --evt (or its default)
  --streme-nmotifs    <num>  : maximum number of motifs to find; overrides --streme-evt;
                               if =0, STREME will not be run
  --streme-totallength <num> : Restrict the maximum total length of the sequences used by STREME;
                               default: no limit 

 				***MEME SPECIFIC OPTIONS*** 
  --meme-evt         <evt>   : MEME stops if motif E-value is greater than <evt>; 
                               default: value specified for --evt (or its default)
  --meme-nmotifs     <num>   : maximum number of motifs to find; overrides --meme-evt;
                               if =0, MEME will not be run
  --meme-searchsize  <size>  : MEME will sample primary sequence datasets over <size> letters
  --meme-p           <np>    : use parallel version with <np> processors
  --meme-brief       <num>   : reduce size of MEME output files if more than <num> primary sequences
  --meme-mod [oops|zoops|anr]: The number of motif sites MEME will find per sequence; 
                               default: zoops

 				***SEA SPECIFIC OPTIONS*** 
  --sea-noseqs               : do not output the matching sequences TSV file (to save space);
                               default: output the matching sequences TSV file

 				***FIMO SPECIFIC OPTIONS*** 
  --fimo-skip                : don't run FIMO (to save time and space)

END_USAGE

use strict;
use warnings;

use Cwd qw(abs_path);
use Data::Dumper;
use Fcntl qw(O_CREAT O_RDONLY O_WRONLY O_TRUNC SEEK_SET);
use File::Basename qw(fileparse);
use File::Copy qw(copy);
use File::Path qw(mkpath);
use File::Temp qw(tempfile);
use File::Spec::Functions qw(abs2rel catfile catdir splitdir tmpdir);
use Getopt::Long;
use List::Util qw(min max);
use Pod::Usage;
use POSIX qw(floor);
use Time::HiRes qw(gettimeofday tv_interval);
use XML::Simple;

use lib qw(@PERLLIBDIR@);
use Globals;
use Alphabet qw(dna rna protein);
use Citation;
use ExecUtils qw(invoke stringify_args2);
use MotifUtils qw(intern_to_meme read_background_file);
use MotifInMemeXML;
use MotifInStremeXML;
use JsonRdr;
use HtmlMonolithWr;

# Globals
my $t0 = [&gettimeofday()];
my $logger = undef;
my $bin_dir = undef;
my $secondary_bin_dir = undef;
my $meme_bin_dir = undef;		# just may be in subdirectory parallel/
my $etc_dir = undef;
my $script_dir = undef;
my $temp_dir = undef;
my $version = undef;
my $revision = undef;
my $release = undef;
my $progress_log = undef;
my @cmd = (scalar fileparse($0), @ARGV);
my @jobs = ();

# Run XSTREME
&main();

#
# Setup some globals
#
sub initialise {
  # setup logging
  eval {
    require Log::Log4perl;
    Log::Log4perl->import();
    Log::Log4perl::init('@APPCONFIGDIR@/logging.conf');
    $logger = Log::Log4perl->get_logger('meme.service.xstreme');
    $SIG{__DIE__} = sub {
      return if ($^S);
      $logger->fatal(@_);
      die @_;
    };
    $logger->trace("call initialise");
  };

  # disable buffering on stdout (it should be selected by default)
  $| = 1;

  # setup primary binary directory
  $bin_dir = $ENV{'MEME_BIN_DIR'} ? $ENV{'MEME_BIN_DIR'} : '@BINDIR@';

  # setup secondary bin directory
  $secondary_bin_dir = $ENV{'MEME_SECONDARY_BIN_DIR'} ? $ENV{'MEME_SECONDARY_BIN_DIR'} : '@LIBEXECDIR@';

  # see if there is a parallel directory under $bin_dir for running meme
  $meme_bin_dir = (-e "$bin_dir/parallel") ? "$bin_dir/parallel" : $bin_dir;

  # setup script directory
  $script_dir = $ENV{'MEME_SCRIPT_DIR'} ? $ENV{'MEME_SCRIPT_DIR'} : '@LIBEXECDIR@';

  # setup etc directory
  $etc_dir = $ENV{'MEME_DATA_DIR'} ? $ENV{'MEME_DATA_DIR'} : '@APPCONFIGDIR@';

  # setup temporary directory
  $temp_dir = '@TMP_DIR@';
  # use the perl default if none is supplied or the replace fails
  $temp_dir = &tmpdir() if ($temp_dir eq '' || $temp_dir =~ m/^\@TMP[_]DIR\@$/);

  # store program info
  $version = '@ARCHIVE_VERSION@';
  $revision = '@ARCHIVE_REVISION@';
  $release = '@ARCHIVE_DATE@';
}

sub init_progress_log {
  my ($out_dir) = @_;
  my $progress_log_filename = catfile($out_dir, 'progress_log.txt');
  sysopen($progress_log, $progress_log_filename, 
    O_CREAT | O_TRUNC | O_WRONLY) or die("Failed to create progress log file.");
  # disable output buffering as this is meant to document when things go wrong
  my $old_fh = select($progress_log); $| = 1; select($old_fh);
}

sub time_allocate {
  my ($opts, $stage) = @_;
  # no limit unless maximum time set
  return undef unless $opts->{TIME};
  # calculate how long we had originally (convert to seconds)
  my $o = 60 * $opts->{TIME};

  # calculate how many seconds we have left
  my $r = $o - int(&tv_interval($t0, [&gettimeofday()]) + 0.5);
 
  # Set up the relative amounts of time needed for each stage.
  my %need = (
    fimo => 20,
    distr_sea => 20, 
    align_tomtom => 10,
    meme_tomtom => 10,
    streme_tomtom => 10,
    sea => 30,
    meme => 100,
    streme => 100
  );

  my $left = 0;
  my $want = $need{$stage};
  if (defined $want) {
    # Loop over stages in reverse order of their running to add the
    # remaining time needed.
    foreach my $s ( "fimo", "distr_sea", "align_tomtom", "meme_tomtom", "streme_tomtom", 
      "sea", "meme", "streme") {
      $left += $need{$s};
      last if ($s eq $stage);
    }
  } else {
    # Note: all other stages are essential so will be left to run
    # even if it looks like we will completely run out of time.
    $left = 1;
    $want = 1;
  }

  my $allocate = int($r * ($want / $left));

  return $allocate;
} # time_allocate

#
# Parse the arguments
#
sub arguments {
  $logger->trace("call arguments") if $logger;
  # Set Option Defaults
  my $options = {VERBOSITY => $DEFAULT_VERBOSITY, 
    CLOBBER => 1, OUT_DIR => 'xstreme_out',
    DESCRIPTION => undef, SEED => 0, CTRIM => $DEFAULT_CTRIM, ALIGN => $DEFAULT_ALIGN,
    POS_SEQS => undef, NEG_SEQS => undef, ALPH_NAME => 'DNA', ALPH_FILE => undef, XALPH => 0, DNA2RNA => 0,
    BFILE => undef, ORDER => undef, 
    GROUP_WEAK_THRESH => undef, GROUP_THRESH => $DEFAULT_GROUP_THRESH, 
    EVT => $DEFAULT_EVT, 
    TIME => undef,
    MINW => undef, MAXW => undef, W => undef, MEME_MOD => 'zoops', 
    MEME_EVT => undef, MEME_NMOTIFS => undef, 
    MEME_MINSITES => undef, MEME_MAXSITES => undef, 
    MEME_P => undef, MEME_SEARCHSIZE => undef,
    STREME_EVT => undef, STREME_NMOTIFS => undef, STREME_TOTALLENGTH => undef,
    DBS => [], VALGRIND => undef};
  # General Options
  my $help = 0; # FALSE
  my $show_version = 0; # FALSE
  my @errors = ();
  my @dbs = ();

  # get the options from the arguments
  my $options_success = 0; # FALSE
  my $options_err = undef;
  # redirect stderr to a temp file so we can get the error message from GetOptions
  my $olderr;
  my $tmperr = tempfile('GetOptions_XXXXXXXXXX', DIR => $temp_dir, UNLINK => 1);
  open($olderr, ">&STDERR") or die("Can't dup STDERR: $!");
  open(STDERR, '>&', $tmperr) or die("Can't redirect STDERR to temp file: $!");
  # parse options
  $options_success = GetOptions(
    'help|?'          => \$help,
    'version'         => \$show_version,
    'o=s'             => sub {$options->{CLOBBER} = 0; shift; $options->{OUT_DIR} = shift},
    'oc=s'            => \$options->{OUT_DIR},
    'desc=s'          => \$options->{DESCRIPTION},
    'dfile|fdesc=s'   => # slurp the description into a scalar
    sub {
      my ($param, $file) = @_;
      open(my $fh, '<', $file) or die("Couldn't open description file: $!\n");
      $options->{DESCRIPTION} = do { local( $/ ); <$fh>};
      close($fh);
    },
    'seed=i'            => \$options->{SEED},
    'ctrim=i'           => \$options->{CTRIM},
    'align=s'           => \$options->{ALIGN},
    'time=i'            => \$options->{TIME},
    'm=s'               => \@{$options->{DBS}},
    'p=s'               => \$options->{POS_SEQS},
    'n=s'               => \$options->{NEG_SEQS},
    'dna'               => sub {$options->{ALPH_NAME} = 'DNA'},
    'rna'               => sub {$options->{ALPH_NAME} = 'RNA'},
    'protein'           => sub {$options->{ALPH_NAME} = 'PROTEIN'},
    'alph=s'            => sub {$options->{ALPH_NAME} = 'CUSTOM'; shift; $options->{ALPH_FILE} = shift},
    'xalph=s'           => sub {$options->{XALPH} = 1; $options->{ALPH_NAME} = 'CUSTOM'; shift; $options->{ALPH_FILE} = shift},
    'dna2rna'           => \$options->{DNA2RNA},
    'mea-only'          => \$options->{MEA_ONLY},
    'bfile=s'           => \$options->{BFILE},
    'order=i'           => \$options->{ORDER},
    'minw=i'            => \$options->{MINW}, 
    'maxw=i'            => \$options->{MAXW}, 
    'w=i'               => \$options->{W}, 
    'group-thresh=f'    => \$options->{GROUP_THRESH},
    'group-weak=f'      => \$options->{GROUP_WEAK_THRESH},
    'evt=f'             => \$options->{EVT},
    'streme-evt=f'      => \$options->{STREME_EVT},
    'streme-nmotifs=i'  => \$options->{STREME_NMOTIFS},
    'streme-totallength=f'  => \$options->{STREME_TOTALLENGTH},
    'meme-mod=s'        => \$options->{MEME_MOD},
    'meme-evt=f'        => \$options->{MEME_EVT}, 
    'meme-nmotifs=i'    => \$options->{MEME_NMOTIFS}, 
    'meme-searchsize=i' => \$options->{MEME_SEARCHSIZE},
    'meme-p=s'          => \$options->{MEME_P},
    'meme-brief=i'      => \$options->{MEME_BRIEF},
    'sea-noseqs'        => \$options->{SEA_NOSEQS},
    'fimo-skip'         => \$options->{FIMO_SKIP},
    'verbosity=i'       => \$options->{VERBOSITY},
    'valgrind'		=> \$options->{VALGRIND}
  );
  # display version
  if ($show_version) {
    print STDOUT $version, "\n";
    exit(0);
  }
  # display help
  pod2usage($USAGE) if $help;
  # reset STDERR
  open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!");
  # read argument parsing errors
  seek($tmperr, 0, SEEK_SET);
  while (<$tmperr>) {chomp; push(@errors, $_);}
  close($tmperr);
  # by default make the weak threshold to 2 times the strong threshold
  $options->{GROUP_WEAK_THRESH} = $options->{GROUP_THRESH} * 2 unless $options->{GROUP_WEAK_THRESH};
  # check sequence
  unless (defined($options->{POS_SEQS})) {
    push(@errors, "No (primary) sequences provided.");
  } elsif (not -e $options->{POS_SEQS}) {
    push(@errors, "The sequences specified do not exist.");
  }
  # check the negative sequences exist
  if (defined($options->{NEG_SEQS}) && (not -e $options->{NEG_SEQS})) {
    push(@errors, "The control sequences specified do not exist.");
  }
  # check alphabet file
  if (defined($options->{ALPH_FILE}) && (not -e $options->{ALPH_FILE})) {
    push(@errors, "Value \"$options->{ALPH_FILE}\" invalid for option --alph (file does not exist).");
  }
  # check background file
  if (defined($options->{BFILE}) && (not -e $options->{BFILE})) {
    push(@errors, "Value \"$options->{BFILE}\" invalid for option --bfile (file does not exist).");
  }
  # check databases
  if (@{$options->{DBS}}) {
    foreach my $db (@{$options->{DBS}}) {
      unless (-e $db) {
        push(@errors, "Value \"$db\" invalid for option --m (file does not exist).");
      }
    }
  }
  # motif enrichment analysis only: turn off motif discovery and motif scanning
  if ($options->{MEA_ONLY}) {
    unless (scalar(@{$options->{DBS}}) > 0) { 
      push(@errors, "You must specify option --m at least once if you specify option --mea-only.")
    }
    $options->{MEME_NMOTIFS} = 0;
    $options->{STREME_NMOTIFS} = 0;
    $options->{FIMO_SKIP} = 1;
  }
  # check min width
  if (defined($options->{MINW}) && ($options->{MINW} < $MINW || $options->{MINW} > $MAXW)) {
    push(@errors, "Value $options->{MINW} invalid for option --minw (must be >= $MINW and <= $MAXW).");
    $options->{MINW} = undef;
  }
  # check max width
  my $MW = (defined($options->{MINW}) ? $options->{MINW} : $MINW);
  if (defined($options->{MAXW}) && ($options->{MAXW} < $MW || $options->{MAXW} > $MAXW)) {
    push(@errors, "Value $options->{MAXW} invalid for option --maxw (must be >= $MW and <= $MAXW).");
  }
  # check width; overrides --minw and --maxw
  if (defined($options->{W})) {
    if ($options->{W} < $MINW || $options->{W} > $MAXW) {    
      push(@errors, "Value $options->{W} invalid for option --w (must be >= $MINW and <= $MAXW).");
    }
    $options->{MINW} = $options->{MAXW} = $options->{W};
  }
  # give defaults for min width and max width
  unless (defined($options->{MINW})) {
    if (defined($options->{MAXW})) {
      $options->{MINW} = min($DEFAULT_MINW, $options->{MAXW});
    } else {
      $options->{MINW} = $DEFAULT_MINW;
    }
  }
  unless (defined($options->{MAXW})) {
    $options->{MAXW} = max($DEFAULT_MAXW, $options->{MINW});
  }
  # check evt; initialize meme-evt and streme-evt
  if ($options->{EVT} <= 0) {
    push(@errors, "Value \"$options->{EVT}\" invalid for option --evt (must be > 0).");
  } else {
    $options->{MEME_EVT} = $options->{EVT} unless defined($options->{MEME_EVT});
    $options->{STREME_EVT} = $options->{EVT} unless defined($options->{STREME_EVT});
  }
  # check ctrim 
  if ($options->{CTRIM} < $MINMEMESEQW && $options->{CTRIM} != 0) {
    push(@errors, "Value \"$options->{CTRIM}\" invalid for option --ctrim (must be >= $MINMEMESEQW or 0).");
  }
  # check align 
  if ($options->{ALIGN} ne "left" && $options->{ALIGN} ne "center" && $options->{ALIGN} ne "right") {
    push(@errors, "Value \"$options->{ALIGN}\" invalid for option --align (must be 'left', 'center' or 'right').");
  }
  # check time
  if (defined($options->{TIME}) && $options->{TIME} < 1) {
    push(@errors, "Value \"$options->{TIME}\" invalid for option --time (must be >= 1).");
  }
  # check background Markov order
  if (defined($options->{ORDER})) {
    if ($options->{ORDER} < 0 || $options->{ORDER} > 4) {
      push(@errors, "Value \"$options->{ORDER}\" invalid for option --order (must be >= 0 and <= 4).");
    }
  } else {
    if ($options->{ALPH_NAME} eq 'DNA' || $options->{ALPH_NAME} eq 'RNA' || $options->{DNA2RNA}) {
      # DNA or RNA
      $options->{ORDER} = 2;
    } else {
      # Protein or Custom alphabet
      $options->{ORDER} = 0;
    }
  }
  # check meme mode
  if ($options->{MEME_MOD} !~ m/^(oops|zoops|anr)$/) {
    push(@errors, "Value \"$options->{MEME_MOD}\" invalid for option --meme-mod (must be oops, zoops or anr).");
  }
  # check meme evt
  if (defined($options->{MEME_EVT}) && $options->{MEME_EVT} < 0) {
    push(@errors, "Value \"$options->{MEME_EVT}\" invalid for option --meme-evt (must be >= 0).");
  }
  # check meme motif count; override meme-evt
  if (defined($options->{MEME_NMOTIFS})) {
    if ($options->{MEME_NMOTIFS} < 0) {
      push(@errors, "Value $options->{MEME_NMOTIFS} invalid for option --meme-nmotifs (must be >= 0).");
    }
    $options->{MEME_EVT} = undef;
  }
  # check meme searchsize
  if (defined($options->{MEME_SEARCHSIZE}) && $options->{MEME_SEARCHSIZE} < 0) {
    push(@errors, "Value $options->{MEME_SEARCHSIZE} invalid for option --meme-searchsize (must be >= 0).");
  }
  # check meme p
  if (defined($options->{MEME_P}) && $options->{MEME_P} !~ m/^\s*[1-9]\d*(?:\s.*)?$/) {
    push(@errors, "Value \"$options->{MEME_P}\" invalid for option --meme-p (must begin with a positive integer).");
  }
  # check streme evt
  if (defined($options->{STREME_EVT}) && ($options->{STREME_EVT} < 0)) {
    push(@errors, "Value \"$options->{STREME_EVT}\" invalid for option --streme-evt (must be >= 0).");
  }
  # check streme motif count; override streme-evt
  if (defined($options->{STREME_NMOTIFS})) {
    if ($options->{STREME_NMOTIFS} < 0) {
      push(@errors, "Value \"$options->{STREME_NMOTIFS}\" invalid for option --streme-nmotifs (must be >= 0).");
    }
    $options->{STREME_EVT} = undef;
  }
  # check streme totallength
  if (defined($options->{STREME_TOTALLENGTH}) && $options->{STREME_TOTALLENGTH} <= 0) {
    push(@errors, "Value \"$options->{STREME_TOTALLENGTH}\" invalid for option --streme-totallength (must be > 0).");
  }
  # set MEA_ONLY if we are not doing any motif discovery
  if (
    (defined($options->{MEME_NMOTIFS}) && $options->{MEME_NMOTIFS} == 0) &&
    (defined($options->{STREME_NMOTIFS}) && $options->{STREME_NMOTIFS} == 0) 
  ) {
    $options->{MEA_ONLY} = 1;
    $options->{FIMO_SKIP} = 1;
  }
  # check verbosity
  if ($options->{VERBOSITY} < 0 || $options->{VERBOSITY} > 5) {
    push(@errors, "Value \"$options->{VERBOSITY}\" invalid for option --verbosity (must be >= 0 and <= 5).");
  }
  # set ECHO
  $options->{ECHO} = $options->{VERBOSITY} > 0;
  $options->{VERBOSITY}++ if ($options->{VERBOSITY} == 0);
  # check that we're not clobbering something we're not meant to
  if (-e $options->{OUT_DIR}) {
    unless (-d $options->{OUT_DIR}) {
      push(@errors, "Can't output to \"$options->{OUT_DIR}\" as it is not a directory.\n");
    } elsif (!$options->{CLOBBER}) {
      push(@errors, "Can't output to \"$options->{OUT_DIR}\" as the directory already exists.\n");
    }
  }
  # print usage and errors
  if (@errors) {
    foreach my $error (@errors) {
      print STDERR "ERROR: $error\n";
    }
    print(STDERR $USAGE) if @errors;
    exit(2); 
  }
    
  # return options
  return $options;
}

sub job_ok {
  $logger->trace("call job_ok") if $logger;
  my ($registry, $job) = @_;
  my $entry = $registry->{$job};
  return defined($entry) && $entry->{status} == 0;
}

sub nmotifs {
  $logger->trace("call nmotifs") if $logger;
  my ($registry, $job) = @_;
  my $entry = $registry->{$job};
  if (defined($entry) && $entry->{status} == 0) {
    return $entry->{NMOTIFS};
  }
  return 0;
}

sub log_to_str {
  my ($value, $prec) = @_;
  &log_10_to_str($value/log(10), $prec);
}

sub log_10_to_str {
  my ($value, $prec) = @_;
  my ($m, $e);
  $e = floor($value);
  $m = 10 ** ($value - $e);

  # check that rounding up won't cause a 9.9999 to go to a 10
  if ($m + (.5 * (10 ** -$prec)) >= 10) {
    $m = 1;
    $e += 1;
  }
  return sprintf("%.*fe%+04.0f", $prec, $m, $e);
}

sub str_to_log {
  my ($str) = @_;
  my ($m, $e);
  $m = 1;
  $e = 0;
  if ($str =~ m/^\+?(\d*\.?\d+)(?:[eE]([+-]?\d+))?$/) {
    $m = $1;
    $e = $2 if (defined($2));
  }
  return log($m) + ($e * log(10));
}

sub str_to_log_10 {
  my ($str) = @_;
  my ($m, $e);
  $m = 1;
  $e = 0;
  if ($str =~ m/^\+?(\d*\.?\d+)(?:[eE]([+-]?\d+))?$/) {
    $m = $1;
    $e = $2 if (defined($2));
  }
  return (log($m) / log(10)) + $e;
}

#
# Run a job and do something with the command line and stuff...
#
sub run_job {
  $logger->trace("call run_job") if $logger;
  my ($opts, $registry, $name, $required, $job, $suppress_messages, $outputs) = @_;
  # check that all requirements are available
  foreach my $require (@{$required}) {
    my $reg = $registry->{$require};
    unless (defined($reg) && $reg->{status} == 0) {
      $logger->warn("skipped $name due to missing requirement $require") if $logger;
      print $progress_log "Skipped $name due to missing requirement $require\n";
      print "WARNING: skipped $name due to missing requirement $require.\n";
      return; # missing requirement
    }
  }
  # store any unstored messages
  my %msg = ();
  my $msg_file = catfile($opts->{OUT_DIR}, $name . "_msgs.txt");
  if (!(defined($job->{ALL_FILE}) || defined($job->{ALL_VAR}))) {
    if (!(defined($job->{ERR_FILE}) || defined($job->{ERR_VAR})) && 
      !(defined($job->{OUT_FILE}) || defined($job->{OUT_VAR}))) {
      $msg{ALL_FILE} = $msg_file;
    } elsif (!(defined($job->{ERR_FILE}) || defined($job->{ERR_VAR}))) {
      $msg{ERR_FILE} = $msg_file;
    } elsif (!(defined($job->{OUT_FILE}) || defined($job->{OUT_VAR}))) {
      $msg{OUT_FILE} = $msg_file;
    }
  }
  # convert the command to a string
  my $cmd;
  $cmd = &ExecUtils::stringify_args2(%{$job});
  # echo the command
  $logger->debug("About to invoke: $cmd") if $logger;
  print $progress_log "Invoking:\n  $cmd\n";
  print "Starting $job->{PROG}: $cmd\n" if ($opts->{ECHO});
  # run the command
  my $time;
  my $oot; # out of time flag
  $opts->{VALGRIND} = "valgrind" if (defined $opts->{VALGRIND});
  my $status = &ExecUtils::invoke(%{$job}, %msg, TMPDIR => $temp_dir,
    TIME => \$time, OOT => \$oot, VALGRIND => $opts->{VALGRIND}); 
  $logger->debug("Finished invoke of $name with status $status") if $logger;
  print $progress_log "Finished invoke:\n".
      "  name: $name  status: $status  time: $time" . 
      ($oot ? "  (ran out of time!)\n" : "\n");
  if ($opts->{ECHO}) {
    # echo program messages
    if (-s $msg_file && $opts->{ECHO} && !$suppress_messages) {
      my $fh;
      sysopen($fh, $msg_file, O_RDONLY);
      while (<$fh>) { print $_ };
      close($fh);
    }
    # print timeout warnings
    if ($oot) {
      print "Ran out of time! Stopping $job->{PROG}\n"
    }
    # print sucess or failure to stdout
    if ($status == 0) {
      print uc($job->{PROG}) . " ran successfully in $time seconds\n"
    } else {
      if ($status == -1) {
        print "$job->{PROG} failed to run\n";
      } elsif ($status & 127) {
        print "$job->{PROG} process died with signal " . ($status & 127) . 
            ", " . (($status & 128) ? 'with' : 'without') . " coredump\n";
      } else {
        print "$job->{PROG} exited with error code " . ($status >> 8) . "\n";
      }
    }
  }
  # remove messages file if it's empty
  unlink($msg_file) unless (-s $msg_file);
  # find which directory it used
  my $out_dir;
  for (my $i = 0; $i < scalar(@{$job->{ARGS}})-1; $i++) {
    my $arg = $job->{ARGS}->[$i];
    if ($arg eq '-oc' || $arg eq '--oc') {
      $out_dir = $job->{ARGS}->[$i+1] if (-d $job->{ARGS}->[$i+1]);
      last;
    }
  }
  # store the result
  my $entry = {name => $name, cmd => $cmd, dir => $out_dir, 
    messages => $msg_file, status => $status, time => $time, 
    oot => $oot, outputs => $outputs};
  $registry->{$name} = $entry;
  push(@jobs, $entry); 
}

#
# Check if the file is the same
#
sub same_file {
  $logger->trace("call same_file") if $logger;
  my ($fn1, $fn2) = @_;
  my ($dev1,$ino1) = stat($fn1);
  my ($dev2,$ino2) = stat($fn2);
  return ($dev1 == $dev2 && $ino1 == $ino2);
}

# 
# Load the alphabet
#
sub get_alphabet {
  $logger->trace("call get_alphabet") if $logger;
  my ($opts, $registry) = @_;
  my $out_dir = $opts->{OUT_DIR};
  my ($alph_name, $alph_file, $alph_obj);
  if ($opts->{ALPH_FILE}) {
    $alph_file = catfile($out_dir, (fileparse($opts->{ALPH_FILE}))[0]);
    # link the alphabet file to the output directory, if it's not already there
    if (-e $alph_file) {
      if (!&same_file($opts->{ALPH_FILE}, $alph_file)) {
        unlink($alph_file) or die("Can't write the alphabet file");
        unless (link($opts->{ALPH_FILE}, $alph_file)) {
          copy($opts->{ALPH_FILE}, $alph_file) or die("Failed to copy alphabet file: $!");
        }
      } 
    } else {
      unless (link($opts->{ALPH_FILE}, $alph_file)) {
        copy($opts->{ALPH_FILE}, $alph_file) or die("Failed to copy alphabet file: $!");
      }
    }
    $alph_obj = new Alphabet($alph_file);
  } else {
    $alph_name = $opts->{ALPH_NAME};
    die("Can't use option -dna2rna with non-DNA sequences!") if ($opts->{DNA2RNA} && $alph_name ne 'DNA');
    if ($alph_name eq 'DNA') {
      if ($opts->{DNA2RNA}) {
        $alph_name = 'RNA';
        $alph_obj = Alphabet::rna();
      } else {
        $alph_obj = Alphabet::dna();
      }
    } elsif ($alph_name eq 'RNA') {
      $alph_obj = Alphabet::rna();
    } elsif ($alph_name eq 'PROTEIN') {
      $alph_obj = Alphabet::protein();
    }
  }
  die("Name or file must be defined!") unless (defined($alph_name) || defined($alph_file));
  my @args = (defined($alph_file) ? ('-alph', $alph_file) :  ('-' . lc($alph_name)));
  return {name => $alph_name, file => $alph_file, obj => $alph_obj, args => \@args};
} # get_alphabet

sub get_background {
  $logger->trace("call get_background") if $logger;
  my ($opts, $registry, $alphabet, $seqs) = @_;
  my $alph = $alphabet->{obj};
  my $out_dir = $opts->{OUT_DIR};
  my $bfile;
  if ($opts->{BFILE}) {
    $bfile = catfile($out_dir, (fileparse($opts->{BFILE}))[0]);
    # link the background file to the output directory, if it's not already there
    if (-e $bfile) {
      if (!&same_file($opts->{BFILE}, $bfile)) {
        unlink($bfile) or die("Can't write the background file");
        unless (link($opts->{BFILE}, $bfile)) {
          copy($opts->{BFILE}, $bfile) or die("Failed to copy background: $!");
        }
      } 
    } else {
      unless (link($opts->{BFILE}, $bfile)) {
        copy($opts->{BFILE}, $bfile) or die("Failed to copy background: $!");
      }
    }
    $registry->{'bg'} = {status => 0};
  } else {
    $bfile = catfile($out_dir, 'background');
    # generate a background file and calculate sequence metrics
    my $order = $opts->{ORDER};
    my $sequences = defined $opts->{NEG_SEQS} ? $opts->{NEG_SEQS} : $opts->{POS_SEQS};
    &run_job(
      $opts, $registry, 'bg', [],
      {
        PROG => 'fasta-get-markov',
        BIN => $secondary_bin_dir,
        ARGS => ['-nostatus', '-nosummary', @{$alphabet->{args}}, '-m', $order, 
          '-pseudo', 1, $sequences, $bfile]
      }, 1, 
      [{FILE => $bfile, NAME => 'Background'}]
    );
  }
  # get the background
  my %bg = read_background_file($alph, $bfile);
  return {file => $bfile, obj => \%bg};
} # get_background

# 
# Read in the primary and control sequences (if any).
#
sub get_sequences {
  $logger->trace("call get_sequences") if $logger;
  my ($opts, $registry, $alphabet) = @_;
  my ($metrics, $count, $maxlen, $control_count);
  my $out_dir = $opts->{OUT_DIR};
  my $pos_input = $opts->{POS_SEQS};
  my $neg_input = $opts->{NEG_SEQS};
  my $pos_centered = catfile($out_dir, "trimmed_primary_seqs");

  # Count the number of primary sequences.
  &run_job($opts, $registry, 'count_primary_seqs', [],
    {PROG => 'getsize', BIN => $secondary_bin_dir, ARGS => [$pos_input],
     OUT_VAR => \$metrics, OUT_NAME => '$metrics'}
  );
  die("getsize failed me...") unless ($metrics =~ m/^(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s/);
  $count = $1;
  $maxlen = $3;

  # Count the number of control sequences.
  if (defined $neg_input) {
    &run_job($opts, $registry, 'count_control_seqs', [],
      {PROG => 'getsize', BIN => $secondary_bin_dir, ARGS => [$neg_input],
       OUT_VAR => \$metrics, OUT_NAME => '$metrics'}
    );
    die("getsize failed me...") unless ($metrics =~ m/^(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s/);
    $control_count = $1;
  } else {
    $control_count = $count;
  }

  # Extract the central portions of the primary sequences if requested.
  if ($opts->{CTRIM}) {
    &run_job($opts, $registry, 'center_seqs', [],
      {
        PROG => 'fasta-center', BIN => $script_dir,
        ARGS => [@{$alphabet->{args}}, '-len', $opts->{CTRIM}],
        IN_FILE => $pos_input, OUT_FILE => $pos_centered
      },
      0, [{FILE => $pos_centered, NAME => "Trimmed Primary Sequences"}]);
  }

  # Return the sequences.
  return {
    PRIMARY => ($opts->{CTRIM} ? $pos_centered : $pos_input),
    PRIMARY_COUNT => $count,
    PRIMARY_MAXLEN => $maxlen,
    CONTROL => $neg_input,
    CONTROL_COUNT => $control_count
  };
} # get_sequences

#
# Load motifs from MEME or STREME output using the passed parser.
#
sub parse_motifs {
  $logger->trace("call parse_motifs") if $logger;
  my ($pgm, $db, $bg, $parser, $file) = @_;
  my $fh;
  unless(sysopen($fh, $file, O_RDONLY)) {
    warn("Failed to open the output of $pgm to parse motifs.\n");
    return ();
  }
  local $/ = \100;
  while (<$fh>) {
    $parser->parse_more($_);
  }
  close($fh);
  $parser->parse_done();
  my @motifs = $parser->get_motifs();
  my $nmotifs = scalar(@motifs);
  foreach my $motif (@motifs) {
    $motif->{file} = $file;
    $motif->{bg} = $bg;
    $motif->{db} = $db;
    $motif->{pgm} = $pgm;
  }
  return ($nmotifs, @motifs);
} # parse_motifs

#
# Run MEME and load the motifs it finds into memory
#
sub run_meme {
  $logger->trace("call meme") if $logger;
  my ($opts, $registry, $alphabet, $background, $seqs) = @_;
  my $alph = $alphabet->{obj};
  my $timeout = &time_allocate($opts, 'meme');
  my $meme_dir = catdir($opts->{OUT_DIR}, "meme_out");
  my @args = (
    '-oc', $meme_dir, 
    '-mod', $opts->{MEME_MOD}, 
    '-minw', $opts->{MINW}, 
    '-maxw', $opts->{MAXW},
    '-bfile', $background->{file}, 
    '-markov_order', $opts->{ORDER},
    '-seed', $opts->{SEED},
    @{$alphabet->{args}}
  );
  push(@args, '-revcomp') if ($alph->has_complement());
  if (defined($opts->{MEME_EVT})) {
    push(@args, '-evt', $opts->{MEME_EVT});
  } else {
    push(@args, '-nmotifs', $opts->{MEME_NMOTIFS});
  }
  push(@args, '-p', $opts->{MEME_P}) if (defined($opts->{MEME_P}));
  push(@args, '-brief', $opts->{MEME_BRIEF}) if (defined($opts->{MEME_BRIEF}));
  push(@args, '-nostatus') unless $opts->{VERBOSITY} >= 2;
  # try to give MEME 60 seconds to finish before the timeout; otherwises give it 1 second.
  push(@args, '-time', ($timeout > 60 ? $timeout - 60 : 1)) if ($timeout);
  push(@args, '-searchsize', $opts->{MEME_SEARCHSIZE}) if (defined($opts->{MEME_SEARCHSIZE}));
  push(@args, $seqs->{PRIMARY});
  &run_job($opts, $registry, 'meme', ['bg'], 
      {PROG => 'meme', BIN => $meme_bin_dir, ARGS => \@args, TIMEOUT => $timeout},
      0,[{FILE => catfile($meme_dir, 'meme.html'), NAME => 'MEME HTML'}, 
        {FILE => catfile($meme_dir, 'meme.txt'), NAME => 'MEME text'}, 
        {FILE => catfile($meme_dir, 'meme.xml'), NAME => 'MEME XML'}
      ]
  );
  if (&job_ok($registry, 'meme')) {
    my ($nmotifs, @motifs) = 
      &parse_motifs('meme', -1, $background->{obj}, new MotifInMemeXML(), catfile($meme_dir, 'meme.xml'));
    $registry->{meme}->{NMOTIFS} = $nmotifs;
    # Set pgm_log_evalue: convert MEME E-value string to log(E-value).
    for (my $i = 0; $i < scalar(@motifs); $i++) {
      $motifs[$i]->{pgm_log_evalue} = &str_to_log($motifs[$i]->{evalue});
    }
    return @motifs;
  }
  return ();
} # run_meme

#
# Run STREME and load the motifs it finds into memory
#
sub run_streme {
  $logger->trace("call streme") if $logger;
  my ($opts, $registry, $alphabet, $background, $seqs) = @_;
  my $streme_dir = catdir($opts->{OUT_DIR}, "streme_out");
  
  # calculate the time available to run STREME
  my $timeout = &time_allocate($opts, 'streme');

  my @args = (
    '--verbosity', $opts->{VERBOSITY}, 
    '--oc', $streme_dir, 
    @{$alphabet->{args}},
    '--minw', $opts->{MINW}, 
    '--maxw', $opts->{MAXW},
    '--order', $opts->{ORDER}, 
    '--bfile', $background->{file},
    '--seed', $opts->{SEED},
    '--align', $opts->{ALIGN}
  );
  # try to give STREME 60 seconds to finish before the timeout; otherwise give it 1 second.
  push(@args, '--time', ($timeout > 60 ? $timeout - 60 : 1)) if ($timeout);
  push(@args, '--totallength', $opts->{STREME_TOTALLENGTH}) if defined($opts->{STREME_TOTALLENGTH});
  push(@args, '--evalue');
  push(@args, '--thresh', $opts->{STREME_EVT}) if defined($opts->{STREME_EVT});
  push(@args, '--nmotifs', $opts->{STREME_NMOTIFS}) if defined($opts->{STREME_NMOTIFS});
  push(@args, '--p', $seqs->{PRIMARY});
  push(@args, '--n', $seqs->{CONTROL}) if defined($seqs->{CONTROL});

  my @required = ('bg');
  &run_job($opts, $registry, 'streme', \@required, {
      PROG => 'streme', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout},
      0, [{FILE => catfile($streme_dir, 'streme.html'), NAME => 'STREME HTML'},
        {FILE => catfile($streme_dir, 'streme.txt'), NAME => 'STREME text'},
        {FILE => catfile($streme_dir, 'streme.xml'), NAME => 'STREME XML'}
      ]);

  if (&job_ok($registry, 'streme')) {
    my ($nmotifs, @motifs) = 
      &parse_motifs('streme', -2, $background->{obj}, new MotifInStremeXML(), catfile($streme_dir, "streme.xml"));
    $registry->{streme}->{NMOTIFS} = $nmotifs;
    # Set pgm_log_evalue: convert STREME log10_evalue to log_evalue.
    for (my $i = 0; $i < scalar(@motifs); $i++) {
      $motifs[$i]->{pgm_log_evalue} = $motifs[$i]->{log_evalue}*log(10);
    }
    return @motifs;
  }
  return ();
} # run_streme

# 
# Run SEA to find enriched motifs in discovered and known motifs.
#
sub run_sea {
  $logger->trace("call sea") if $logger;
  my ($opts, $registry, $alphabet, $background, $seqs, $motifs) = @_;
  my $alph = $alphabet->{obj};
  my $log_evt = log($opts->{EVT});

  # Create a map for looking up DBs and set up motif input files.
  my @db_map = ();
  my @db_counts = ();
  my @disc_motif_inputs = ();
  my @known_motif_inputs = ();
  my $n_disc_dbs = 0;
  my $n_known_dbs = 0;
  if (&nmotifs($registry, 'meme') > 0) {
    push(@disc_motif_inputs, '--m', catfile($registry->{meme}->{dir}, 'meme.xml'));
    push(@db_map, -1);
    $n_disc_dbs++;
  }
  if (&nmotifs($registry, 'streme') > 0) {
    push(@disc_motif_inputs, '--m', catfile($registry->{streme}->{dir}, 'streme.xml'));
    push(@db_map, -2);
    $n_disc_dbs++;
  }
  foreach (0 .. (scalar(@{$opts->{DBS}}) - 1)) {
    push(@known_motif_inputs, '--m', $opts->{DBS}[$_]);
    push(@db_map, $_);
    push(@db_counts, 0);
    $n_known_dbs++;
  }

  # Done if there are no motifs.
  return @db_counts unless (@disc_motif_inputs || @known_motif_inputs);

  # Create a lookup table so we can look up our discovered motifs in the SEA results.
  my %m_map = ();
  for (my $i = 0; $i < scalar(@{$motifs}); $i++) {
    my $motif = $motifs->[$i];
    $m_map{$motif->{db}} = {} unless defined($m_map{$motif->{db}});
    # MEME IDs are not unique, so use ALT_ID.
    if ($motif->{pgm} eq "meme") {
      $m_map{$motif->{db}}->{$motif->{alt}} = $motif;
    } else {
      $m_map{$motif->{db}}->{$motif->{id}} = $motif;
    }
    # Set this in case the motif is skipped by SEA.
    $motif->{sea_log_pvalue} = 1;
  }

  # Run SEA on just the discovered motifs to get information on all of them.
  if ($n_disc_dbs > 0) {
    my ($name, $title);
    if ($n_known_dbs > 0) {
      $name = "sea_disc_out";
      $title = "SEA DISCOVERED";
    } else {
      $name = "sea_out";
      $title = "SEA";
    }
    my $sea_dir = catdir($opts->{OUT_DIR}, $name);
    my $timeout = &time_allocate($opts, 'sea');
    my @args = (
      '--verbosity', $opts->{VERBOSITY},
      '--oc', $sea_dir,
      '--qvalue',
      '--thresh', 1,			# report all motifs
      '--order', $opts->{ORDER},
      '--bfile', $background->{file},
      '--seed', $opts->{SEED},
      '--align', $opts->{ALIGN},
      '--motif-pseudo', $DEFAULT_MOTIF_PSEUDO,
      '--noseqs',
      @disc_motif_inputs
    );
    push(@args, '--xalph', $alphabet->{file}) if ($opts->{XALPH});
    push(@args, '--p', $seqs->{PRIMARY});
    push(@args, '--n', $seqs->{CONTROL}) if ($seqs->{CONTROL});
    &run_job($opts, $registry, 'sea_disc', ['bg'],
      {PROG => 'sea', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout},
      0,
      [ {FILE => catfile($sea_dir, 'sea.html'), NAME => $title . ' HTML'},
	{FILE => catfile($sea_dir, 'sea.tsv'), NAME => $title . ' TSV'},
	{FILE => catfile($sea_dir, 'sequences.tsv'), NAME => $title . ' SEQUENCES'}
      ]
    );
    # Done if SEA failed.
    return @db_counts unless &job_ok($registry, 'sea_disc');

    # Parse the SEA output and update or create the motif objects. 
    my $parser = new JsonRdr();
    my $info = $parser->load_file(catfile($sea_dir, 'sea.html'));
    my $info_motifs = $info->{data}->{motifs};
    for (my $i=0; $i<scalar(@{$info_motifs}); $i++) {
      my $info_motif = $info_motifs->[$i];
      my $motif = $m_map{$db_map[$info_motif->{db}]}->{$info_motif->{id}};
      # MEME motifs will be undefined under ID, so look up under ALT_ID.
      if (! $motif) { $motif = $m_map{$db_map[$info_motif->{db}]}->{$info_motif->{alt}}; }
      $motif->{sea_log_pvalue} = $info_motif->{log_pvalue};
      # Set E-value and q-value if there are no known motifs.
      if ($n_known_dbs == 0) {
	$motif->{sea_log_evalue} = $info_motif->{log_evalue};
	$motif->{sea_log_qvalue} = $info_motif->{log_qvalue};
      }
      $motif->{sea_max_sites} = $info_motif->{max_sites};
      $motif->{sea_site_hist} = $info_motif->{site_hist};
    }
  } # discovered motifs

  # Run SEA on the discovered and known motifs limiting to E-value threshold.
  if ($n_known_dbs > 0) {
    my $sea_dir = catdir($opts->{OUT_DIR}, 'sea_out');
    my $timeout = &time_allocate($opts, 'sea');
    my @args = (
      '--verbosity', $opts->{VERBOSITY}, 
      '--oc', $sea_dir, 
      '--thresh', $opts->{EVT},
      '--order', $opts->{ORDER}, 
      '--bfile', $background->{file},
      '--seed', $opts->{SEED},
      '--align', $opts->{ALIGN},
      '--motif-pseudo', $DEFAULT_MOTIF_PSEUDO,
      @disc_motif_inputs,
      @known_motif_inputs
    );
    push(@args, '--noseqs') if ($opts->{SEA_NOSEQS});
    push(@args, '--xalph', $alphabet->{file}) if ($opts->{XALPH});
    push(@args, '--p', $seqs->{PRIMARY});
    push(@args, '--n', $seqs->{CONTROL}) if ($seqs->{CONTROL});
    &run_job($opts, $registry, 'sea', ['bg'], 
      {PROG => 'sea', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout}, 
      0, 
      [ {FILE => catfile($sea_dir, 'sea.html'), NAME => 'SEA HTML'},
	{FILE => catfile($sea_dir, 'sea.tsv'), NAME => 'SEA TSV'},
	{FILE => catfile($sea_dir, 'sequences.tsv'), NAME => 'SEA SEQUENCES'}
      ]
    );

    # Done if SEA failed.
    return @db_counts unless &job_ok($registry, 'sea');

    # Parse the SEA output and update or create the motif objects. 
    my $parser = new JsonRdr();
    my $info = $parser->load_file(catfile($sea_dir, 'sea.html'));
    my $info_motifs = $info->{data}->{motifs};
    # Get the known motif database sizes from the SEA output.
    if ($n_known_dbs > 0) {
      my $info_dbs = $info->{data}->{motif_dbs};
      for (my $i=$n_disc_dbs; $i<scalar(@{$info_dbs}); $i++) {
	my $index = $i;		# offset db_map index
	$db_counts[$db_map[$index]] = $info_dbs->[$i]->{count};
      }
    }
    for (my $i=0; $i<scalar(@{$info_motifs}); $i++) {
      my $info_motif = $info_motifs->[$i];
      my $motif = $m_map{$db_map[$info_motif->{db}]}->{$info_motif->{id}};
      # MEME motifs will be undefined under ID, so look up under ALT_ID.
      if (! $motif && defined $info_motif->{alt}) { $motif = $m_map{$db_map[$info_motif->{db}]}->{$info_motif->{alt}}; }
      if ($motif) {
	# Discovered motif.
	$motif->{sea_log_pvalue} = $info_motif->{log_pvalue};
	$motif->{sea_log_evalue} = $info_motif->{log_evalue};
	$motif->{sea_log_qvalue} = $info_motif->{log_qvalue};
	$motif->{sea_max_sites} = $info_motif->{max_sites};
	$motif->{sea_site_hist} = $info_motif->{site_hist};
      } else {
	# Known motif.
	if ($info_motif->{log_evalue} <= $log_evt) {
	  # Known motif passes E-value threshold. 
	  # Create motif object from SEA output.
	  my $motif = {
	    pgm => 'sea',
	    bg => $background->{obj}, 
	    strands => ($alph->has_complement() ? 2 : 1), 
	    db => $info_motif->{db} - $n_disc_dbs,
	    file => $opts->{DBS}->[$info_motif->{db} - $n_disc_dbs],
	    id => $info_motif->{id}, 
	    alt => $info_motif->{alt}, 
	    consensus => $info_motif->{consensus},
	    url => $info_motif->{url}, 
	    width => $info_motif->{len}, 
	    sites => $info_motif->{tp},			# SEA tp
	    pspm => {},
	    sea_log_pvalue => $info_motif->{log_pvalue},	# SEA log-pvalue
	    sea_log_evalue => $info_motif->{log_evalue},	# SEA log-pvalue
	    sea_log_qvalue => $info_motif->{log_qvalue},	# SEA log-qvalue
	    pgm_log_evalue => $info_motif->{log_evalue},	# SEA log-evalue
	    evalue => log_to_str($info_motif->{log_evalue}, 2), 	# SEA evalue for intern_to_meme
	    sea_max_sites => $info_motif->{max_sites},
	    sea_site_hist => $info_motif->{site_hist}
	  };
	  for (my $a = 0; $a < $alph->size_core(); $a++) {
	    my @column = ();
	    for (my $i = 0; $i < $motif->{width}; $i++) {
	      $column[$i] = $info_motif->{pwm}->[$i]->[$a];
	    }
	    $motif->{pspm}->{$alph->char($a)} = \@column;
	  }
	  push(@{$motifs}, $motif);
	}
      }
    }
  } # all motifs

  # Sort the motif objects by SEA log_pvalue.
  #   1) SEA log(p-value)
  #   2) program log(E-value)
  #   3) ID
  @{$motifs} = sort {
    $a->{sea_log_pvalue} <=> $b->{sea_log_pvalue} ||
    $a->{pgm_log_evalue} <=> $b->{pgm_log_evalue} ||
    $a->{id} cmp $b->{id}
  } @{$motifs};

  # Set the SEA ranks in the motif objects.
  my $rank = 0;
  for (my $i=0; $i<scalar(@{$motifs}); $i++) {
    my $motif = $motifs->[$i];
    # Set the SEA Rank of the motif; tied motifs have the same rank.
    $rank++ if ($i == 0 || $motif->{sea_log_pvalue} > $motifs->[$i-1]->{sea_log_pvalue}); 
    $motif->{rank} = $rank;	# SEA rank
  }
  return @db_counts;
} # run_sea

#
# Created file of all significant motifs, sorted by SEA log(p-value).
#
sub create_combined_file {
  my ($opts, $registry, $motifs) = @_;

  # Sort the motifs by:
  #   1) SEA log(p-value)
  #   2) ab initio motifs first
  #   3) PGM log(E-value)
  #   4) ID
  @{$motifs} = sort {
    $a->{sea_log_pvalue} <=> $b->{sea_log_pvalue} || 
    ($a->{pgm} ne "sea" && $b->{pgm} eq "sea" ? -1 : 0) ||
    ($a->{pgm} eq "sea" && $b->{pgm} ne "sea" ? 1 : 0) ||
    $a->{pgm_log_evalue} <=> $b->{pgm_log_evalue} || 
    $a->{id} cmp $b->{id}
  } @{$motifs};

  # Write out the motifs to a combined file, use numbers to identify them.
  my $combined_motifs_file = catfile($opts->{OUT_DIR}, 'combined.meme');
  my $fh;
  sysopen($fh, $combined_motifs_file, O_WRONLY | O_CREAT | O_TRUNC);
  for (my $i = 0; $i < scalar(@{$motifs}); $i++) {
    my %motif = %{$motifs->[$i]};
    $motif{alt} = (defined $motif{id} && defined $motif{alt}) ? $motif{id} . '-' . $motif{alt} : (defined $motif{id}) ? $motif{id} : '';
    $motif{id} = $i+1;		# Start indices at 1
    $motif{url} = '';
    print $fh intern_to_meme(\%motif, 0, 1, $i == 0);
  }
  close($fh);
  
} # create_combined_file

# 
# Run SEA on the full-length primary sequences to get site distributions.
#
sub run_distr {
  $logger->trace("call sea") if $logger;
  my ($opts, $registry, $alphabet, $background, $seqs, $motifs) = @_;
  my $alph = $alphabet->{obj};
  my $distr_dir = catdir($opts->{OUT_DIR}, 'distr_out');
  my $combined_motifs_file = catfile($opts->{OUT_DIR}, 'combined.meme');
  my $nmotifs = scalar(@{$motifs});
  my @m_index = ();

  # Calculate the time available to run SEA for distribution.
  my $timeout = &time_allocate($opts, 'distr_sea');

  # Create a lookup table so we can look up our motifs using the SEA results.
  for (my $i=0; $i<$nmotifs; $i++) {
    my $motif = $motifs->[$i];
    $m_index[$i+1] = $motif;            # combined.meme IDs start at "1"
  }

  # Run SEA on the (full-length) primary sequences using just the significant
  # motifs to get the site distributions.
  my @args = (
    '--verbosity', $opts->{VERBOSITY}, 
    '--oc', $distr_dir, 
    '--pvalue',
    '--thresh', 1,
    '--hofract', 0, 
    '--order', $opts->{ORDER}, '--bfile', $background->{file},
    '--seed', $opts->{SEED},
    '--motif-pseudo', $DEFAULT_MOTIF_PSEUDO
  );
  push(@args, '--xalph', $alphabet->{file}) if ($opts->{XALPH});
  push(@args, '--p', $opts->{POS_SEQS});		# full-length sequences
  push(@args,  '--m', $combined_motifs_file);
  &run_job($opts, $registry, 'distr_sea', ['bg'], 
    {PROG => 'sea', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout}, 
    0, 
    [ ]
    );
 
  return unless &job_ok($registry, 'distr_sea');

  # Parse the motifs and add the site distributions to the motif objects.
  my $parser = new JsonRdr();
  my $info = $parser->load_file(catfile($distr_dir, 'sea.html'));
  my $info_motifs = $info->{data}->{motifs};
  $nmotifs = scalar(@{$info_motifs});
  for (my $i=0; $i<$nmotifs; $i++) {
    my $info_motif = $info_motifs->[$i];
    my $motif = $m_index[$info_motif->{id}];
    $motif->{sea_total_sites} = $info_motif->{total_sites};
    $motif->{sea_site_distr} = $info_motif->{site_distr};
  }

} # run_distr

# 
# Run Tomtom to find similar motifs
#
sub run_tomtom {
  $logger->trace("call tomtom") if $logger;
  my ($opts, $registry, $background, $motifs, $pgm, $db) = @_;
  return unless @{$opts->{DBS}};
  return unless &nmotifs($registry, $pgm) > 0;

  my $job_name = $pgm.'_tomtom';
  my $timeout = &time_allocate($opts, $job_name);
  my $input_motifs = catfile($registry->{$pgm}->{dir}, $pgm.'.xml');
  my $tomtom_dir = catdir($opts->{OUT_DIR}, $pgm.'_tomtom_out');
  my @args = ('-verbosity', $opts->{VERBOSITY}, '-oc', $tomtom_dir, 
    '-min-overlap', 5, '-dist', 'ed', '-evalue', '-thresh', 1.0, '-no-ssc');
  push(@args, '-xalph') if ($opts->{XALPH});
  push(@args, $input_motifs, @{$opts->{DBS}});
  &run_job($opts, $registry, $job_name, ['bg', $pgm], {
      PROG => 'tomtom', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout},
      0, 
      [ {FILE => catfile($tomtom_dir, 'tomtom.html'), NAME => 'Tomtom HTML'},
        {FILE => catfile($tomtom_dir, 'tomtom.tsv'), NAME => 'Tomtom TSV'},
        {FILE => catfile($tomtom_dir, 'tomtom.xml'), NAME => 'Tomtom XML'}
      ]);

  return unless &job_ok($registry, $job_name);

  my $data = XMLin(
    catfile($tomtom_dir, 'tomtom.xml'),
    KeyAttr => [], ForceArray => 1
  );

  # Make an index from query ID to list of matching IDXs.
  my $queries = $data->{queries}->[0]->{motif};
  my $targets = $data->{targets}->[0]->{motif};
  my %query_id_to_target_idxs = ();
  my %query_id_to_idx = ();
  my $matches = $data->{matches}->[0]->{query};
  if (defined $matches) {
    for (my $i = 0; $i < scalar(@{$matches}); $i++) {
      my $q_idx = $matches->[$i]->{idx};
      my $q_id = $queries->[$q_idx]->{id};
      $query_id_to_idx{$q_id} = $q_idx;
      my $q_targets = $matches->[$i]->{target};
      my @target_list = ();
      for (my $j = 0; $j < scalar(@{$q_targets}); $j++) {
        my $t_idx = $q_targets->[$j]->{idx};
        my $t_id = $targets->[$t_idx]->{id};
        push(@target_list, $t_idx);
      }
      $query_id_to_target_idxs{$q_id} = \@target_list;
    }
  }
   
  # Iterate over the motif objects and add the non-self matches.
  for (my $i = 0; $i < scalar(@{$motifs}); $i++) {
    my $motif = $motifs->[$i];
    next if ($motif->{db} ne $db);
    # create the list of matches
    $motif->{matches} = [];
    my $q_id = $motif->{id};
    # set tomtom index of query
    $motif->{idx} = $query_id_to_idx{$q_id};
    # get list of target indices for matching motifs
    my $target_list = $query_id_to_target_idxs{$q_id};
    next unless defined $target_list; 
    # convert each match idx to a match hash and add it to the motif's matches
    for (my $j = 0; $j < scalar(@{$target_list}); $j++) {
      my $t_idx = $target_list->[$j];
      my $target = $targets->[$t_idx];
      my $match = {db => $target->{db}, idx => $t_idx, id => $target->{id}, alt => $target->{alt}, url => $target->{url}};
      push(@{$motif->{matches}}, $match);
    }
  }
} # run_tomtom

#
# run Tomtom to align all the motifs that we've found
#
sub run_align {
  $logger->trace("call align") if $logger;
  my ($opts, $registry, $motifs) = @_;

  return [] unless (@{$motifs});

  my $program = "XSTREME - Motif Discovery and Enrichment Analysis\n";
  my $info = "For further information on how to interpret these results please access @SITE_URL@.\n" .
    "To get a copy of the MEME Suite software please access @SOURCE_URL@.\n\n";
  my $reference = Citation::cite("XSTREME");

  # initialize the arrays for each motif
  my @aligns = ();
  my @used = ();
  for (my $i = 0; $i < scalar(@{$motifs}); $i++) {
    $aligns[$i] = {};
    $used[$i] = 0;
  }
  
  # run Tomtom in text mode
  my $timeout = &time_allocate($opts, 'align_tomtom');
  my $align_file = catfile($opts->{OUT_DIR}, 'motif_alignment.txt');
  my $combined_motifs_file = catfile($opts->{OUT_DIR}, 'combined.meme');
  my @args = ('-verbosity', $opts->{VERBOSITY}, '-text', 
    '-thresh', $opts->{GROUP_WEAK_THRESH}, $combined_motifs_file, $combined_motifs_file);
  &run_job($opts, $registry, 'align_tomtom', [], 
      {PROG => 'tomtom', BIN => $bin_dir, ARGS => \@args, OUT_FILE => $align_file, TIMEOUT => $timeout}, 
      1, 
      [
        {FILE => $align_file, NAME => 'Motif Alignment'}
      ]);
  return [] unless &job_ok($registry, 'align_tomtom');

  # read in the alignment
  my $fh;
  sysopen($fh, $align_file, O_RDONLY);
  my $line = <$fh>;				# skip header line
  while ($line = <$fh>) {
    # remove comments
    $line =~ s/#.*$//;
    # skip empty lines
    next unless $line =~ m/\S/;
    # split into columns
    my @cols = split(/\t/, $line);
    # skip if wrong column count
    next unless scalar(@cols) == 10;
    # get the values we're interested in
    my $query = int($cols[0])-1;		# readjust motif indices to start at 0
    my $target = int($cols[1])-1;		# readjust motif indices to start at 0
    my $offset = -int($cols[2]);
    my $rc = ($cols[9] =~ '-' ? 1 : 0);
    my $qval = $cols[5]; $qval =~ s/^\s+//; $qval =~ s/\s+$//; $qval += 0;
    $aligns[$query]->{$target} = {offset => $offset, rc => $rc, qval => $qval};
  }
  close($fh);

  # arrange the motifs so discovered motifs are first
  my @seeds;
  my @buffer = ();
  for (my $i = 0; $i < scalar(@{$motifs}); $i++) {
    if ($motifs->[$i]->{db} >= 0) {
      push(@buffer, $i);
    } else {
      push(@seeds, $i);
    }
  }
  push(@seeds, @buffer);
  # use a greedy algorithm to group the motifs
  # under their best examples
  my @groups = ();
  my %motif_group = ();
  foreach my $i (@seeds) {
    next if $used[$i];
    # mark self as used
    $used[$i] = 1;
    $motif_group{$i} = scalar(@groups);
    # add self to the group
    my @group = ({motif => $i, offset => 0, rc => 0, is_seed => 1});
    # add all aligned motifs to the group
    while (my ($m, $align) = each %{$aligns[$i]}) {
      next if $used[$m];
      # skip weak links on first pass
      next if ($align->{qval} > $opts->{GROUP_THRESH});
      $used[$m] = 1;
      $motif_group{$m} = scalar(@groups);
      push(@group, {motif => $m, offset => $align->{offset}, 
          rc => $align->{rc}});
    }
    push(@groups, \@group);
  }

  # Now look at each of the groups from best to worst
  # and look at the weak connections.
  for (my $g = 0; $g < scalar(@groups)-1; $g++) {
    my $dest = $groups[$g];
    next unless $dest;
    my $i = $dest->[0]->{motif};
    my %mergable = ();
    while (my ($m, $align) = each %{$aligns[$i]}) {
      my $candidate = $motif_group{$m};
      # Skip links to one or more significant groups.
      next if ($candidate <= $g);
      # Skip if we've already decided the group is mergeable.
      next if ($mergable{$candidate});
      # Skip when motifs in the group that would reject merging.
      next if (scalar(grep {!defined($aligns[$i]->{$_->{motif}})} @{$groups[$candidate]}));
      # Record that the group is mergable.
      $mergable{$candidate} = $groups[$candidate];
      $groups[$candidate] = undef;
    }
    # Merge all the mergable groups.
    foreach my $src (values %mergable) {
      foreach my $m (map {$_->{motif}} @{$src}) {
        my $align = $aligns[$i]->{$m};
        $motif_group{$m} = $g;
        push(@{$dest}, {motif => $m, offset => $align->{offset}, 
            rc => $align->{rc}});
      }
    }
  }
  # Remove merged groups.
  @groups = grep {defined($_)} @groups;
  %motif_group = ();

  # Finally sort and re-align.
  for (my $g = 0; $g < scalar(@groups); $g++) {
    my $group = $groups[$g];

    # Sort the group, ensure the seed is first but others are in order of SEA log(p-value).
    @{$group} = sort {$a->{is_seed} ? -1 : ($b->{is_seed} ? 1 : ($a->{motif} <=> $b->{motif}))} @{$group};

    # Calculate leftmost extreme.
    my $left_extreme = 0;
    foreach my $align (@{$group}) {
      $left_extreme = $align->{offset} if ($align->{offset} < $left_extreme);
    }
    # Realign and calculate total width.
    my $total_width = 0;
    foreach my $align (@{$group}) {
      $align->{offset} -= $left_extreme;
      my $len = $align->{offset} + $motifs->[$align->{motif}]->{width};
      $total_width = $len if ($len > $total_width);
    }
  }

  # Sort the groups so they are in order of seed motif SEA log(p-value).
  @groups = sort {$a->[0]->{motif} <=> $b->[0]->{motif}} @groups;

  # Write out the seed motifs in MEME format to a txt file and
  # mark them as seed motifs.
  my $txt_file = catfile($opts->{OUT_DIR}, 'xstreme.txt');
  sysopen($fh, $txt_file, O_WRONLY | O_CREAT | O_TRUNC);
  for (my $g = 0; $g < scalar(@groups); $g++) {
    my $group = $groups[$g];
    my $motif = $motifs->[$group->[0]->{motif}];
    $motif->{seed_motif} = $g+1;
    print $fh intern_to_meme($motif, 0, 1, $g == 0, $program, $info, $reference);
    # Put the group number of each motif in its object.
    for (my $i = 0; $i < scalar(@{$group}); $i++) {
      my $motif = $motifs->[$group->[$i]->{motif}];
      $motif->{group} = $g+1; 
    }
  }
  close($fh);
  
  return \@groups;
} # run_align

sub print_xstreme_header {
  my ($fg) = @_;

} # print_xstreme_header


sub run_fimo {
  $logger->trace("call fimo") if $logger;
  my ($opts, $registry, $background, $seqs, $motifs, $align) = @_;
  my $t_fimo_start = [&gettimeofday()];
  my $full_timeout = &time_allocate($opts, "fimo");

  my @motif_files = ();
  foreach my $pgm ('meme', 'streme') {
    next unless &job_ok($registry, $pgm);
    next unless $registry->{$pgm}->{NMOTIFS} > 0;
    push(@motif_files, catfile($registry->{$pgm}->{dir}, $pgm.'.xml'));
  }
  return unless @motif_files;

  my $nmotifs = 0;
  for (my $i = 0; $i < @{$align}; $i++) {
    my $key_motif = $motifs->[$align->[$i]->[0]->{motif}];
    # Skip primary motifs from databases.
    next unless (grep {$_ eq $key_motif->{file}} @motif_files);
    my $timeout = undef;
    if (defined($full_timeout)) {
      $timeout = $full_timeout - int(&tv_interval($t_fimo_start, [&gettimeofday()]) + 0.5);
      last if $timeout <= 0;
    }

    # Run FIMO on the (full-length) primary sequences.
    my $job_id = 'fimo'.($nmotifs+1);
    my $fimo_dir = catdir($opts->{OUT_DIR}, 'fimo_out_'.($nmotifs+1));
    my @args = ('--parse-genomic-coord', '--verbosity', $opts->{VERBOSITY}, '--oc', $fimo_dir,
      '--bgfile', $background->{file}, '--motif', $key_motif->{id}, $key_motif->{file},
      $opts->{POS_SEQS}
    );

    &run_job($opts, $registry, $job_id, [], {
        PROG => 'fimo', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout}, 1, [
          {FILE => catfile($fimo_dir, 'fimo.gff'), NAME => 'FIMO GFF'},
          {FILE => catfile($fimo_dir, 'fimo.html'), NAME => 'FIMO HTML'},
          {FILE => catfile($fimo_dir, 'fimo.tsv'), NAME => 'FIMO TSV'}
        ]);
    
    next unless &job_ok($registry, $job_id);

    $key_motif->{fimo_gff} = catfile($fimo_dir, 'fimo.gff');
    $nmotifs++;
  }
} # fimo

sub summary {
  my ($opts, $registry, $commandline, $version, $release) = @_;
  my $infile = $opts->{OUT_DIR} . '/xstreme.html';
  my $outfile = $opts->{OUT_DIR} . '/xstreme.tsv';
  &run_job(
    $opts, $registry, 'summary', [],
    {
      PROG => 'xstreme_html_to_tsv',
      BIN => $script_dir,
      ARGS => [$infile, $outfile, $commandline, $version, $release]
    }, 1, 
    [{FILE => $outfile, NAME => 'Summary'}]
  );

  # Remove the distr_out directory to save space.
  my $distr_dir = catdir($opts->{OUT_DIR}, 'distr_out');
  my $tmpfile = $distr_dir . '/sea.html';
  unlink($tmpfile);
  $tmpfile = $distr_dir . '/sea.tsv';
  unlink($tmpfile);
  $tmpfile = $distr_dir . '/sequences.tsv';
  unlink($tmpfile);
  rmdir($distr_dir);
  
} # summary

sub output {
  $logger->trace("call output") if $logger;
  print $progress_log "Writing output\n";
  my ($opts, $registry, $alphabet, $background, $seqs, $db_counts, $motifs, $align) = @_;
  my $alph = $alphabet->{obj};
  my $bg = $background->{obj};
  my $html_file = catfile($opts->{OUT_DIR}, 'xstreme.html');
  my $htmlwr = new HtmlMonolithWr($etc_dir, 'xstreme_template.html', 
    $html_file, 'xstreme_data.js' => 'data');
  $htmlwr->set_logger($logger) if $logger;
  my $jsonwr = $htmlwr->output();
  my $out_dir = abs_path($opts->{OUT_DIR});
  $jsonwr->str_prop('version', $version);
  $jsonwr->str_prop('revision', $revision);
  $jsonwr->str_prop('release', $release);
  $jsonwr->str_prop('description', $opts->{DESCRIPTION}) if $opts->{DESCRIPTION};
  $jsonwr->str_array_prop('cmd', @cmd);
  $jsonwr->str_prop('evt', $opts->{EVT});
  $jsonwr->str_prop('align', $opts->{ALIGN});
  $jsonwr->bool_prop('fimo_skip', $opts->{FIMO_SKIP});
  $jsonwr->bool_prop('mea_only', $opts->{MEA_ONLY});
  $jsonwr->property('programs');
  $jsonwr->start_array_value();
  for (my $i = 0; $i < scalar(@jobs); $i++){
    my $job = $jobs[$i];
    $jsonwr->start_object_value();
    $jsonwr->str_prop('name', $job->{name});
    $jsonwr->str_prop('cmd', $job->{cmd});
    $jsonwr->num_prop('status', $job->{status});
    $jsonwr->bool_prop('oot', $job->{oot});
    $jsonwr->num_prop('time', $job->{time});
    if (-e $job->{messages}) {
      $jsonwr->str_prop('messages_file', abs2rel(abs_path($job->{messages}), $out_dir))
    }
    $jsonwr->property('outputs');
    $jsonwr->start_array_value();
    if ($job->{outputs}) {
      for (my $j = 0; $j < scalar(@{$job->{outputs}}); $j++) {
        my $output = $job->{outputs}->[$j];
        if (-s $output->{FILE}) {
          $jsonwr->start_object_value();
          if ($output->{NAME}) {
            $jsonwr->str_prop('name', $output->{NAME});
          } else {
            my ($name) = fileparse($output->{FILE});
            $jsonwr->str_prop('name', $name);
          }
          $jsonwr->str_prop('file', abs2rel(abs_path($output->{FILE}), $out_dir));
          $jsonwr->end_object_value();
        }
      }
    }
    $jsonwr->end_array_value();
    $jsonwr->end_object_value();
  }
  $jsonwr->end_array_value();
  $jsonwr->property('alphabet');
  $alph->to_json($jsonwr);
  $jsonwr->property('background');
  $jsonwr->start_object_value();
  $jsonwr->str_prop('source', (defined($opts->{BFILE}) ? $opts->{BFILE} : '--control--')); 
  $jsonwr->num_prop('order', $opts->{ORDER});
  $jsonwr->property('freqs');
  $jsonwr->start_array_value();
  for (my $i = 0; $i < $alph->size_core(); $i++) {
    $jsonwr->num_value($bg->{$alph->char($i)});
  }
  $jsonwr->end_array_value();
  $jsonwr->end_object_value();

  $jsonwr->property('sequence_db');
  $jsonwr->start_object_value();
  $jsonwr->str_prop('source', $opts->{POS_SEQS});
  $jsonwr->num_prop('count', $seqs->{PRIMARY_COUNT});
  $jsonwr->num_prop('maxlen', $seqs->{PRIMARY_MAXLEN});
  $jsonwr->str_prop('file', abs2rel(abs_path($seqs->{PRIMARY}), $out_dir));
  $jsonwr->end_object_value();

  $jsonwr->property('neg_sequence_db');
  $jsonwr->start_object_value();
  $jsonwr->str_prop('source', $opts->{NEG_SEQS} ? 
    $opts->{NEG_SEQS} : 
    $opts->{ORDER} . '-order shuffled primary sequences'
  );
  $jsonwr->num_prop('count', $seqs->{CONTROL_COUNT});
  $jsonwr->str_prop('file', $opts->{NEG_SEQS} ? 
    abs2rel(abs_path($seqs->{CONTROL}), $out_dir) : 
    ""
  );
  $jsonwr->end_object_value();

  $jsonwr->property("motif_dbs");
  $jsonwr->start_array_value();
  for (my $i = 0; $i < scalar(@{$opts->{DBS}}); $i++) {
    $jsonwr->start_object_value();
    $jsonwr->str_prop('source', $opts->{DBS}->[$i]);
    $jsonwr->num_prop('count', $db_counts->[$i]);
    $jsonwr->end_object_value();
  }
  $jsonwr->end_array_value();
  $jsonwr->property('motif_count');
  $jsonwr->start_object_value();
  $jsonwr->num_prop('meme', &nmotifs($registry, 'meme'));
  $jsonwr->num_prop('streme', &nmotifs($registry, 'streme'));
  $jsonwr->end_object_value();
  $jsonwr->property('motifs');
  $jsonwr->start_array_value();
  for (my $i = 0; $i < scalar(@{$motifs}); $i++) {
    my $motif = $motifs->[$i];
    $jsonwr->start_object_value();
    $jsonwr->str_prop('pgm', $motif->{pgm});
    $jsonwr->num_prop('db', $motif->{db});
    $jsonwr->str_prop('id', $motif->{id});
    $jsonwr->str_prop('idx', $motif->{idx}) if (defined $motif->{idx});
    $jsonwr->str_prop('alt', $motif->{alt}) if ($motif->{alt});
    if ($motif->{consensus}) {
      $jsonwr->str_prop('consensus', $motif->{consensus}) 
    } elsif ($motif->{db} < 0) {
      my $consensus = $motif->{id};
      $consensus =~ s/^\d-//;		# remove motif number from STREME ids	
      $jsonwr->str_prop('consensus', $consensus) 
    }
    $jsonwr->num_prop('len', $motif->{width}) if (defined $motif->{width});
    $jsonwr->num_prop('sites', $motif->{sites}) if (defined $motif->{sites});
    $jsonwr->num_prop('rank', $motif->{rank}) if (defined $motif->{rank});
    $jsonwr->str_prop('sea_pvalue', &log_to_str($motif->{sea_log_pvalue}, 2));
    $jsonwr->str_prop('sea_evalue', (defined $motif->{sea_log_evalue} ? &log_to_str($motif->{sea_log_evalue}, 2) : ""));
    $jsonwr->str_prop('sea_qvalue', (defined $motif->{sea_log_qvalue} ? &log_to_str($motif->{sea_log_qvalue}, 2) : ""));
    $jsonwr->str_prop('pgm_evalue', &log_to_str($motif->{pgm_log_evalue}, 2));
    $jsonwr->bool_prop('pgm_evalue_accurate', ($motif->{pgm} ne 'streme' || $motif->{log_evalue_accurate}));
    $jsonwr->str_prop('url', $motif->{url}) if ($motif->{url});
    $jsonwr->num_prop('cluster', $motif->{group}) if (defined $motif->{group});
    $jsonwr->num_prop('seed_motif', $motif->{seed_motif}) if (defined $motif->{seed_motif});
    $jsonwr->property('pwm');
    $jsonwr->start_array_value();
    for (my $j = 0; $j < $motif->{width}; $j++) {
      $jsonwr->start_array_value();
      for (my $k = 0; $k < $alph->size_core(); $k++) {
        $jsonwr->num_value($motif->{pspm}->{$alph->char($k)}->[$j]);
      }
      $jsonwr->end_array_value();
    }
    $jsonwr->end_array_value();
    $jsonwr->property('tomtom_matches');
    $jsonwr->start_array_value();
    if ($motif->{matches}) {
      for (my $j = 0; $j < scalar(@{$motif->{matches}}); $j++) {
        my $match = $motif->{matches}->[$j];
        $jsonwr->start_object_value();
        $jsonwr->num_prop('db', $match->{db});
        $jsonwr->str_prop('idx', $match->{idx});
        $jsonwr->str_prop('id', $match->{id});
        $jsonwr->str_prop('alt', $match->{alt}) if ($match->{alt});
        $jsonwr->str_prop('url', $match->{url}) if ($match->{url});
        $jsonwr->end_object_value();
      }
    }
    $jsonwr->end_array_value();
    if ($motif->{sea_total_sites}) {
      $jsonwr->num_prop('sea_total_sites', $motif->{sea_total_sites});
      $jsonwr->num_array_prop('sea_site_distr', @{$motif->{sea_site_distr}});
    }
    if ($motif->{sea_max_sites}) {
      $jsonwr->num_prop('sea_max_sites', $motif->{sea_max_sites});
      $jsonwr->num_array_prop('sea_site_hist', @{$motif->{sea_site_hist}});
    }
    $jsonwr->str_prop('fimo_gff', abs2rel(abs_path($motif->{fimo_gff}), $out_dir)) if $motif->{fimo_gff};
    $jsonwr->end_object_value();
  }
  $jsonwr->end_array_value();
  $jsonwr->property("groups");
  $jsonwr->start_array_value();
  for (my $i = 0; $i < scalar(@{$align}); $i++) {
    my $group = $align->[$i];
    $jsonwr->start_array_value();
    for (my $j = 0; $j < scalar(@{$group}); $j++) {
      my $motif_align = $group->[$j];
      $jsonwr->start_object_value();
      $jsonwr->num_prop('motif', $motif_align->{motif});
      $jsonwr->bool_prop('rc', $motif_align->{rc});
      $jsonwr->num_prop('offset', $motif_align->{offset});
      $jsonwr->end_object_value();
    }
    $jsonwr->end_array_value();
  }
  $jsonwr->end_array_value();

  $htmlwr->output();
} # output

sub main {
  my $commandline = join " ", @cmd;
  &initialise();
  my $opts = &arguments();
  mkpath($opts->{OUT_DIR}) unless (-e $opts->{OUT_DIR});
  &init_progress_log($opts->{OUT_DIR});
  my $registry = {};
  my $alphabet = &get_alphabet($opts, $registry);
  my $seqs = &get_sequences($opts, $registry, $alphabet);
  my $background = &get_background($opts, $registry, $alphabet, $seqs);
  my @motifs = ();
  push(@motifs, &run_streme($opts, $registry, $alphabet, $background, $seqs)) if (!defined($opts->{STREME_NMOTIFS}) || $opts->{STREME_NMOTIFS} > 0);
  push(@motifs, &run_meme($opts, $registry, $alphabet, $background, $seqs)) if (!defined($opts->{MEME_NMOTIFS}) || $opts->{MEME_NMOTIFS} > 0);
  my @db_counts = &run_sea($opts, $registry, $alphabet, $background, $seqs, \@motifs);
  &create_combined_file($opts, $registry, \@motifs) if (@motifs);
  &run_distr($opts, $registry, $alphabet, $background, $seqs, \@motifs) if (@motifs);
  &run_tomtom($opts, $registry, $background, \@motifs, 'streme', -2);
  &run_tomtom($opts, $registry, $background, \@motifs, 'meme', -1);
  my $align = &run_align($opts, $registry, \@motifs);
  &run_fimo($opts, $registry, $background, $seqs, \@motifs, $align)
    unless (defined($opts->{FIMO_SKIP}));
  &output($opts, $registry, $alphabet, $background, $seqs, \@db_counts, \@motifs, $align);
  &summary($opts, $registry, $commandline, $version, $release);
  print $progress_log "Done\n";
  close($progress_log);
  $logger->trace("done") if $logger;
} # main
