#!@WHICHPERL@

my $DEFAULT_ORDER = 2;
my $DEFAULT_MINW = 6;
my $DEFAULT_MAXW = 15;
my $DEFAULT_GROUP_THRESH = 0.05;
my $DEFAULT_FILTER_THRESH = 0.05;
my $DEFAULT_MEME_NMOTIFS = 3;

=head1 NAME

meme-chip - Does an automated analysis of a ChIPseq DNA sequence dataset with the MEME toolchain.

=cut

my $USAGE = <<"END_USAGE";

meme-chip [options] <primary sequence file>

 Options:
  -o                <dir>   : output to the specified directory, failing if the directory exists
  -oc               <dir>   : output to the specified directory, overwriting if the directory exists
  -db               <path>  : target database for use by Tomtom and CentriMo; if not present,
                              Tomtom and CentriMo are not run
  -neg              <path>  : negative (control) sequence file name;
			      the control sequences will be input to MEME, CentriMo and STREME, 
                              and MEME will use the Differential Enrichment objective function; 
                              sequences are assumed to originate from the same alphabet as the 
                              primary sequence file and should be the same length as those;
                              default: no negative sequences are used for MEME 
			      or CentriMo, and for STREME, the primary sequences 
			      are shuffled to create the negative set
  -psp-gen                    use the psp-gen program to create a position-specific
                              prior for use by MEME with its Classic objective function;
			      requires -neg;  default: input control sequences directly
			      to MEME and use its Differential Enrichment objective function
  -dna                        set the alphabet to DNA; this is the default
  -rna                        set the alphabet to RNA
  -[x]alph          <path>  : alphabet file; when the x is specified the motif
                              databases are converted to the specified alphabet;
                              default: DNA
  -dna2rna                  : input DNA sequences will be treated as RNA and discoverd motifs
                              will use the RNA alphabet; known motifs be in the RNA alphabet
                              unless you also specify --xalph with an RNA alphabet file
  -bfile            <path>  : background file
  -order            <ord>   : set the order of the Markov background model
                              that is generated from the sequences when a
                              background file is not given; default: $DEFAULT_ORDER 
  -seed             <seed>  : seed for the randomized selection of sequences
                              for MEME and the shuffling of sequences for STREME;
                              default: 1
  -minw             <num>   : minimum motif width; default: $DEFAULT_MINW
  -maxw             <num>   : maximum motif width; default: $DEFAULT_MAXW
  -ccut             <num>   : maximum size of a sequence before it is cut down 
                              to a centered section; a value of 0 indicates the
                              sequences should not be cut down; default: 100
  -group-thresh     <gthr>  : primary threshold for clustering motifs; default: $DEFAULT_GROUP_THRESH
  -group-weak       <gwk>   : secondary threshold for clustering motifs; default: 2*gthr
  -filter-thresh    <fthr>  : E-value threshold for including motifs; default: $DEFAULT_FILTER_THRESH
  -time          <minutes>  : maximum time that this program has to run and 
                              create output in; default: no limit
  -desc             <text>  : description of the job
  -dfile            <file>  : file containing plain text description of the job
  -old-clustering           : pick cluster seed motifs based only on significance;
                              default: preferentially select discovered motifs as
                              clustering seeds even if there is a library motif
                              that appears more enriched
  -noecho                   : don't echo the commands run
  -help                     : display this help message
  -version                  : print the version and exit

 MEME Specific Options:
  -meme-brief       <num>   : reduce size of MEME output files if more than <num> 
			    : primary sequences
  -meme-mod [oops|zoops|anr]: sites used in a single sequence
  -meme-nmotifs     <num>   : maximum number of motifs to find; default: $DEFAULT_MEME_NMOTIFS
                            : if =0, MEME will not be run
  -meme-minsites    <num>   : minimum number of sites per motif
  -meme-maxsites    <num>   : maximum number of sites per motif
  -meme-p           <np>    : use parallel version with <np> processors
  -meme-pal                 : look for palindromes only
  -meme-searchsize  <num>   : the maximum portion of the primary sequences (in characters)
                            : used for motif search; MEME's running time increases as 
                            : roughly the square of <num>
  -meme-nrand               : MEME should not randomize sequence order

 STREME Specific Options:
  -streme-pvt        <num>  : stop if hold-out set p-value greater than <num>
  -streme-nmotifs    <num>  : maximum number of motifs to find; overrides -streme-pvt;
                            : if =0, STREME will not be run
  -streme-totallength <num> : truncate sequence set if its total length exceeds <num>

 CentriMo Specific Options:
  -centrimo-local           : compute enrichment of all regions (not only central)
  -centrimo-score   <num>   : set the minimum allowed match score
  -centrimo-maxreg  <num>   : set the maximum region size to be considered
  -centrimo-ethresh <num>   : set the E-value threshold for reporting
  -centrimo-noseq           : don't store sequence IDs in the output
  -centrimo-flip            : reflect matches on reverse strand around center

 SpaMo Specific Options:
  -spamo-skip               : don't run SpaMo

 FIMO Specific Options:
  -fimo-skip                : don't run FIMO
END_USAGE

use strict;
use warnings;

use Cwd qw(abs_path);
use Data::Dumper;
use Fcntl qw(O_CREAT O_RDONLY O_WRONLY O_TRUNC SEEK_SET);
use File::Basename qw(fileparse);
use File::Copy qw(copy);
use File::Path qw(mkpath);
use File::Temp qw(tempfile);
use File::Spec::Functions qw(abs2rel catfile catdir splitdir tmpdir);
use Getopt::Long;
use List::Util qw(min max);
use Pod::Usage;
use POSIX qw(floor);
use Time::HiRes qw(gettimeofday tv_interval);
use XML::Simple;

use lib qw(@PERLLIBDIR@);
use Globals;
use Alphabet qw(dna rna protein);
use ExecUtils qw(invoke stringify_args2);
use MotifUtils qw(intern_to_meme read_background_file);
use MotifInMemeXML;
use MotifInStremeXML;
use JsonRdr;
use HtmlMonolithWr;

# Globals
my $t0 = [&gettimeofday()];
my $logger = undef;
my $bin_dir = undef;
my $secondary_bin_dir = undef;
my $meme_bin_dir = undef;		# just may be in subdirectory parallel/
my $etc_dir = undef;
my $script_dir = undef;
my $temp_dir = undef;
my $version = undef;
my $revision = undef;
my $release = undef;
my $progress_log = undef;
my @cmd = (scalar fileparse($0), @ARGV);
my @jobs = ();

# Run MEME-ChIP
&main();

#
# Setup some globals
#
sub initialise {
  # setup logging
  eval {
    require Log::Log4perl;
    Log::Log4perl->import();
    Log::Log4perl::init('@APPCONFIGDIR@/logging.conf');
    $logger = Log::Log4perl->get_logger('meme.service.memechip');
    $SIG{__DIE__} = sub {
      return if ($^S);
      $logger->fatal(@_);
      die @_;
    };
    $logger->trace("call initialise");
  };

  # disable buffering on stdout (it should be selected by default)
  $| = 1;

  # setup primary binary directory
  $bin_dir = $ENV{'MEME_BIN_DIR'} ? $ENV{'MEME_BIN_DIR'} : '@BINDIR@';

  # setup secondary bin directory
  $secondary_bin_dir = $ENV{'MEME_SECONDARY_BIN_DIR'} ? $ENV{'MEME_SECONDARY_BIN_DIR'} : '@LIBEXECDIR@';

  # see if there is a parallel directory under $bin_dir for running meme
  $meme_bin_dir = (-e "$bin_dir/parallel") ? "$bin_dir/parallel" : $bin_dir;

  # setup script directory
  $script_dir = $ENV{'MEME_SCRIPT_DIR'} ? $ENV{'MEME_SCRIPT_DIR'} : '@LIBEXECDIR@';

  # setup etc directory
  $etc_dir = $ENV{'MEME_DATA_DIR'} ? $ENV{'MEME_DATA_DIR'} : '@APPCONFIGDIR@';

  # setup temporary directory
  $temp_dir = '@TMP_DIR@';
  # use the perl default if none is supplied or the replace fails
  $temp_dir = &tmpdir() if ($temp_dir eq '' || $temp_dir =~ m/^\@TMP[_]DIR\@$/);

  # store program info
  $version = '@ARCHIVE_VERSION@';
  $revision = '@ARCHIVE_REVISION@';
  $release = '@ARCHIVE_DATE@';
}

sub init_progress_log {
  my ($out_dir) = @_;
  my $progress_log_filename = catfile($out_dir, 'progress_log.txt');
  sysopen($progress_log, $progress_log_filename, 
    O_CREAT | O_TRUNC | O_WRONLY) or die("Failed to create progress log file");
  # disable output buffering as this is meant to document when things go wrong
  my $old_fh = select($progress_log); $| = 1; select($old_fh);
}

sub time_allocate {
  my ($opts, $stage) = @_;
  # no limit unless maximum time set
  return undef unless $opts->{TIME};
  # calculate how long we had originally
  my $o = 60 * $opts->{TIME};
  # calculate how many seconds we have left
  my $r = $o - int(&tv_interval($t0, [&gettimeofday()]) + 0.5);
 
  my $meme_ratio = 60;
  my $streme_ratio = 100;
  my $centrimo_ratio = 40;
  my $tomtom_ratio = 10;
  my $spamo_ratio = 40;
  my $fimo_ratio = 20;

  my $left;
  my $want;

  if ($stage eq 'meme') {
    $left = $meme_ratio + $streme_ratio + $centrimo_ratio + (3 * $tomtom_ratio) + $spamo_ratio + $fimo_ratio;
    $want = $meme_ratio;
  } elsif ($stage eq 'streme') {
    $left = $streme_ratio + $centrimo_ratio + (3 * $tomtom_ratio) + $spamo_ratio + $fimo_ratio;
    $want = $streme_ratio;
  } elsif ($stage eq 'centrimo') {
    $left = $centrimo_ratio + (3 * $tomtom_ratio) + $spamo_ratio + $fimo_ratio;
    $want = $centrimo_ratio;
  } elsif ($stage eq 'meme_tomtom') {
    $left = 3 * $tomtom_ratio + $spamo_ratio + $fimo_ratio;
    $want = $tomtom_ratio;
  } elsif ($stage eq 'streme_tomtom') {
    $left = 2 * $tomtom_ratio + $spamo_ratio + $fimo_ratio;
    $want = $tomtom_ratio;
  } elsif ($stage eq 'spamo') {
    $left = $spamo_ratio + $fimo_ratio;
    $want = $spamo_ratio;
  } elsif ($stage eq 'fimo') {
    $left = $fimo_ratio;
    $want = $fimo_ratio;
  } else {
  # note all other stages are essential so will be left to run
  # even if it looks like we will completely run out of time
    $left = 1;
    $want = 1;
  }
  return int($r * ($want / $left));
}

#
# Parse the arguments
#
sub arguments {
  $logger->trace("call arguments") if $logger;
  # Set Option Defaults
  my $options = {OLD_CLUSTERING => 0, ECHO => 1, VERBOSITY => 1, 
    CLOBBER => 1, OUT_DIR => 'memechip_out',
    DESCRIPTION => undef, SEED => 1, PSP_GEN => 0,
    CCUT => $CMAX, NEG_SEQS => undef, ALPH_NAME => 'DNA', ALPH_FILE => undef, XALPH => 0,
    BFILE => undef, ORDER => $DEFAULT_ORDER, GROUP_WEAK_THRESH => undef,
    GROUP_THRESH => $DEFAULT_GROUP_THRESH, FILTER_THRESH => $DEFAULT_FILTER_THRESH, DNA2RNA => 0, TIME => undef,
    MINW => undef, MAXW => undef, MEME_MOD => 'zoops', 
    MEME_NMOTIFS => $DEFAULT_MEME_NMOTIFS, MEME_MINSITES => undef, MEME_MAXSITES => undef, 
    MEME_P => undef, MEME_PALINDROMES => 0, MEME_SEARCHSIZE => undef, MEME_NORAND => undef,
    STREME_PVT => undef, STREME_NMOTIFS => undef, STREME_TOTALLENGTH => undef,
    CENTRIMO_LOCAL => 0, CENTRIMO_SCORE => undef, 
    CENTRIMO_MAXREG => undef, CENTRIMO_ETHRESH => undef, CENTRIMO_NOSEQ => 0,
    CENTRIMO_FLIP => undef, SEQUENCES => '', DBS => [], VALGRIND => undef};
  # General Options
  my $help = 0; # FALSE
  my $show_version = 0; # FALSE
  my @errors = ();
  my @dbs = ();

  # get the options from the arguments
  my $options_success = 0; # FALSE
  my $options_err = undef;
  # redirect stderr to a temp file so we can get the error message from GetOptions
  my $olderr;
  my $tmperr = tempfile('GetOptions_XXXXXXXXXX', DIR => $temp_dir, UNLINK => 1);
  open($olderr, ">&STDERR") or die("Can't dup STDERR: $!");
  open(STDERR, '>&', $tmperr) or die("Can't redirect STDERR to temp file: $!");
  # parse options
  $options_success = GetOptions(
    'help|?'          => \$help,
    'version'         => \$show_version,
    'old-clustering'  => \$options->{OLD_CLUSTERING},
    'noecho'          => sub {$options->{ECHO} = 0},
    'o=s'             => sub {$options->{CLOBBER} = 0; shift; $options->{OUT_DIR} = shift},
    'oc=s'            => \$options->{OUT_DIR},
    'desc=s'          => \$options->{DESCRIPTION},
    'dfile|fdesc=s'    => # slurp the description into a scalar
    sub {
      my ($param, $file) = @_;
      open(my $fh, '<', $file) or die("Couldn't open description file: $!\n");
      $options->{DESCRIPTION} = do { local( $/ ); <$fh>};
      close($fh);
    },
    'seed=i'            => \$options->{SEED},
    'ccut=i'            => \$options->{CCUT},
    'time=i'            => \$options->{TIME},
    'db=s'              => \@{$options->{DBS}},
    'neg=s'             => \$options->{NEG_SEQS},
    'psp-gen'           => \$options->{PSP_GEN},
    'dna'               => sub {$options->{ALPH_NAME} = 'DNA'},
    'rna'               => sub {$options->{ALPH_NAME} = 'RNA'},
    'protein'           => sub {$options->{ALPH_NAME} = 'PROTEIN'},
    'alph=s'            => \$options->{ALPH_FILE},
    'xalph=s'           => sub {$options->{XALPH} = 1; shift; $options->{ALPH_FILE} = shift},
    'dna2rna'           => \$options->{DNA2RNA},
    'bfile=s'           => \$options->{BFILE},
    'order=i'           => \$options->{ORDER},
    'minw=i'            => \$options->{MINW}, 
    'maxw=i'            => \$options->{MAXW}, 
    'group-thresh=f'    => \$options->{GROUP_THRESH},
    'group-weak=f'      => \$options->{GROUP_WEAK_THRESH},
    'filter-thresh=f'   => \$options->{FILTER_THRESH},
    'meme-brief=i'      => \$options->{MEME_BRIEF},
    'meme-mod=s'        => \$options->{MEME_MOD},
    'meme-nmotifs=i'    => \$options->{MEME_NMOTIFS}, 
    'meme-minsites=i'   => \$options->{MEME_MINSITES},
    'meme-maxsites=i'   => \$options->{MEME_MAXSITES},
    'meme-pal'          => \$options->{MEME_PALINDROMES},
    'meme-p=s'          => \$options->{MEME_P},
    'meme-searchsize=i' => \$options->{MEME_SEARCHSIZE},
    'meme-norand'       => \$options->{MEME_NORAND},
    'streme-pvt=f'      => \$options->{STREME_PVT},
    'streme-nmotifs=i'  => \$options->{STREME_NMOTIFS},
    'streme-totallength=i'  => \$options->{STREME_TOTALLENGTH},
    'centrimo-local'    => \$options->{CENTRIMO_LOCAL},
    'centrimo-score=f'  => \$options->{CENTRIMO_SCORE},
    'centrimo-maxreg=i' => \$options->{CENTRIMO_MAXREG},
    'centrimo-ethresh=f'=> \$options->{CENTRIMO_ETHRESH},
    'centrimo-noseq'    => \$options->{CENTRIMO_NOSEQ},
    'centrimo-flip'     => \$options->{CENTRIMO_FLIP},
    'spamo-skip'        => \$options->{SPAMO_SKIP},
    'fimo-skip'         => \$options->{FIMO_SKIP},
    'verbosity=i'	=> \$options->{VERBOSITY},
    'valgrind'		=> \$options->{VALGRIND}
  );
  ($options->{SEQUENCES}) = @ARGV;
  # display version
  if ($show_version) {
    print STDOUT $version, "\n";
    exit(0);
  }
  # display help
  pod2usage($USAGE) if $help;
  # reset STDERR
  open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!");
  # read argument parsing errors
  seek($tmperr, 0, SEEK_SET);
  while (<$tmperr>) {chomp; push(@errors, $_);}
  close($tmperr);
  # by default make the weak threshold to 2 times the strong threshold
  $options->{GROUP_WEAK_THRESH} = $options->{GROUP_THRESH} * 2 unless $options->{GROUP_WEAK_THRESH};
  # check sequence
  unless (defined($options->{SEQUENCES})) {
    push(@errors, "No sequences provided.");
  } elsif (not -e $options->{SEQUENCES}) {
    push(@errors, "The sequences specified do not exist.");
  }
  # check the negative sequences exist
  if (defined($options->{NEG_SEQS}) && (not -e $options->{NEG_SEQS})) {
    push(@errors, "The control sequences specified do not exist.");
  }
  # check that negative sequences given
  if (! defined($options->{NEG_SEQS}) && $options->{PSP_GEN}) {
    push(@errors, "Option -psp-gen requires option -neg.");
  }
  # check alphabet file
  if (defined($options->{ALPH_FILE}) && (not -e $options->{ALPH_FILE})) {
    push(@errors, "Value \"$options->{ALPH_FILE}\" invalid for option alph (file does not exist)");
  }
  # check background file
  if (defined($options->{BFILE}) && (not -e $options->{BFILE})) {
    push(@errors, "Value \"$options->{BFILE}\" invalid for option bfile (file does not exist)");
  }
  # check databases
  if (@{$options->{DBS}}) {
    foreach my $db (@{$options->{DBS}}) {
      unless (-e $db) {
        push(@errors, "Value \"$db\" invalid for option db (file does not exist)");
      }
    }
  }
  # check min width
  if (defined($options->{MINW}) && ($options->{MINW} < $MINW || $options->{MINW} > $MAXW)) {
    push(@errors, "Value $options->{MINW} invalid for option minw (must be >= $MINW and <= $MAXW)");
    $options->{MINW} = undef;
  }
  # check max width
  my $MW = (defined($options->{MINW}) ? $options->{MINW} : $MINW);
  if (defined($options->{MAXW}) && ($options->{MAXW} < $MW || $options->{MAXW} > $MAXW)) {
    push(@errors, "Value $options->{MAXW} invalid for option maxw (must be >= $MW and <= $MAXW)");
  }
  # give defaults for min width and max width
  unless (defined($options->{MINW})) {
    if (defined($options->{MAXW})) {
      $options->{MINW} = min($DEFAULT_MINW, $options->{MAXW});
    } else {
      $options->{MINW} = $DEFAULT_MINW;
    }
  }
  unless (defined($options->{MAXW})) {
    $options->{MAXW} = max($DEFAULT_MAXW, $options->{MINW});
  }
  # check ccut
  if ($options->{CCUT} < $MINMEMESEQW && $options->{CCUT} != 0) {
    push(@errors, "Value \"$options->{CCUT}\" invalid for option ccut (must be >= $MINMEMESEQW or 0)");
  }
  # check time
  if (defined($options->{TIME}) && $options->{TIME} < 1) {
    push(@errors, "Value \"$options->{TIME}\" invalid for option time (must be >= 1)");
  }
  # check background markov order
  if ($options->{ORDER} < 0) {
    push(@errors, "Value \"$options->{ORDER}\" invalid for option order (must be >= 0)");
  }
  # check meme mode
  if ($options->{MEME_MOD} !~ m/^(oops|zoops|anr)$/) {
    push(@errors, "Value \"$options->{MEME_MOD}\" invalid for option meme-mod (must be oops, zoops or anr)");
  }
  # check meme motif count
  if ($options->{MEME_NMOTIFS} < 0) {
    push(@errors, "Value $options->{MEME_NMOTIFS} invalid for option meme-nmotifs (must be >= 0)");
  }
  # check meme min sites
  if (defined($options->{MEME_MINSITES}) && 
    ($options->{MEME_MINSITES} < $MINSITES || 
      $options->{MEME_MINSITES} > $MAXSITES)) {
    push(@errors, "Value $options->{MEME_MINSITES} invalid for option meme-minsites (must be >= $MINSITES and <= $MAXSITES)");
    $options->{MEME_MINSITES} = undef;
  }
  # check meme max sites
  my $MS = (defined($options->{MEME_MINSITES}) ? $options->{MEME_MINSITES} : $MINSITES);
  if (defined($options->{MEME_MAXSITES}) && 
    ($options->{MEME_MAXSITES} < $MS || 
      $options->{MEME_MAXSITES} > $MAXSITES)) {
    push(@errors, "Value $options->{MEME_MAXSITES} invalid for option meme-maxsites (must be >= $MS and <= $MAXSITES)");
  }
  # check meme searchsize
  if (defined($options->{MEME_SEARCHSIZE}) && $options->{MEME_SEARCHSIZE} < 0) {
    push(@errors, "Value $options->{MEME_SEARCHSIZE} invalid for option meme-searchsize (must be >= 0)");
  }
  # check meme p
  if (defined($options->{MEME_P}) && $options->{MEME_P} !~ m/^\s*[1-9]\d*(?:\s.*)?$/) {
    push(@errors, "Value \"$options->{MEME_P}\" invalid for option meme-p (must begin with a positive integer)");
  }
  # check streme pvt
  if (defined($options->{STREME_PVT}) && $options->{STREME_PVT} < 0) {
    push(@errors, "Value \"$options->{STREME_PVT}\" invalid for option streme-pvt (must be >= 0)");
  }
  # check streme motif count
  if (defined($options->{STREME_NMOTIFS}) && $options->{STREME_NMOTIFS} < 0) {
    push(@errors, "Value \"$options->{STREME_NMOTIFS}\" invalid for option streme-nmotifs (must be >= 0)");
  }
  # check streme totallength 
  if (defined($options->{STREME_TOTALLENGTH}) && $options->{STREME_TOTALLENGTH} < 0) {
    push(@errors, "Value \"$options->{STREME_TOTALLENGTH}\" invalid for option streme-totallength (must be >= 0)");
  }
  # check centrimo score (How? As long as it's a float then it's valid)
  # check centrimo maxreg
  if (defined($options->{CENTRIMO_MAXREG}) && $options->{CENTRIMO_MAXREG} < 1) {
    push(@errors, "Value \"$options->{CENTRIMO_MAXREG}\" invalid for option centrimo-maxreg (must be >= 1)");
  }
  # check centrimo ethresh
  if (defined($options->{CENTRIMO_ETHRESH}) && $options->{CENTRIMO_ETHRESH} < 0) {
    push(@errors, "Value \"$options->{CENTRIMO_ETHRESH}\" invalid for option ".
      "centrimo-ethresh (must be a valid E-value)");
  }
  # check verbosity
  if ($options->{VERBOSITY} < 1 || $options->{VERBOSITY} > 5) {
    push(@errors, "Value \"$options->{VERBOSITY}\" invalid for option -verbosity (must be >= 1 and <=5)");
  }

  # check that we're not clobbering something we're not meant to
  if (-e $options->{OUT_DIR}) {
    unless (-d $options->{OUT_DIR}) {
      push(@errors, "Can't output to \"$options->{OUT_DIR}\" as it is not a directory.\n");
    } elsif (!$options->{CLOBBER}) {
      push(@errors, "Can't output to \"$options->{OUT_DIR}\" as the directory already exists.\n");
    }
  }
  # print errors
  foreach my $error (@errors) {
    print STDERR $error, "\n";
  }
  pod2usage($USAGE) if @errors;
  # return options
  return $options;
}

sub job_ok {
  $logger->trace("call job_ok") if $logger;
  my ($registry, $job) = @_;
  my $entry = $registry->{$job};
  return defined($entry) && $entry->{status} == 0;
}

sub nmotifs {
  $logger->trace("call nmotifs") if $logger;
  my ($registry, $job) = @_;
  my $entry = $registry->{$job};
  if (defined($entry) && $entry->{status} == 0) {
    return $entry->{NMOTIFS};
  }
  return 0;
}

sub log_10_to_str {
  my ($value, $prec) = @_;
  my ($m, $e);
  $e = floor($value);
  $m = 10 ** ($value - $e);

  # check that rounding up won't cause a 9.9999 to go to a 10
  if ($m + (.5 * (10 ** -$prec)) >= 10) {
    $m = 1;
    $e += 1;
  }
  return sprintf("%.*fe%+04.0f", $prec, $m, $e);
}

sub str_to_log_10 {
  my ($str) = @_;
  my ($m, $e);
  $m = 1;
  $e = 0;
  if ($str =~ m/^\+?(\d*\.?\d+)(?:[eE]([+-]?\d+))?$/) {
    $m = $1;
    $e = $2 if (defined($2));
  }
  return (log($m) / log(10)) + $e;
}

#
# Run a job and do something with the command line and stuff...
#
sub run_job {
  $logger->trace("call run_job") if $logger;
  my ($opts, $registry, $name, $required, $job, $suppress_messages, $outputs) = @_;
  # check that all requirements are avialable
  foreach my $require (@{$required}) {
    my $reg = $registry->{$require};
    unless (defined($reg) && $reg->{status} == 0) {
      $logger->warn("skipped $name due to missing requirement $require") if $logger;
      print $progress_log "Skipped $name due to missing requirement $require\n";
      print "WARNING: skipped $name due to missing requirement $require.\n";
      return; # missing requirement
    }
  }
  # store any unstored messages
  my %msg = ();
  my $msg_file = catfile($opts->{OUT_DIR}, $name . "_msgs.txt");
  if (!(defined($job->{ALL_FILE}) || defined($job->{ALL_VAR}))) {
    if (!(defined($job->{ERR_FILE}) || defined($job->{ERR_VAR})) && 
      !(defined($job->{OUT_FILE}) || defined($job->{OUT_VAR}))) {
      $msg{ALL_FILE} = $msg_file;
    } elsif (!(defined($job->{ERR_FILE}) || defined($job->{ERR_VAR}))) {
      $msg{ERR_FILE} = $msg_file;
    } elsif (!(defined($job->{OUT_FILE}) || defined($job->{OUT_VAR}))) {
      $msg{OUT_FILE} = $msg_file;
    }
  }
  # convert the command to a string
  my $cmd;
  $cmd = &ExecUtils::stringify_args2(%{$job});
  # echo the command
  $logger->debug("About to invoke: $cmd") if $logger;
  print $progress_log "Invoking:\n  $cmd\n";
  print "Starting $job->{PROG}: $cmd\n" if ($opts->{ECHO});
  # run the command
  my $time;
  my $oot; # out of time flag
  $opts->{VALGRIND} = "valgrind" if (defined $opts->{VALGRIND});
  my $status = &ExecUtils::invoke(%{$job}, %msg, TMPDIR => $temp_dir,
    TIME => \$time, OOT => \$oot, VALGRIND => $opts->{VALGRIND}); 
  $logger->debug("Finished invoke of $name with status $status") if $logger;
  print $progress_log "Finished invoke:\n".
      "  name: $name  status: $status  time: $time" . 
      ($oot ? "  (ran out of time!)\n" : "\n");
  if ($opts->{ECHO}) {
    # echo program messages
    if (-s $msg_file && $opts->{ECHO} && !$suppress_messages) {
      my $fh;
      sysopen($fh, $msg_file, O_RDONLY);
      while (<$fh>) { print $_ };
      close($fh);
    }
    # print timeout warnings
    if ($oot) {
      print "Ran out of time! Stopping $job->{PROG}\n"
    }
    # print sucess or failure to stdout
    if ($status == 0) {
      print "$job->{PROG} ran successfully in $time seconds\n"
    } else {
      if ($status == -1) {
        print "$job->{PROG} failed to run\n";
      } elsif ($status & 127) {
        print "$job->{PROG} process died with signal " . ($status & 127) . 
            ", " . (($status & 128) ? 'with' : 'without') . " coredump\n";
      } else {
        print "$job->{PROG} exited with error code " . ($status >> 8) . "\n";
      }
    }
  }
  # remove messages file if it's empty
  unlink($msg_file) unless (-s $msg_file);
  # find which directory it used
  my $out_dir;
  for (my $i = 0; $i < scalar(@{$job->{ARGS}})-1; $i++) {
    my $arg = $job->{ARGS}->[$i];
    if ($arg eq '-oc' || $arg eq '--oc') {
      $out_dir = $job->{ARGS}->[$i+1] if (-d $job->{ARGS}->[$i+1]);
      last;
    }
  }
  # store the result
  my $entry = {name => $name, cmd => $cmd, dir => $out_dir, 
    messages => $msg_file, status => $status, time => $time, 
    oot => $oot, outputs => $outputs};
  $registry->{$name} = $entry;
  push(@jobs, $entry); 
} # run_job

#
# Check if the file is the same
#
sub same_file {
  $logger->trace("call same_file") if $logger;
  my ($fn1, $fn2) = @_;
  my ($dev1,$ino1) = stat($fn1);
  my ($dev2,$ino2) = stat($fn2);
  return ($dev1 == $dev2 && $ino1 == $ino2);
}

# 
# Load the alphabet
#
sub get_alphabet {
  $logger->trace("call get_alphabet") if $logger;
  my ($opts, $registry) = @_;
  my $out_dir = $opts->{OUT_DIR};
  my ($alph_name, $alph_file, $alph_obj);
  if ($opts->{ALPH_FILE}) {
    $alph_file = catfile($out_dir, (fileparse($opts->{ALPH_FILE}))[0]);
    # link the alphabet file to the output directory, if it's not already there
    if (-e $alph_file) {
      if (!&same_file($opts->{ALPH_FILE}, $alph_file)) {
        unlink($alph_file) or die("Can't write the alphabet file");
        unless (link($opts->{ALPH_FILE}, $alph_file)) {
          copy($opts->{ALPH_FILE}, $alph_file) or die("Failed to copy alphabet file: $!");
        }
      } 
    } else {
      unless (link($opts->{ALPH_FILE}, $alph_file)) {
        copy($opts->{ALPH_FILE}, $alph_file) or die("Failed to copy alphabet file: $!");
      }
    }
    $alph_obj = new Alphabet($alph_file);
  } else {
    $alph_name = $opts->{ALPH_NAME};
    die("Can't use option -dna2rna with non-DNA sequences!") if ($opts->{DNA2RNA} && $alph_name ne 'DNA');
    if ($alph_name eq 'DNA') {
      if ($opts->{DNA2RNA}) {
        $alph_name = 'RNA';
        $alph_obj = Alphabet::rna();
      } else {
        $alph_obj = Alphabet::dna();
      }
    } elsif ($alph_name eq 'RNA') {
      $alph_obj = Alphabet::rna();
    } elsif ($alph_name eq 'PROTEIN') {
      print STDERR "Warning: MEME-ChIP is not really designed for protein\n";
      $alph_obj = Alphabet::protein();
    }
  }
  die("Name or file must be defined!") unless (defined($alph_name) || defined($alph_file));
  my @args = (defined($alph_file) ? ('-alph', $alph_file) :  ('-' . lc($alph_name)));
  return {name => $alph_name, file => $alph_file, obj => $alph_obj, args => \@args};
}

sub get_background {
  $logger->trace("call get_background") if $logger;
  my ($opts, $registry, $alphabet, $seqs, $neg_seqs) = @_;
  my $alph = $alphabet->{obj};
  my $out_dir = $opts->{OUT_DIR};
  my $bfile;
  if ($opts->{BFILE}) {
    $bfile = catfile($out_dir, (fileparse($opts->{BFILE}))[0]);
    # link the background file to the output directory, if it's not already there
    if (-e $bfile) {
      if (!&same_file($opts->{BFILE}, $bfile)) {
        unlink($bfile) or die("Can't write the background file");
        unless (link($opts->{BFILE}, $bfile)) {
          copy($opts->{BFILE}, $bfile) or die("Failed to copy background: $!");
        }
      } 
    } else {
      unless (link($opts->{BFILE}, $bfile)) {
        copy($opts->{BFILE}, $bfile) or die("Failed to copy background: $!");
      }
    }
    $registry->{'bg'} = {status => 0};
  } else {
    $bfile = catfile($out_dir, 'background');
    # generate a background file and calculate sequence metrics
    my $order = $opts->{ORDER};
    my $sequences = defined $neg_seqs ? $neg_seqs->{ORIGINAL} : $seqs->{ORIGINAL};
    &run_job(
      $opts, $registry, 'bg', [],
      {
        PROG => 'fasta-get-markov',
        BIN => $secondary_bin_dir,
        ARGS => ['-nostatus', '-nosummary', @{$alphabet->{args}}, '-m', $order, $sequences, $bfile]
      }, 1, 
      [{FILE => $bfile, NAME => 'Background'}]
    );
  }
  # get the background
  my %bg = read_background_file($alph, $bfile);
  return {file => $bfile, obj => \%bg};
}

# 
# Generate sequence inputs by trimming/shuffling etc.
#
sub get_sequences {
  $logger->trace("call get_sequences") if $logger;
  my ($opts, $registry, $alphabet) = @_;
  my $alph = $alphabet->{obj};

  my $pos_input = $opts->{SEQUENCES};
  my $out_dir = $opts->{OUT_DIR};

  # create file names
  my $seqs = catfile($out_dir, (fileparse($pos_input))[0]);
  my $seqs_centered = catfile($out_dir, "seqs-centered");
  my $seqs_discarded = catfile($out_dir, "seqs-discarded");

  # link the sequences to sequences in the output directory if they 
  # aren't already there
  if (-e $seqs) {
    if (!&same_file($pos_input, $seqs)) {
      unlink($seqs) or die("Can't write the sequence file");
      unless (link($pos_input, $seqs)) {
        copy($pos_input, $seqs) or die("Failed to copy sequences: $!");
      }
    } 
  } else {
    unless(link($pos_input, $seqs)) {
      copy($pos_input, $seqs) or die("Failed to copy sequences: $!");
    }
  }

  my $metrics;
  # count the number of sequences
  my ($count, $min_len, $max_len, $avg_len, $total_len);
  &run_job($opts, $registry, 'count_seqs', [],
           {PROG => 'getsize', BIN => $secondary_bin_dir, ARGS => [$seqs],
            OUT_VAR => \$metrics, OUT_NAME => '$metrics'});
  die("getsize failed me...") unless ($metrics =~ m/^(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s/);
  $count = $1;
  $min_len = $2;
  $max_len = $3;
  $avg_len = $4;
  $total_len = $5;

  # find the length which occurs most in the sequence
  my $most_len;
  &run_job($opts, $registry, 'most_seqs', [], {PROG => 'fasta-most', BIN => $script_dir, 
      ARGS => ['-min', 50], IN_FILE => $seqs, OUT_VAR => \$metrics, OUT_NAME => '$metrics'});
  die("fasta-most failed me...") unless ($metrics =~ m/^(\d+) (\d+)/);
  # use zero if we can't find any sequences longer than 50
  # that way centrimo will just use the default behaviour and it won't crash
  if ($2 > 0) {
    $most_len = $1;
  } else {
    $most_len = 0;
  }

  # cut out the center of the sequences
  if ($opts->{CCUT}) {
    &run_job($opts, $registry, 'center_seqs', [], 
      {
        PROG => 'fasta-center', BIN => $script_dir, 
        ARGS => [@{$alphabet->{args}}, '-len', $opts->{CCUT}],
        IN_FILE => $seqs, OUT_FILE => $seqs_centered
      },
      0, [{FILE => $seqs_centered}]);
  } else {
    $seqs_centered = $seqs;
    $registry->{'center_seqs'} = {status => 0};
  }

  # return the sequences
  return {
    ORIGINAL => $seqs, CENTERED => $seqs_centered,
    COUNT => $count,
    MIN_LEN => $min_len, MAX_LEN => $max_len, AVG_LEN => $avg_len, 
    TOTAL_LEN => $total_len, MOST_LEN => $most_len
  };
}

sub get_control_sequences {
  $logger->trace("call get_control_sequences") if $logger;
  my ($opts, $registry, $alphabet) = @_;
  my $out_dir = $opts->{OUT_DIR};
  my $neg_input = $opts->{NEG_SEQS};
  return undef unless defined $neg_input;
  # create the file names
  my $control = catfile($out_dir, (fileparse($neg_input))[0]);
  my $control_centered = catfile($out_dir, "control-centered");
  # copy the files
  if (-e $control) {
    if (!&same_file($neg_input, $control)) {
      unlink($control) or die("Can't write the sequence file");
      unless (link($neg_input, $control)) {
        copy($neg_input, $control) or die("Failed to copy sequences: $!");
      }
    } 
  } else {
    unless(link($neg_input, $control)) {
      copy($neg_input, $control) or die("Failed to copy sequences: $!");
    }
  }

  my $metrics;
  # count the number of control sequences
  my ($count, $min_len, $max_len, $avg_len, $total_len);
  &run_job($opts, $registry, 'count_seqs', [],
           {PROG => 'getsize', BIN => $secondary_bin_dir, ARGS => [$control],
            OUT_VAR => \$metrics, OUT_NAME => '$metrics'});
  die("getsize failed me...") unless ($metrics =~ m/^(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s/);
  $count = $1;
  $min_len = $2;
  $max_len = $3;
  $avg_len = $4;
  $total_len = $5;

  # cut out the center of the control sequences
  if ($opts->{CCUT}) {
    &run_job($opts, $registry, 'center_control', [], 
      {
        PROG => 'fasta-center', BIN => $script_dir, 
        ARGS => [@{$alphabet->{args}}, '-len', $opts->{CCUT}],
        IN_FILE => $control, OUT_FILE => $control_centered
      },
      0, [{FILE => $control_centered}]);
  } else {
    $control_centered = $control;
    $registry->{'center_control'} = {status => 0};
  }
  return {ORIGINAL => $control, CENTERED => $control_centered,
    COUNT => $count, MIN_LEN => $min_len, MAX_LEN => $max_len,
    AVG_LEN => $avg_len, TOTAL_LEN => $total_len};
}

#
# Run psp-gen to create position specific priors corresponding to the
# positive and negative sequence sets.
#
sub get_psp {
  $logger->trace("call get_psp") if $logger;
  my ($opts, $registry, $alphabet, $seqs, $control) = @_;
  my $alph = $alphabet->{obj};
  # create the file names
  my $pspfile = catfile($opts->{OUT_DIR}, 'psp');
  # use the minw and maxw settings for finding the PSP but
  # trim to the allowed range for PSPs
  # the actual width set by the PSP finder is that with the highest
  # score before normalizing; allow X or N or other nonspecific residue/base
  # codes (but score any sites containing them as zero)
  my $psp_minw = $opts->{MINW} < $MINPSPW ? $MINPSPW : $opts->{MINW};
  my $psp_maxw = $opts->{MAXW} > $MAXPSPW ? $MAXPSPW : $opts->{MAXW};
  # create the psp
  my @args = ('-pos', $seqs->{CENTERED}, '-neg', $control->{CENTERED},
    '-minw', $psp_minw, '-maxw', $psp_maxw, @{$alphabet->{args}});
  push(@args, '-revcomp') if ($alph->has_complement());
  push(@args, '-verbose') if (defined($opts->{VERBOSITY}) && $opts->{VERBOSITY} > 1);
  &run_job($opts, $registry, 'psp',
      ['center_seqs', 'center_control'],
      {PROG => 'psp-gen', BIN => $script_dir, ARGS => \@args, OUT_FILE => $pspfile},
      0, [{FILE => $pspfile}]);
  return $pspfile;
}

#
# Load motifs using the passed parser
#
sub parse_motifs {
  $logger->trace("call parse_motifs") if $logger;
  my ($prog, $db, $bg, $parser, $file) = @_;
  my $fh;
  unless(sysopen($fh, $file, O_RDONLY)) {
    warn("Failed to open the output of $prog to parse motifs.\n");
    return ();
  }
  local $/ = \100;
  while (<$fh>) {
    $parser->parse_more($_);
  }
  close($fh);
  $parser->parse_done();
  my @motifs = $parser->get_motifs();
  my $nmotifs = scalar(@motifs);
  foreach my $motif (@motifs) {
    $motif->{file} = $file;
    $motif->{bg} = $bg;
    $motif->{db} = $db;
    $motif->{prog} = $prog;
    $motif->{sig} = &str_to_log_10($motif->{evalue});
  }
  return ($nmotifs, @motifs);
}

#
# Run MEME and load the motifs it finds into memory
#
sub meme {
  $logger->trace("call meme") if $logger;
  my ($opts, $registry, $alphabet, $background, $seqs, $neg_seqs, $psp) = @_;
  my $alph = $alphabet->{obj};
  # calculate the time avaliable to run MEME
  my $timeout = &time_allocate($opts, 'meme');
  my $meme_dir = catdir($opts->{OUT_DIR}, "meme_out");
  my @args = (
    $seqs->{CENTERED}, '-oc', $meme_dir, 
    '-mod', $opts->{MEME_MOD}, '-nmotifs', $opts->{MEME_NMOTIFS}, 
    '-minw', $opts->{MINW}, '-maxw', $opts->{MAXW},
    '-bfile', $background->{file}, @{$alphabet->{args}}
  );
  push(@args, '-minsites', $opts->{MEME_MINSITES}) if (defined($opts->{MEME_MINSITES}));
  push(@args, '-maxsites', $opts->{MEME_MAXSITES}) if (defined($opts->{MEME_MAXSITES}));
  push(@args, '-searchsize', $opts->{MEME_SEARCHSIZE}) if (defined($opts->{MEME_SEARCHSIZE}));
  push(@args, '-norand') if $opts->{MEME_NORAND};
  # try to give MEME 60 seconds to finish before the timeout
  push(@args, '-time', ($timeout > 60 ? $timeout - 60 : 1)) if ($timeout);
  push(@args, '-p', $opts->{MEME_P}) if (defined($opts->{MEME_P}));
  if (defined($neg_seqs)) {
    if (defined($psp)) {
      push(@args, '-psp', $psp);
    } else {
      push(@args, '-objfun', 'de', '-neg', $neg_seqs->{CENTERED});
    }
  }
  push(@args, '-revcomp') if ($alph->has_complement());
  push(@args, '-pal') if $opts->{MEME_PALINDROMES};
  push(@args, '-brief', $opts->{MEME_BRIEF}) if (defined($opts->{MEME_BRIEF}));
  push(@args, '-nostatus') unless $opts->{VERBOSITY} >= 2;
  my @dependencies = ('bg');
  push(@dependencies, 'psp') if (defined($psp));
  &run_job($opts, $registry, 'meme', \@dependencies, 
      {PROG => 'meme', BIN => $meme_bin_dir, ARGS => \@args, TIMEOUT => $timeout},
      0,[{FILE => catfile($meme_dir, 'meme.html'), NAME => 'MEME HTML'}, 
        {FILE => catfile($meme_dir, 'meme.txt'), NAME => 'MEME text'}, 
        {FILE => catfile($meme_dir, 'meme.xml'), NAME => 'MEME XML'}
      ]);

  if (&job_ok($registry, 'meme')) {
    my ($nmotifs, @motifs) = &parse_motifs('meme', -1, $background->{obj}, new MotifInMemeXML(), catfile($meme_dir, 'meme.xml'));
    $registry->{meme}->{NMOTIFS} = $nmotifs;
    return @motifs;
  }  
  return ();
}

#
# Run STREME and load the motifs it finds into memory
#
sub streme {
  $logger->trace("call streme") if $logger;
  my ($opts, $registry, $alphabet, $background, $seqs, $neg_seqs) = @_;
  
  # calculate the time avaliable to run STREME
  my $timeout = &time_allocate($opts, 'streme');
  my $streme_dir = catdir($opts->{OUT_DIR}, "streme_out");

  my @args = (
    '--verbosity', $opts->{VERBOSITY}, '--oc', $streme_dir, @{$alphabet->{args}},
    '--p', $seqs->{CENTERED});

  # add the control sequences if provided
  push(@args, '--n', $neg_seqs->{CENTERED}) if ($neg_seqs);

  # add the motif width range
  push(@args, '--minw', $opts->{MINW}, '--maxw', $opts->{MAXW});
  
  # try to give STREME 60 seconds to finish before the timeout
  push(@args, '--time', ($timeout > 60 ? $timeout - 60 : 1)) if ($timeout);
  push(@args, '--order', $opts->{ORDER});
  push(@args, '--thresh', $opts->{STREME_PVT}) if defined($opts->{STREME_PVT});
  push(@args, '--nmotifs', $opts->{STREME_NMOTIFS}) if defined($opts->{STREME_NMOTIFS});
  push(@args, '--totallength', $opts->{STREME_TOTALLENGTH}) if defined($opts->{STREME_TOTALLENGTH});

  &run_job($opts, $registry, 'streme', ['center_seqs'], {
      PROG => 'streme', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout},
      0, [{FILE => catfile($streme_dir, 'streme.html'), NAME => 'STREME HTML'},
        {FILE => catfile($streme_dir, 'streme.txt'), NAME => 'STREME text'},
        {FILE => catfile($streme_dir, 'streme.xml'), NAME => 'STREME XML'}
      ]);

  if (&job_ok($registry, 'streme')) {
    my ($nmotifs, @motifs) = &parse_motifs('streme', -2, $background->{obj}, new MotifInStremeXML(), catfile($streme_dir, "streme.xml"));
    $registry->{streme}->{NMOTIFS} = $nmotifs;
    return @motifs;
  }
  return ();
}

# 
# Run CentriMo to find centrally enriched motifs and to create
# distribution graphs.
#
sub centrimo {
  $logger->trace("call centrimo") if $logger;
  my ($opts, $registry, $alphabet, $background, $seqs, $neg_seqs, $motifs) = @_;
  my $alph = $alphabet->{obj};
  # calculate the time avaliable to run CentriMo
  my $timeout = &time_allocate($opts, 'centrimo');
  my @motif_inputs = ();
  my @db_map = ();
  my @db_counts = ();
  if (&nmotifs($registry, 'meme') > 0) {
    push(@motif_inputs, catfile($registry->{meme}->{dir}, 'meme.xml'));
    push(@db_map, -1);
  }
  if (&nmotifs($registry, 'streme') > 0) {
    push(@motif_inputs, catfile($registry->{streme}->{dir}, 'streme.xml'));
    push(@db_map, -2);
  }
  push(@motif_inputs, @{$opts->{DBS}});
  foreach (0 .. (scalar(@{$opts->{DBS}}) - 1)) {
    push(@db_map, $_);
    push(@db_counts, 0);
  }
  return @db_counts unless @motif_inputs;

  my $centrimo_dir = catdir($opts->{OUT_DIR}, 'centrimo_out');
  my @args = ('-seqlen', $seqs->{MOST_LEN}, '-verbosity', 
    $opts->{VERBOSITY}, '-oc', $centrimo_dir, '-bfile', $background->{file});
  push(@args, '-xalph', $alphabet->{file}) if ($opts->{XALPH});
  push(@args, '-local') if $opts->{CENTRIMO_LOCAL};
  push(@args, '-score', $opts->{CENTRIMO_SCORE}) if (defined($opts->{CENTRIMO_SCORE}));
  push(@args, '-maxreg', $opts->{CENTRIMO_MAXREG}) if (defined($opts->{CENTRIMO_MAXREG}));
  push(@args, '-ethresh', $opts->{CENTRIMO_ETHRESH}) if (defined($opts->{CENTRIMO_ETHRESH}));
  push(@args, '-noseq') if $opts->{CENTRIMO_NOSEQ};
  push(@args, '-flip') if $opts->{CENTRIMO_FLIP};
  push(@args, '-neg', $neg_seqs->{ORIGINAL}) if (defined($opts->{NEG_SEQS}));
  push(@args, $seqs->{ORIGINAL}, @motif_inputs);
  &run_job($opts, $registry, 'centrimo', ['bg'], {
      PROG => 'centrimo', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout},
      0, [{FILE => catfile($centrimo_dir, 'centrimo.html'), NAME => 'CentriMo HTML'},
        {FILE => catfile($centrimo_dir, 'site_counts.txt'), NAME => 'Site Counts'}
      ]);

  return @db_counts unless &job_ok($registry, 'centrimo');

  # create a lookup table so we can look up our motifs using the
  # centrimo results
  my %m_map = ();
  for (my $i = 0; $i < scalar(@{$motifs}); $i++) {
    my $motif = $motifs->[$i];
    $m_map{$motif->{db}} = {} unless defined($m_map{$motif->{db}});
    # MEME IDs are not unique, so use ALT_ID.
    if ($motif->{prog} eq "meme") {
      $m_map{$motif->{db}}->{$motif->{alt}} = $motif;
    } else {
      $m_map{$motif->{db}}->{$motif->{id}} = $motif;
    }
  }

  my $parser = new JsonRdr();
  my $info = $parser->load_file(catfile($centrimo_dir, 'centrimo.html'),
    qr/^data>sequences$/, qr/^data>motifs>seqs$/);
  $seqs->{CENTRIMO_SEQLEN} = $info->{data}->{seqlen};
  # record the motif database sizes
  my $info_dbs = $info->{data}->{motif_dbs};
  for (my $i = 0; $i < scalar(@{$info_dbs}); $i++) {
    next if $db_map[$i] < 0;
    $db_counts[$db_map[$i]] = $info_dbs->[$i]->{count};
  }
  my $log_tested = log($info->{data}->{tested});
  my $log10_thresh = log($opts->{FILTER_THRESH}) / log(10);
  my $info_motifs = $info->{data}->{motifs};
  for (my $i = 0; $i < scalar(@{$info_motifs}); $i++) {
    my $info_motif = $info_motifs->[$i];
    my $info_peak = $info_motif->{peaks}->[0];
    my $log10_evalue;
    if (defined($opts->{NEG_SEQS})) {
      $log10_evalue = ($info_peak->{fisher_log_adj_pvalue} + $log_tested) / log(10);
    } else {
      $log10_evalue = ($info_peak->{log_adj_pvalue} + $log_tested) / log(10);
    }
    my $motif = $m_map{$db_map[$info_motif->{db}]}->{$info_motif->{id}};
    # MEME motifs will be undefined under ID, so look up under ALT_ID.
    if (! $motif && defined $info_motif->{alt}) { $motif = $m_map{$db_map[$info_motif->{db}]}->{$info_motif->{alt}}; }
    if ($motif) {
      if ($log10_evalue <= $log10_thresh) {
        $motif->{centrimo_sites} = $info_motif->{sites};
        $motif->{centrimo_total_sites} = $info_motif->{total_sites};
      }
      # check if CentriMo thinks the motif is better than MEME/STREME does
      if ($motif->{sig} > $log10_evalue) {
        $motif->{prog} = 'centrimo';
        $motif->{sig} = $log10_evalue;
      }
    } elsif ($log10_evalue <= $log10_thresh) {
      $motif = {
        prog => 'centrimo',
        bg => $background->{obj}, strands => 2, db => $db_map[$info_motif->{db}], 
        file => $opts->{DBS}->[$db_map[$info_motif->{db}]],
        id => $info_motif->{id}, alt => $info_motif->{alt}, 
	consensus => $info_motif->{consensus},
        url => $info_motif->{url}, width => $info_motif->{len}, 
        sites => $info_motif->{motif_nsites}, 
        pseudo => 0.1, evalue => $info_motif->{motif_evalue},
        pspm => {}, centrimo_sites => $info_motif->{sites},
        centrimo_total_sites => $info_motif->{total_sites},
        sig => $log10_evalue 
      };
      for (my $a = 0; $a < $alph->size_core(); $a++) {
        my @column = ();
        for (my $i = 0; $i < $motif->{width}; $i++) {
          $column[$i] = $info_motif->{pwm}->[$i]->[$a];
        }
        $motif->{pspm}->{$alph->char($a)} = \@column;
      }
      push(@{$motifs}, $motif);
    }
  }
  return @db_counts;
}


# 
# Run Tomtom to find similar motifs
#
sub tomtom {
  $logger->trace("call tomtom") if $logger;
  my ($opts, $registry, $background, $motifs, $prog, $db) = @_;
  return unless @{$opts->{DBS}};
  return unless &nmotifs($registry, $prog) > 0;

  my $job_name = $prog.'_tomtom';
  my $timeout = &time_allocate($opts, $job_name);
  my $input_motifs = catfile($registry->{$prog}->{dir}, $prog.'.xml');
  my $tomtom_dir = catdir($opts->{OUT_DIR}, $prog.'_tomtom_out');
  my @args = ('-verbosity', $opts->{VERBOSITY}, '-oc', $tomtom_dir, 
    '-min-overlap', 5, '-dist', 'pearson', '-evalue', '-thresh', 1.0, '-no-ssc');
    #'-bfile', $background->{file}); # has no effect with 'pearson'
  push(@args, '-xalph') if ($opts->{XALPH});
  push(@args, $input_motifs, @{$opts->{DBS}});
  &run_job($opts, $registry, $job_name, ['bg', $prog], {
      PROG => 'tomtom', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout},
      0, [{FILE => catfile($tomtom_dir, 'tomtom.html'), NAME => 'Tomtom HTML'},
        {FILE => catfile($tomtom_dir, 'tomtom.tsv'), NAME => 'Tomtom TSV'},
        {FILE => catfile($tomtom_dir, 'tomtom.xml'), NAME => 'Tomtom XML'}
      ]);

  return unless &job_ok($registry, $job_name);

  my $data = XMLin(
    catfile($tomtom_dir, 'tomtom.xml'),
    KeyAttr => [], ForceArray => 1
  );
  #print Dumper($data);
  #print Dumper($motifs);

  # Make an index from query ID to list of matching IDXs.
  my $queries = $data->{queries}->[0]->{motif};
  my $targets = $data->{targets}->[0]->{motif};
  my %query_id_to_target_idxs = ();
  my %query_id_to_idx = ();
  my $matches = $data->{matches}->[0]->{query};
  if (defined $matches) {
    for (my $i = 0; $i < scalar(@{$matches}); $i++) {
      my $q_idx = $matches->[$i]->{idx};
      my $q_id = $queries->[$q_idx]->{id};
      $query_id_to_idx{$q_id} = $q_idx;
      my $q_targets = $matches->[$i]->{target};
      my @target_list = ();
      for (my $j = 0; $j < scalar(@{$q_targets}); $j++) {
        my $t_idx = $q_targets->[$j]->{idx};
        my $t_id = $targets->[$t_idx]->{id};
        push(@target_list, $t_idx);
      }
      #printf "query idx %s : matches %s\n", $q_idx, join(" ", @target_list);
      $query_id_to_target_idxs{$q_id} = \@target_list;
    }
  }
   
  # Iterate over the motif objects and add the non-self matches.
  for (my $i = 0; $i < scalar(@{$motifs}); $i++) {
    my $motif = $motifs->[$i];
    next if ($motif->{db} ne $db);
    # create the list of matches
    $motif->{matches} = [];
    my $q_id = $motif->{id};
    # set tomtom index of query
    $motif->{idx} = $query_id_to_idx{$q_id};
    # get list of target indices for matching motifs
    my $target_list = $query_id_to_target_idxs{$q_id};
    next unless defined $target_list; 
    # convert each match idx to a match hash and add it to the motif's matches
    for (my $j = 0; $j < scalar(@{$target_list}); $j++) {
      my $t_idx = $target_list->[$j];
      my $target = $targets->[$t_idx];
      my $match = {db => $target->{db}, idx => $t_idx, id => $target->{id}, alt => $target->{alt}, url => $target->{url}};
      #print "query ", $q_id, " match ", $match, "\n";
      push(@{$motif->{matches}}, $match);
    }
  }
}

#
# run Tomtom to align all the motifs that we've found
#
sub align {
  $logger->trace("call align") if $logger;
  my ($opts, $registry, $motifs) = @_;

  return [] unless (@{$motifs});

  @{$motifs} = sort {$a->{sig} <=> $b->{sig}} @{$motifs};
  my @aligns = ();
  my @used = ();

  # write out the motifs to a combined file, use numbers
  # to identify them
  my $combined_motifs_file = catfile($opts->{OUT_DIR}, 'combined.meme');
  my $fh;
  sysopen($fh, $combined_motifs_file, O_WRONLY | O_CREAT | O_TRUNC);
  for (my $i = 0; $i < scalar(@{$motifs}); $i++) {
    my %motif = %{$motifs->[$i]};
    $motif{alt} = (defined $motif{id} && defined $motif{alt}) ? $motif{id} . '-' . $motif{alt} : (defined $motif{id}) ? $motif{id} : '';
    $motif{id} = $i+1;		# Start indices at 1
    $motif{url} = '';
    print $fh intern_to_meme(\%motif, 0, 1, $i == 0);
    $aligns[$i] = {};
    $used[$i] = 0;
  }
  close($fh);
  
  # run tomtom in text mode
  my $align_file = catfile($opts->{OUT_DIR}, 'motif_alignment.txt');
  my @args = ('-verbosity', $opts->{VERBOSITY}, '-text', 
    '-thresh', $opts->{GROUP_WEAK_THRESH}, $combined_motifs_file, $combined_motifs_file);
  &run_job($opts, $registry, 'align', [], {
      PROG => 'tomtom', BIN => $bin_dir, ARGS => \@args, OUT_FILE => $align_file}, 1, [
        {FILE => $align_file, NAME => 'Motif Alignment'}
      ]);
  return [] unless &job_ok($registry, 'align');

  # read in the alignment
  sysopen($fh, $align_file, O_RDONLY);
  my $line = <$fh>;				# skip header line
  while ($line = <$fh>) {
    # remove comments
    $line =~ s/#.*$//;
    # skip empty lines
    next unless $line =~ m/\S/;
    # split into columns
    my @cols = split(/\t/, $line);
    # skip if wrong column count
    next unless scalar(@cols) == 10;
    # get the values we're interested in
    my $query = int($cols[0])-1;		# readjust motif indices to start at 0
    my $target = int($cols[1])-1;		# readjust motif indices to start at 0
    my $offset = -int($cols[2]);
    my $rc = ($cols[9] =~ '-' ? 1 : 0);
    my $qval = $cols[5]; $qval =~ s/^\s+//; $qval =~ s/\s+$//; $qval += 0;
    $aligns[$query]->{$target} = {offset => $offset, rc => $rc, qval => $qval};
  }
  close($fh);

  my @seeds;
  if (!$opts->{OLD_CLUSTERING}) {
    # sort the seeds so MEME and STREME motifs are first
    my @buffer = ();
    for (my $i = 0; $i < scalar(@{$motifs}); $i++) {
      if ($motifs->[$i]->{db} >= 0) {
        push(@buffer, $i);
      } else {
        push(@seeds, $i);
      }
    }
    push(@seeds, @buffer);
  } else {
    # keep ordered by evalue
    my $max_seed = scalar(@{$motifs}) - 1;
    @seeds = (0..$max_seed);
  }
  # use a greedy algorithm to group the motifs
  # under their best examples
  my @groups = ();
  my %motif_group = ();
  foreach my $i (@seeds) {
    next if $used[$i];
    # mark self as used
    $used[$i] = 1;
    $motif_group{$i} = scalar(@groups);
    # add self to the group
    my @group = ({motif => $i, offset => 0, rc => 0, is_seed => 1});
    # add all aligned motifs to the group
    while (my ($m, $align) = each %{$aligns[$i]}) {
      next if $used[$m];
      # skip weak links on first pass
      next if ($align->{qval} > $opts->{GROUP_THRESH});
      $used[$m] = 1;
      $motif_group{$m} = scalar(@groups);
      push(@group, {motif => $m, offset => $align->{offset}, 
          rc => $align->{rc}});
    }
    push(@groups, \@group);
  }

  # now look at each of the groups from best to worst
  # and look at the weak connections 
  for (my $g = 0; $g < scalar(@groups)-1; $g++) {
    my $dest = $groups[$g];
    next unless $dest;
    my $i = $dest->[0]->{motif};
    my %mergable = ();
    while (my ($m, $align) = each %{$aligns[$i]}) {
      my $candidate = $motif_group{$m};
      # skip links to our or more significant groups
      next if ($candidate <= $g);
      # skip if we've already decided the group is mergeable
      next if ($mergable{$candidate});
      # skip when motifs in the group that would reject merging
      next if (scalar(grep {!defined($aligns[$i]->{$_->{motif}})} @{$groups[$candidate]}));
      # record that the group is mergable
      $mergable{$candidate} = $groups[$candidate];
      $groups[$candidate] = undef;
    }
    # merge all the mergable groups
    foreach my $src (values %mergable) {
      foreach my $m (map {$_->{motif}} @{$src}) {
        my $align = $aligns[$i]->{$m};
        $motif_group{$m} = $g;
        push(@{$dest}, {motif => $m, offset => $align->{offset}, 
            rc => $align->{rc}});
      }
    }
  }
  # remove merged groups
  @groups = grep {defined($_)} @groups;
  %motif_group = ();

  # finally sort and re-align
  for (my $g = 0; $g < scalar(@groups); $g++) {
    my $group = $groups[$g];

    # sort the group, ensure the seed is first but others are in order of significance
    @{$group} = sort {$a->{is_seed} ? -1 : ($b->{is_seed} ? 1 : ($a->{motif} <=> $b->{motif}))} @{$group};

    # calculate leftmost extreme
    my $left_extreme = 0;
    foreach my $align (@{$group}) {
      $left_extreme = $align->{offset} if ($align->{offset} < $left_extreme);
    }
    # realign and calculate total width
    my $total_width = 0;
    foreach my $align (@{$group}) {
      $align->{offset} -= $left_extreme;
      my $len = $align->{offset} + $motifs->[$align->{motif}]->{width};
      $total_width = $len if ($len > $total_width);
    }
  }

  # sort the groups so they are in order of seed motif significance
  # this is only required with the new clustering method
  @groups = sort {$a->[0]->{motif} <=> $b->[0]->{motif}} @groups;

  return \@groups;
}

sub spamo {
  $logger->trace("call spamo") if $logger;
  my ($opts, $registry, $background, $seqs, $motifs, $align) = @_;
  my $t_spamo_start = [&gettimeofday()];
  my $full_timeout = &time_allocate($opts, "spamo");

  my @motif_files = ();
  foreach my $prog ('meme', 'streme') {
    next unless &job_ok($registry, $prog);
    next unless $registry->{$prog}->{NMOTIFS} > 0;
    push(@motif_files, catfile($registry->{$prog}->{dir}, $prog.'.xml'));
  }
  return unless @motif_files;

  my $nmotifs = 0;
  for (my $i = 0; $i < @{$align}; $i++) {
    my $key_motif = $motifs->[$align->[$i]->[0]->{motif}];
    # Skip primary motifs from databases.
    next unless (grep {$_ eq $key_motif->{file}} @motif_files);
     
    my $timeout = undef;
    if (defined($full_timeout)) {
      $timeout = $full_timeout - int(&tv_interval($t_spamo_start, [&gettimeofday()]) + 0.5);
      last if $timeout <= 0;
    }

    my $job_id = 'spamo'.($nmotifs+1);
    my $spamo_dir = catdir($opts->{OUT_DIR}, 'spamo_out_'.($nmotifs+1));
    my @args = ('-verbosity', 1, '-oc', $spamo_dir, '-bgfile', $background->{file},
      '-keepprimary', '-primary', $key_motif->{id});
    push(@args, '-xalph') if ($opts->{XALPH});
    push(@args, $seqs->{ORIGINAL}, $key_motif->{file}, @motif_files);
    &run_job($opts, $registry, $job_id, [], {
        PROG => 'spamo', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout}, 1, [
          {FILE => catfile($spamo_dir, 'spamo.html'), NAME => 'SpaMo HTML'},
          {FILE => catfile($spamo_dir, 'spamo.xml'), NAME => 'SpaMo XML'}
        ]);
    
    next unless &job_ok($registry, $job_id);

    $key_motif->{spamo_html} = catfile($spamo_dir, 'spamo.html');
    $nmotifs++;
  }
}

sub fimo {
  $logger->trace("call fimo") if $logger;
  my ($opts, $registry, $background, $seqs, $motifs, $align) = @_;
  my $t_fimo_start = [&gettimeofday()];
  my $full_timeout = &time_allocate($opts, "fimo");

  my @motif_files = ();
  foreach my $prog ('meme', 'streme') {
    next unless &job_ok($registry, $prog);
    next unless $registry->{$prog}->{NMOTIFS} > 0;
    push(@motif_files, catfile($registry->{$prog}->{dir}, $prog.'.xml'));
  }
  return unless @motif_files;

  my $nmotifs = 0;
  for (my $i = 0; $i < @{$align}; $i++) {
    my $key_motif = $motifs->[$align->[$i]->[0]->{motif}];
    # Skip primary motifs from databases.
    next unless (grep {$_ eq $key_motif->{file}} @motif_files);
    my $timeout = undef;
    if (defined($full_timeout)) {
      $timeout = $full_timeout - int(&tv_interval($t_fimo_start, [&gettimeofday()]) + 0.5);
      last if $timeout <= 0;
    }

    my $job_id = 'fimo'.($nmotifs+1);
    my $fimo_dir = catdir($opts->{OUT_DIR}, 'fimo_out_'.($nmotifs+1));
    my @args = ('--parse-genomic-coord', '--verbosity', 1, '--oc', $fimo_dir,
      '--bgfile', $background->{file}, '--motif', $key_motif->{id}, $key_motif->{file},
      $seqs->{ORIGINAL});

    &run_job($opts, $registry, $job_id, [], {
        PROG => 'fimo', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout}, 1, [
          {FILE => catfile($fimo_dir, 'fimo.gff'), NAME => 'FIMO GFF'},
          {FILE => catfile($fimo_dir, 'fimo.html'), NAME => 'FIMO HTML'},
          {FILE => catfile($fimo_dir, 'fimo.tsv'), NAME => 'FIMO TSV'}
        ]);
    
    next unless &job_ok($registry, $job_id);

    $key_motif->{fimo_gff} = catfile($fimo_dir, 'fimo.gff');
    $nmotifs++;
  }

}

sub summary {
  my ($opts, $registry, $commandline, $version, $release) = @_;
  my $infile = $opts->{OUT_DIR} . '/meme-chip.html';
  my $outfile = $opts->{OUT_DIR} . '/summary.tsv';
  &run_job(
    $opts, $registry, 'summary', [],
    {
      PROG => 'meme-chip_html_to_tsv',
      BIN => $script_dir,
      ARGS => [$infile, $outfile, $commandline, $version, $release]
    }, 1, 
    [{FILE => $outfile, NAME => 'Summary'}]
  );
} # summary

sub output {
  $logger->trace("call output") if $logger;
  print $progress_log "Writing output\n";
  my ($opts, $registry, $alphabet, $background, $seqs, $neg_seqs, $db_counts, $motifs, $align) = @_;
  my $alph = $alphabet->{obj};
  my $bg = $background->{obj};
  my $html_file = catfile($opts->{OUT_DIR}, 'meme-chip.html');
  my $htmlwr = new HtmlMonolithWr($etc_dir, 'meme-chip_template.html', 
    $html_file, 'meme-chip_data.js' => 'data');
  $htmlwr->set_logger($logger) if $logger;
  my $jsonwr = $htmlwr->output();
  my $out_dir = abs_path($opts->{OUT_DIR});
  $jsonwr->str_prop('filter_thresh', $opts->{FILTER_THRESH});
  $jsonwr->str_prop('version', $version);
  $jsonwr->str_prop('revision', $revision);
  $jsonwr->str_prop('release', $release);
  $jsonwr->str_prop('description', $opts->{DESCRIPTION}) if $opts->{DESCRIPTION};
  $jsonwr->str_array_prop('cmd', @cmd);
  $jsonwr->property('programs');
  $jsonwr->start_array_value();
  for (my $i = 0; $i < scalar(@jobs); $i++){
    my $job = $jobs[$i];
    $jsonwr->start_object_value();
    $jsonwr->str_prop('name', $job->{name});
    $jsonwr->str_prop('cmd', $job->{cmd});
    $jsonwr->num_prop('status', $job->{status});
    $jsonwr->bool_prop('oot', $job->{oot});
    $jsonwr->num_prop('time', $job->{time});
    if (-e $job->{messages}) {
      $jsonwr->str_prop('messages_file', abs2rel(abs_path($job->{messages}), $out_dir))
    }
    $jsonwr->property('outputs');
    $jsonwr->start_array_value();
    if ($job->{outputs}) {
      for (my $j = 0; $j < scalar(@{$job->{outputs}}); $j++) {
        my $output = $job->{outputs}->[$j];
        if (-s $output->{FILE}) {
          $jsonwr->start_object_value();
          if ($output->{NAME}) {
            $jsonwr->str_prop('name', $output->{NAME});
          } else {
            my ($name) = fileparse($output->{FILE});
            $jsonwr->str_prop('name', $name);
          }
          $jsonwr->str_prop('file', abs2rel(abs_path($output->{FILE}), $out_dir));
          $jsonwr->end_object_value();
        }
      }
    }
    $jsonwr->end_array_value();
    $jsonwr->end_object_value();
  }
  $jsonwr->end_array_value();
  $jsonwr->property('alphabet');
  $alph->to_json($jsonwr);
  $jsonwr->property('background');
  $jsonwr->start_object_value();
  $jsonwr->str_prop('source', (defined($opts->{BFILE}) ? $opts->{BFILE} : '--sequences--')); 
  $jsonwr->property('freqs');
  $jsonwr->start_array_value();
  for (my $i = 0; $i < $alph->size_core(); $i++) {
    $jsonwr->num_value($bg->{$alph->char($i)});
  }
  $jsonwr->end_array_value();
  $jsonwr->end_object_value();
  $jsonwr->property('sequence_db');
  $jsonwr->start_object_value();
  $jsonwr->str_prop('source', $opts->{SEQUENCES});
  $jsonwr->num_prop('count', $seqs->{COUNT});
  $jsonwr->str_prop('file', abs2rel(abs_path($seqs->{ORIGINAL}), $out_dir));
  if (defined($seqs->{CENTRIMO_SEQLEN})) {
    $jsonwr->num_prop('centrimo_seqlen', $seqs->{CENTRIMO_SEQLEN});
  }
  $jsonwr->end_object_value();

  if ($opts->{NEG_SEQS}) {
    $jsonwr->property('neg_sequence_db');
    $jsonwr->start_object_value();
    $jsonwr->str_prop('source', $opts->{NEG_SEQS});
    $jsonwr->num_prop('count', $neg_seqs->{COUNT});
    $jsonwr->str_prop('file', abs2rel(abs_path($neg_seqs->{ORIGINAL}), $out_dir));
    $jsonwr->end_object_value();
  }

  $jsonwr->property("motif_dbs");
  $jsonwr->start_array_value();
  for (my $i = 0; $i < scalar(@{$opts->{DBS}}); $i++) {
    $jsonwr->start_object_value();
    $jsonwr->str_prop('source', $opts->{DBS}->[$i]);
    $jsonwr->num_prop('count', $db_counts->[$i]);
    $jsonwr->end_object_value();
  }
  $jsonwr->end_array_value();
  $jsonwr->property('motif_count');
  $jsonwr->start_object_value();
  $jsonwr->num_prop('meme', &nmotifs($registry, 'meme'));
  $jsonwr->num_prop('streme', &nmotifs($registry, 'streme'));
  $jsonwr->end_object_value();
  $jsonwr->property('motifs');
  $jsonwr->start_array_value();
  for (my $i = 0; $i < scalar(@{$motifs}); $i++) {
    my $motif = $motifs->[$i];
    $jsonwr->start_object_value();
    $jsonwr->str_prop('prog', $motif->{prog});
    $jsonwr->num_prop('db', $motif->{db});
    $jsonwr->str_prop('id', $motif->{id});
    $jsonwr->str_prop('idx', $motif->{idx}) if (defined $motif->{idx});
    $jsonwr->str_prop('alt', $motif->{alt}) if ($motif->{alt});
    if ($motif->{consensus}) {
      $jsonwr->str_prop('consensus', $motif->{consensus}) 
    } else {
      my $consensus = $motif->{id};
      $consensus =~ s/^\d-//;		# remove motif number from STREME ids	
      $jsonwr->str_prop('consensus', $consensus) 
    }
    $jsonwr->num_prop('len', $motif->{width});
    $jsonwr->str_prop('evalue', $motif->{evalue});
    $jsonwr->num_prop('sites', $motif->{sites});
    $jsonwr->str_prop('url', $motif->{url}) if ($motif->{url});
    $jsonwr->str_prop('sig', &log_10_to_str($motif->{sig}, 1));
    $jsonwr->property('pwm');
    $jsonwr->start_array_value();
    for (my $j = 0; $j < $motif->{width}; $j++) {
      $jsonwr->start_array_value();
      for (my $k = 0; $k < $alph->size_core(); $k++) {
        $jsonwr->num_value($motif->{pspm}->{$alph->char($k)}->[$j]);
      }
      $jsonwr->end_array_value();
    }
    $jsonwr->end_array_value();
    $jsonwr->property('tomtom_matches');
    $jsonwr->start_array_value();
    if ($motif->{matches}) {
      for (my $j = 0; $j < scalar(@{$motif->{matches}}); $j++) {
        my $match = $motif->{matches}->[$j];
        $jsonwr->start_object_value();
        $jsonwr->num_prop('db', $match->{db});
        $jsonwr->str_prop('idx', $match->{idx});
        $jsonwr->str_prop('id', $match->{id});
        $jsonwr->str_prop('alt', $match->{alt}) if ($match->{alt});
        $jsonwr->str_prop('url', $match->{url}) if ($match->{url});
        $jsonwr->end_object_value();
      }
    }
    $jsonwr->end_array_value();
    if ($motif->{centrimo_total_sites}) {
      $jsonwr->num_prop('centrimo_total_sites', $motif->{centrimo_total_sites});
      $jsonwr->num_array_prop('centrimo_sites', @{$motif->{centrimo_sites}});
    }
    $jsonwr->str_prop('spamo_html', abs2rel(abs_path($motif->{spamo_html}), $out_dir)) if $motif->{spamo_html};
    $jsonwr->str_prop('fimo_gff', abs2rel(abs_path($motif->{fimo_gff}), $out_dir)) if $motif->{fimo_gff};
    $jsonwr->end_object_value();
  }
  $jsonwr->end_array_value();
  $jsonwr->property("groups");
  $jsonwr->start_array_value();
  for (my $i = 0; $i < scalar(@{$align}); $i++) {
    my $group = $align->[$i];
    $jsonwr->start_array_value();
    for (my $j = 0; $j < scalar(@{$group}); $j++) {
      my $motif_align = $group->[$j];
      $jsonwr->start_object_value();
      $jsonwr->num_prop('motif', $motif_align->{motif});
      $jsonwr->bool_prop('rc', $motif_align->{rc});
      $jsonwr->num_prop('offset', $motif_align->{offset});
      $jsonwr->end_object_value();
    }
    $jsonwr->end_array_value();
  }
  $jsonwr->end_array_value();

  $htmlwr->output();
}

sub main {
  my $commandline = join " ", @cmd;
  &initialise();
  my $opts = &arguments();
  mkpath($opts->{OUT_DIR}) unless (-e $opts->{OUT_DIR});
  &init_progress_log($opts->{OUT_DIR});
  my $registry = {};
  my $alphabet = &get_alphabet($opts, $registry);
  my $seqs = &get_sequences($opts, $registry, $alphabet);
  my $neg_seqs = &get_control_sequences($opts, $registry, $alphabet) if ($opts->{NEG_SEQS});
  my $background = &get_background($opts, $registry, $alphabet, $seqs, $neg_seqs);
  my $psp = &get_psp($opts, $registry, $alphabet, $seqs, $neg_seqs) if ($opts->{PSP_GEN});
  my @motifs = ();
  push(@motifs, &meme($opts, $registry, $alphabet, $background, $seqs, $neg_seqs, $psp)) if ($opts->{MEME_NMOTIFS} > 0);
  push(@motifs, &streme($opts, $registry, $alphabet, $background, $seqs, $neg_seqs)) if (!defined($opts->{STREME_NMOTIFS}) || $opts->{STREME_NMOTIFS} > 0);
  my @db_counts = &centrimo($opts, $registry, $alphabet, $background, $seqs, $neg_seqs, \@motifs);
  &tomtom($opts, $registry, $background, \@motifs, 'meme', -1);
  &tomtom($opts, $registry, $background, \@motifs, 'streme', -2);
  # filter motifs by E-value
  my $log10_thresh = log($opts->{FILTER_THRESH}) / log(10);
  @motifs = grep {$_->{sig} < $log10_thresh} @motifs;
  my $align = &align($opts, $registry, \@motifs);
  &spamo($opts, $registry, $background, $seqs, \@motifs, $align) 
    unless (defined($opts->{SPAMO_SKIP}));
  &fimo($opts, $registry, $background, $seqs, \@motifs, $align)
    unless (defined($opts->{FIMO_SKIP}));
  &output($opts, $registry, $alphabet, $background, $seqs, $neg_seqs, \@db_counts, \@motifs, $align);
  &summary($opts, $registry, $commandline, $version, $release);
  print $progress_log "Done\n";
  close($progress_log);
  $logger->trace("done") if $logger;
}
