#!@WHICHPERL@
=head1 NAME

xstreme_webservice - Run xstreme in a restricted mode.

=head1 SYNOPSIS

xstreme_webservice [options] <local motif database>*

  Options:
    -alpha [DNA|PROTEIN|RNA]    standard alphabet
    -alphf <file>               alphabet definition file
    -p <file>			primary sequences
    -n <file>			control sequences
    -dna2rna                    convert DNA sequences to RNA
    -bfile <bg file>            specify a background file
    -order <bg order>           specify the order of the background
    -evt <num>                  specify the E-value threshold
    -minw <num>                 minimum motif width
    -maxw <num>                 maximum motif width
    -ctrim <num>		use central region for discovery and enrichment
    -align <align>              how to align sequences for site position diagrams
    -sea-noseqs                 SEA will not output matching sequences TSV file
    -upmotif <file>             uploaded motifs
    -help                       brief help message

  STREME Specific Options:
    -streme-evt <num>           stop if hold-out set E-value greater than <num>
    -streme-nmotifs <num>       maximum number of motifs to find; overrides -streme-evt

  MEME Specific Options:
    -meme-evt <num>             E-value threshold for MEME
    -meme-nmotifs <num>         maximum number of motifs to find
    -meme-mod [oops|zoops|anr]  sites used in a single sequence

  Motif Databases
    The motif databases may be specified as a pattern using * as a wildcard.
=cut

use strict;
use warnings;

use Cwd qw(getcwd abs_path);
use Fcntl qw(SEEK_SET SEEK_CUR O_RDWR);
use File::Basename qw(fileparse);
use File::Spec::Functions qw(abs2rel catfile splitdir tmpdir);
use File::Temp qw(tempfile);
use Getopt::Long;
use Pod::Usage;
use Time::HiRes qw(gettimeofday tv_interval);

use lib qw(@PERLLIBDIR@);

use Alphabet qw(rna dna protein);
use ExecUtils qw(stringify_args invoke);
use MemeWebUtils qw(is_safe_name add_status_msg update_status loggable_date write_invocation_log 
  find_in_dir dir_listing_set dir_listing added_files create_tar report_start report_status);
use Globals;

# Setup logging
my $logger = undef;
eval {
  require Log::Log4perl;
  Log::Log4perl->import();
};
unless ($@) {
  Log::Log4perl::init('@APPCONFIGDIR@/logging.conf');
  $logger = Log::Log4perl->get_logger('meme.service.xstreme');
  $SIG{__DIE__} = sub {
    return if ($^S);
    $logger->fatal(@_);
    die @_;
  };
}
$logger->trace("Starting xstreme service") if $logger;

# constants
my $tempdir = '@TMP_DIR@';
# use the perl default if none is supplied or the replace fails
$tempdir = tmpdir() if ($tempdir eq '' || $tempdir =~ m/^\@TMP[_]DIR\@$/);
my $dbdir = '@MEMEDB@/motif_databases';
my $workdir = getcwd;
my $mpi_nprocs = '@mpi_nprocs@';

# variables for the service invocation log
my $log_args = stringify_args(@ARGV);
my $log_date = loggable_date();
my $log_file = 'xstreme-log';

# error files
my $messages = "messages.txt";
my $progress = "progress_log.txt";

#error page
my $file_list;
my @arg_errors = ();
my $msg_list = [];
my $program = 'XSTREME';
my $page = 'index.html';
my $refresh = 10;

#xstreme output
my $xstreme_html = 'xstreme.html';
my $xstreme_tsv = 'xstreme.tsv';
my $xstreme_motifs = 'xstreme.txt';

# option defaults
my ($alphabet, $alpha, $alphf, $upmotif, $posfile, $negfile, 
  $dna2rna, $evt, $bfile, $order, $minw, $maxw, $ctrim, $align,
  $streme_evt, $streme_nmotifs, 
  $meme_evt, $meme_nmotifs, $meme_mod, $meme_p, $meme_searchsize,
  $sea_noseqs, $help);
my @dbmotifs;
my @motifs;
$meme_searchsize = 100000;	# Currently fixed, not an option here.
$sea_noseqs = 0; #FALSE
$help = 0; #FALSE

# redirect stderr so we can get the error message from GetOpts
my ($err_old, $err_tmp, $opts_ok, $opts_msg);
open($err_old, ">&STDERR") or die("Can't dup STDERR: $!");
$err_tmp = tempfile('GetOptions_XXXXXXXXXX', DIR => $tempdir, UNLINK => 1);
open(STDERR, '>&', $err_tmp) or die("Can't redirect STDERR: $!");
# parse options
$opts_ok = GetOptions(
  'alpha=s'         => \$alpha,
  'alphf=s'         => \$alphf,
  'p=s'             => \$posfile,
  'n=s'             => \$negfile,
  'dna2rna'         => \$dna2rna,
  'upmotif=s'       => \$upmotif,
  'evt=f'           => \$evt,
  'bfile=s'         => \$bfile,
  'order=i'         => \$order,
  'minw=i'          => \$minw,
  'maxw=i'          => \$maxw,
  'ctrim=i'         => \$ctrim,
  'align=s'         => \$align,
  'streme-evt=f'    => \$streme_evt,
  'streme-nmotifs=i'  => \$streme_nmotifs,
  'meme-evt=f'      => \$meme_evt,
  'meme-nmotifs=i'  => \$meme_nmotifs,
  'meme-mod=s'      => \$meme_mod,
  'sea-noseqs'      => \$sea_noseqs,
  'help|?'          => \$help
);
(@dbmotifs) = @ARGV;
# display help
pod2usage(1) if $help;
# reset stderr and get the error message if any
open(STDERR, ">&", $err_old) or die("Can't reset STDERR: $!");
seek($err_tmp, 0, SEEK_SET);
while ($opts_msg = <$err_tmp>) {
  chomp($opts_msg);
  push(@arg_errors, $opts_msg);
}
close($err_tmp);

# test the arguments
if (defined $alpha) {
  if ($alpha =~ m/^(?:RNA|DNA|PROTEIN)$/) {
    $alphabet = ($alpha eq 'RNA' ? rna() : ($alpha eq 'DNA' ? dna() : protein()));
  } else {
    push(@arg_errors, "Value \"$alpha\" invalid for option alpha (RNA, DNA or PROTEIN expected)");
  }
} elsif (defined($alphf)) {
  $alphf = fileparse($alphf);
  if (not is_safe_name($alphf)) {
    push(@arg_errors, "Alphabet definition \"$alphf\" does not fit allowed file name pattern.");
  } elsif (not -e $alphf) {
    push(@arg_errors, "Alphabet definition \"$alphf\" does not exist.");
  } else {
    eval { $alphabet = new Alphabet($alphf); };
    push(@arg_errors, "Alphabet definition \"$alphf\" is not a valid alphabet file.\n" . $@) if ($@);
  }
} else {
  $alphabet = dna();
}
# remove any path from the files to ensure they are in this directory
unless (defined($posfile)) {
  push(@arg_errors, "No sequences provided.");
} else {
  $posfile = fileparse($posfile);
  if (not is_safe_name($posfile)) {
    push(@arg_errors, "Sequences \"$posfile\" does not fit allowed file name pattern.");
  } elsif (not -e $posfile) {
    push(@arg_errors, "Sequences \"$posfile\" does not exist.");
  } 
}

if (defined($upmotif)) {
  $upmotif = fileparse($upmotif);
  if (not is_safe_name($upmotif)) {
    push(@arg_errors, "Value \"$upmotif\" invalid for option upmotif (does not fit allowed file name pattern)");
  } elsif (not -e $upmotif) {
    push(@arg_errors, "Value \"$upmotif\" invalid for option upmotif (file does not exist)");
  } else {
    push(@motifs, $upmotif);
  }
} 

if (@dbmotifs) {
  push(@motifs, find_in_dir($dbdir, join(' ', @dbmotifs), 'db'));
}

unless (@motifs) {
  push(@arg_errors, "No motifs provided.");
}

if (defined($negfile)) {
  $negfile = fileparse($negfile);
  if (not is_safe_name($negfile)) {
    push(@arg_errors, "Value \"$negfile\" invalid for option neg (does not fit allowed file name pattern)");
  } elsif (not -e $negfile) {
    push(@arg_errors, "Value \"$negfile\" invalid for option neg (file does not exist)");
  }
}

if (defined($bfile)) {
  $bfile = fileparse($bfile);
  if (not is_safe_name($bfile)) {
    push(@arg_errors, "Value \"$bfile\" invalid for option bfile (does not fit allowed file name pattern)");
  } elsif (not -e $bfile) {
    push(@arg_errors, "Value \"$bfile\" invalid for option bfile (file does not exist)");
  }
}

if (defined($order)) {
  if ($order < 0 || $order > 4) {
    push(@arg_errors, "Value \"$order\" invalid for option order (expected 0-4)");
  }
}

$opts_ok = 0 if (scalar(@arg_errors) > 0);
foreach my $arg_error (@arg_errors) {
  print STDERR $arg_error, "\n";
  add_status_msg($arg_error, $msg_list);
}

# setup error page
$file_list = [
    {file => $xstreme_html, desc => 'XSTREME HTML output'},
    {file => $xstreme_tsv, desc => 'XSTREME summary in TSV format'},
    {file => $xstreme_motifs, desc => 'XSTREME non-redundant motifs in MEME text format'},
    {file => $alphf, desc => 'Uploaded Alphabet'},
    {file => $posfile, desc => 'Uploaded (Primary) Sequences'},
    {file => $negfile, desc => 'Uploaded Control Sequences'},
    {file => $bfile, desc => 'Uploaded Background'},
    {file => $upmotif, desc => 'Uploaded Motifs'},
    {file => $messages, desc => 'Messages'},
];

if ($opts_ok) {
  add_status_msg('Arguments ok', $msg_list);
} else {
  add_status_msg("Error parsing arguments", $msg_list);
}

update_status($page, $program, ($opts_ok ? $refresh : 0), $file_list, 
    $msg_list, ($opts_ok ? "Starting" : "Error"));

# exit if there was an error reading the arguments
unless ($opts_ok) {
  write_invocation_log($log_file, $log_date, $log_args);
  pod2usage(2);
}

# make a symbolic link to the motif databases
symlink($dbdir, 'db');

# take a listing of all the files in the current directory
my $before = &dir_listing_set($workdir);

# Set the maximum total sequence length parameter for STREME.
# Allow streme_length_factor characters of total sequence per 1GB or core memory.
my $maxtotallength = @maxmemory@ * @streme_length_factor@;

# prepare xstreme arguments
# nb. MAXTIME is in seconds but -time is minutes
my @args = ('--oc', '.', '--time', int($MAXTIME / 60));
push(@args, '--streme-totallength', $maxtotallength);
push(@args, '--meme-searchsize', $meme_searchsize) if (defined $meme_searchsize);
push(@args, '--fdesc', 'description') if (-e 'description');
if (defined($alphf)) {
  push(@args, '--xalph', $alphf);
} else {
  push(@args, '--'. lc($alpha));
}
push(@args, '--dna2rna') if defined($dna2rna);
push(@args, '--bfile', $bfile) if defined($bfile);
push(@args, '--order', $order) if defined($order);
push(@args, '--evt', $evt) if defined($evt);
push(@args, '--minw', $minw) if defined($minw);
push(@args, '--maxw', $maxw) if defined($maxw);
push(@args, '--ctrim', $ctrim) if defined($ctrim);
push(@args, '--align', $align) if defined($align);
push(@args, '--streme-evt', $streme_evt) if defined($streme_evt);
push(@args, '--streme-nmotifs', $streme_nmotifs) if defined($streme_nmotifs);
push(@args, '--meme-evt', $meme_evt) if defined($meme_evt);
push(@args, '--meme-nmotifs', $meme_nmotifs) if defined($meme_nmotifs);
push(@args, '--meme-mod', $meme_mod) if defined($meme_mod);
push(@args, '--meme-p', $mpi_nprocs) if ($mpi_nprocs > 1);
push(@args, '--sea-noseqs') if ($sea_noseqs);
foreach my $db (@motifs) {
  push(@args, '--m', $db);
}
push(@args, '--p', $posfile);
push(@args, '--n', $negfile) if defined($negfile);

# run XSTREME
&run_xstreme(@args);
# determine all files added
my @tar_files = &added_files($before, &dir_listing_set($workdir));
# read inputs
push(@tar_files, $posfile);
push(@tar_files, $negfile) if (defined($negfile));
push(@tar_files, $bfile) if (defined($bfile));
push(@tar_files, $upmotif) if (defined($upmotif));
# create tar with all new files plus the input files
my $tar = &create_tar(1, $msg_list, $page, $program, $refresh, $file_list, $log_file, $log_date, $log_args, @tar_files);
# add the tar file to the output listing
push(@{$file_list}, {file => $tar, desc => 'Gzipped TAR file of all output'});
# update the status
add_status_msg("Done", $msg_list);
update_status($page, $program, 0, $file_list, $msg_list, "Done");
write_invocation_log($log_file, $log_date, $log_args);
exit(0);
1;

sub run_xstreme {
  my @args = @_;
  my ($status, $t0, $t1, $time, $child_pid);
  &report_start($program, 'xstreme', $msg_list, $page, $program, $refresh, $file_list, @args);
  $t0 = [&gettimeofday()];
  $child_pid = open READER, '-|';
  die("pipe/fork: $!\n") unless defined $child_pid;
  if ($child_pid == 0) { # child process
    # send stderr to file
    open(STDERR, ">", $messages) or die("Can't redirect STDERR: $!");
    # start xstreme process, this will replace the current process and should not return
    exec(catfile('@BINDIR@', 'xstreme'), @args);
    exit(1); #should not get here
  }
  # read the status updates from the child and update the index
  my ($line, $prog, $cmd, $run_time, $signal, $with_coredump, $error_code, $msg);
  while ($line = <READER>) {
    chomp($line);
    if ($line =~ /^Starting (\S+):\s+(.*)$/) {
      $prog = $1;
      $cmd = $2;
      $msg = 'XSTREME is starting subprocess <b>'.$prog.'</b><br><code>' . $cmd . '</code>'
    } elsif ($line =~ /^(\S+) ran successfully in (\d+(?:\.\d+)?) seconds/) {
      $prog = $1;
      $run_time = $2;
      $msg = '<span style="color: #008000">XSTREME subprocess <b>'.$prog.'</b> ran successfully in <b>'.sprintf('%.1f', $run_time) .'</b> seconds</span>';
    } elsif ($line =~ /^(\S+) failed to run/) {
      $prog = $1;
      $msg = '<span style="color: #800000">XSTREME failed to start <b>'.$prog.'</b></span>';
    } elsif ($line =~ /^(\S+) process died with signal (\d+), (with|without) coredump$/) {
      $prog = $1;
      $signal = $2;
      $with_coredump = ($3 eq 'with');
      $msg = '<span style="color: #800000">XSTREME subprocess <b>'.$prog.'</b> exited with signal '.$signal.'</span>';
    } elsif ($line =~ /^(\S+) exited with error code (\d+)$/) {
      $prog = $1;
      $error_code = $2;
      $msg = '<span style="color: #800000">XSTREME subprocess <b>'.$prog.'</b> exited with error code '.$error_code.'</span>';
    } elsif ($line =~ /^Ran out of time! Stopping (\S+)$/) {
      $prog = $1;
      $msg = 'XSTREME is stopping subprocess <b>'.$prog.'</b> because it used all the alloted time';
    } else {
      next;
    }
    add_status_msg($msg, $msg_list);
    update_status($page, $program, $refresh, $file_list, $msg_list, "Running XSTREME");
  }
  waitpid($child_pid, 0); # wait for the child process to exit
  $status = $?;
  $t1 = [&gettimeofday()];
  $time = &tv_interval($t0, $t1);
  # remove the simlink
  unlink('db') if (-e 'db');
  &report_status('XSTREME', $status, $time, $msg_list, $page, $program, $file_list, $log_file, $log_date, $log_args);
} # run_xstreme
