#!@WHICHPERL@
=head1 NAME

meme-chip_webservice - Run meme-chip in a restricted mode.

=head1 SYNOPSIS

meme-chip_webservice [options] <sequences file> <motif databases>

  Options:
    -alpha [DNA|PROTEIN|RNA]    standard alphabet
    -alphf <file>               alphabet definition file
    -upmotif <file>             uploaded motifs
    -bfile <bg file>            specifiy a background file
    -order <bg order>           specifiy the order of the background
    -norc                       search the given strand only.
    -norand                     select sequence for MEME in file order instead of randomly.
    -help                       brief help message

  MEME Specific Options:
    -meme-mod [oops|zoops|anr]  sites used in a single sequence
    -meme-minw <num>            minimum motif width
    -meme-maxw <num>            maximum motif width
    -meme-nmotifs <num>         maximum number of motifs to find
    -meme-minsites <num>        minimum number of sites per motif
    -meme-maxsites <num>        maximum number of sites per motif
    -meme-pal                   look for palindromes only

  DREME Specific Options:
    -dreme-e <num>              maximum motif E-value
    -dreme-m <num>              maximum number of motifs to find

  CentriMo Specific Options:
    -centrimo-local             compute enrichment of all regions (not only central)
    -centrimo-score <num>       minimum match score
    -centrimo-maxreg <num>      maximum central enrichment region
    -centrimo-ethresh <num>     threshold for reporting enrichment
    -centrimo-noseq             don't store sequence ids in output

  Motif Databases
    The motif databases may be specified as a pattern using * as a wildcard.
=cut

use strict;
use warnings;

use Cwd qw(getcwd abs_path);
use Fcntl qw(SEEK_SET SEEK_CUR O_RDWR);
use File::Basename qw(fileparse);
use File::Spec::Functions qw(abs2rel catfile splitdir tmpdir);
use File::Temp qw(tempfile);
use Getopt::Long;
use Pod::Usage;
use Time::HiRes qw(gettimeofday tv_interval);


use lib qw(@PERLLIBDIR@);

use Alphabet qw(rna dna protein);
use ExecUtils qw(stringify_args invoke);
use MemeWebUtils qw(is_safe_name add_status_msg update_status loggable_date write_invocation_log find_in_dir);
use Globals;

# Setup logging
my $logger = undef;
eval {
  require Log::Log4perl;
  Log::Log4perl->import();
};
unless ($@) {
  Log::Log4perl::init('@APPCONFIGDIR@/logging.conf');
  $logger = Log::Log4perl->get_logger('meme.service.memechip');
  $SIG{__DIE__} = sub {
    return if ($^S);
    $logger->fatal(@_);
    die @_;
  };
}
$logger->trace("Starting meme-chip service") if $logger;

# constants
my $tempdir = '@TMP_DIR@';
# use the perl default if none is supplied or the replace fails
$tempdir = tmpdir() if ($tempdir eq '' || $tempdir =~ m/^\@TMP[_]DIR\@$/);
my $dbdir = '@MEMEDB@/motif_databases';
my $workdir = getcwd;

# variables for the service invocation log
my $log_args = stringify_args(@ARGV);
my $log_date = loggable_date();
my $log_file = 'memechip-log';

# error files
my $messages = "messages.txt";
my $progress = "progress_log.txt";

#error page
my $file_list;
my @arg_errors = ();
my $msg_list = [];
my $program = 'MEME-ChIP';
my $page = 'index.html';
my $refresh = 10;

#meme-chip output
my $memechip_html = 'meme-chip.html';

# option defaults
my ($alphabet, $alpha, $alphf, $upmotif, $negfile, $bfile, $order, $norc, $norand,
  $meme_mod, $meme_minw, $meme_maxw, $meme_nmotifs, 
  $meme_minsites, $meme_maxsites, $meme_pal, $dreme_e, $dreme_m, 
  $centrimo_score, $centrimo_maxreg, $centrimo_ethresh, $centrimo_noseq,
  $centrimo_local, $help);
my $sequences;
my @dbmotifs;
my @motifs;
$norc = 0; #FALSE
$norand = 0; #FALSE
$meme_mod = 'zoops';
$meme_pal = 0; #FALSE
$centrimo_noseq = 0; # FALSE
$help = 0; #FALSE

# redirect stderr so we can get the error message from GetOpts
my ($err_old, $err_tmp, $opts_ok, $opts_msg);
open($err_old, ">&STDERR") or die("Can't dup STDERR: $!");
$err_tmp = tempfile('GetOptions_XXXXXXXXXX', DIR => $tempdir, UNLINK => 1);
open(STDERR, '>&', $err_tmp) or die("Can't redirect STDERR: $!");
# parse options
$opts_ok = GetOptions(
  'alpha=s'         => \$alpha,
  'alphf=s'         => \$alphf,
  'upmotif=s'       => \$upmotif,
  'neg=s'           => \$negfile,
  'bfile=s'         => \$bfile,
  'order=i'         => \$order,
  'norc'            => \$norc,
  'norand'          => \$norand,
  'meme-mod=s'      => \$meme_mod,
  'meme-minw=i'     => \$meme_minw,
  'meme-maxw=i'     => \$meme_maxw,
  'meme-nmotifs=i'  => \$meme_nmotifs,
  'meme-minsites=i' => \$meme_minsites,
  'meme-maxsites=i' => \$meme_maxsites,
  'meme-pal'        => \$meme_pal,
  'dreme-e=f'         => \$dreme_e,
  'dreme-m=i'         => \$dreme_m,
  'centrimo-score=f'  => \$centrimo_score,
  'centrimo-maxreg=i' => \$centrimo_maxreg,
  'centrimo-ethresh=f'=> \$centrimo_ethresh,
  'centrimo-local'    => \$centrimo_local,
  'centrimo-noseq'    => \$centrimo_noseq,
  'help|?'          => \$help
);
($sequences, @dbmotifs) = @ARGV;
# display help
pod2usage(1) if $help;
# reset stderr and get the error message if any
open(STDERR, ">&", $err_old) or die("Can't reset STDERR: $!");
seek($err_tmp, 0, SEEK_SET);
while ($opts_msg = <$err_tmp>) {
  chomp($opts_msg);
  push(@arg_errors, $opts_msg);
}
close($err_tmp);

# test the arguments
if (defined $alpha) {
  if ($alpha =~ m/^(?:RNA|DNA|PROTEIN)$/) {
    $alphabet = ($alpha eq 'RNA' ? rna() : ($alpha eq 'DNA' ? dna() : protein()));
  } else {
    push(@arg_errors, "Value \"$alpha\" invalid for option alpha (RNA, DNA or PROTEIN expected)");
  }
} elsif (defined($alphf)) {
  $alphf = fileparse($alphf);
  if (not is_safe_name($alphf)) {
    push(@arg_errors, "Alphabet definition \"$alphf\" does not fit allowed file name pattern.");
  } elsif (not -e $alphf) {
    push(@arg_errors, "Alphabet definition \"$alphf\" does not exist.");
  } else {
    eval { $alphabet = new Alphabet($alphf); };
    push(@arg_errors, "Alphabet definition \"$alphf\" is not a valid alphabet file.\n" . $@) if ($@);
  }
} else {
  $alphabet = dna();
}
# remove any path from the files to ensure they are in this directory
unless (defined($sequences)) {
  push(@arg_errors, "No sequences provided.");
} else {
  $sequences = fileparse($sequences);
  if (not is_safe_name($sequences)) {
    push(@arg_errors, "Sequences \"$sequences\" does not fit allowed file name pattern.");
  } elsif (not -e $sequences) {
    push(@arg_errors, "Sequences \"$sequences\" does not exist.");
  } 
}

if (defined($upmotif)) {
  $upmotif = fileparse($upmotif);
  if (not is_safe_name($upmotif)) {
    push(@arg_errors, "Value \"$upmotif\" invalid for option upmotif (does not fit allowed file name pattern)");
  } elsif (not -e $upmotif) {
    push(@arg_errors, "Value \"$upmotif\" invalid for option upmotif (file does not exist)");
  } else {
    push(@motifs, $upmotif);
  }
} 

if (@dbmotifs) {
  push(@motifs, find_in_dir($dbdir, join(' ', @dbmotifs), 'db'));
}

unless (@motifs) {
  push(@arg_errors, "No motifs provided.");
}

if (defined($negfile)) {
  $negfile = fileparse($negfile);
  if (not is_safe_name($negfile)) {
    push(@arg_errors, "Value \"$negfile\" invalid for option neg (does not fit allowed file name pattern)");
  } elsif (not -e $negfile) {
    push(@arg_errors, "Value \"$negfile\" invalid for option neg (file does not exist)");
  }
}

if (defined($bfile)) {
  $bfile = fileparse($bfile);
  if (not is_safe_name($bfile)) {
    push(@arg_errors, "Value \"$bfile\" invalid for option bfile (does not fit allowed file name pattern)");
  } elsif (not -e $bfile) {
    push(@arg_errors, "Value \"$bfile\" invalid for option bfile (file does not exist)");
  }
}

if (defined($order)) {
  if ($order < 0 || $order > 4) {
    push(@arg_errors, "Value \"$order\" invalid for option order (expected 0-4)");
  }
}

$opts_ok = 0 if (scalar(@arg_errors) > 0);
foreach my $arg_error (@arg_errors) {
  print STDERR $arg_error, "\n";
  add_status_msg($arg_error, $msg_list);
}

# setup error page
$file_list = [
    {file => $memechip_html, desc => 'MEME-ChIP html output'},
    {file => $alphf, desc => 'Uploaded Alphabet'},
    {file => $sequences, desc => 'Uploaded Sequences'},
    {file => $negfile, desc => 'Uploaded Negative sequences'},
    {file => $bfile, desc => 'Uploaded Background'},
    {file => $upmotif, desc => 'Uploaded Motifs'},
];

if ($opts_ok) {
  add_status_msg('Arguments ok', $msg_list);
} else {
  add_status_msg("Error parsing arguments", $msg_list);
}

update_status($page, $program, ($opts_ok ? $refresh : 0), $file_list, 
    $msg_list, ($opts_ok ? "Starting" : "Error"));

# exit if there was an error reading the arguments
unless ($opts_ok) {
  write_invocation_log($log_file, $log_date, $log_args);
  pod2usage(2);
}

# make a symbolic link to the motif databases
symlink($dbdir, 'db');

# take a listing of all the files in the current directory
my $before = &dir_listing_set($workdir);

# prepare meme-chip arguments
# nb. MAXTIME is in seconds but -time is minutes
my @args = ('-oc', '.', '-index-name', $memechip_html,
  '-time', int($MAXTIME / 60));
push(@args, '-fdesc', 'description') if (-e 'description');
if (defined($alphf)) {
  push(@args, '-xalph', $alphf);
} elsif ($alpha ne 'DNA') {
  push(@args, '-'. lc($alpha));
}
push(@args, '-neg', $negfile) if $negfile;
push(@args, '-bfile', $bfile) if $bfile;
push(@args, '-order', $order) if $order;
push(@args, '-norc') if $norc;
push(@args, '-norand') if $norand;
foreach my $db (@motifs) {
  push(@args, '-db', $db);
}
push(@args, '-meme-mod', $meme_mod) if $meme_mod;
push(@args, '-meme-minw', $meme_minw) if $meme_minw;
push(@args, '-meme-maxw', $meme_maxw) if $meme_maxw;
push(@args, '-meme-nmotifs', $meme_nmotifs) if defined($meme_nmotifs);
push(@args, '-meme-minsites', $meme_minsites) if $meme_minsites;
push(@args, '-meme-maxsites', $meme_maxsites) if $meme_maxsites;
push(@args, '-meme-pal') if $meme_pal;
push(@args, '-dreme-e', $dreme_e) if defined($dreme_e);
push(@args, '-dreme-m', $dreme_m) if defined($dreme_m);
push(@args, '-centrimo-local') if $centrimo_local;
push(@args, '-centrimo-score', $centrimo_score) if defined($centrimo_score);
push(@args, '-centrimo-maxreg', $centrimo_maxreg) if defined($centrimo_maxreg);
push(@args, '-centrimo-ethresh', $centrimo_ethresh) if defined($centrimo_ethresh);
push(@args, '-centrimo-noseq') if $centrimo_noseq;
push(@args, $sequences);

# run MEME-ChIP
&run_memechip(@args);
# determine all files added
my @tar_files = &added_files($before, &dir_listing_set($workdir));
# read inputs
push(@tar_files, $sequences);
push(@tar_files, $bfile) if (defined($bfile));
push(@tar_files, $upmotif) if (defined($upmotif));
# create tar with all new files plus the input files
my $tar = &create_tar(@tar_files);
# add the tar file to the output listing
splice(@{$file_list}, 1, 0,  {file => $tar, desc => 'Gzipped Tar of all output'});
# update the status
add_status_msg("Done", $msg_list);
update_status($page, $program, 0, $file_list, $msg_list, "Done");
write_invocation_log($log_file, $log_date, $log_args);
exit(0);
1;

sub run_memechip {
  my @args = @_;
  my ($status, $t0, $t1, $time, $child_pid);
  &report_start('MEME-ChIP', 'meme-chip', @args);
  $t0 = [&gettimeofday()];
  $child_pid = open READER, '-|';
  die("pipe/fork: $!\n") unless defined $child_pid;
  if ($child_pid == 0) { # child process
    # send stderr to file
    open(STDERR, ">", $messages) or die("Can't redirect STDERR: $!");
    # start meme-chip process, this will replace the current process and should not return
    exec(catfile('@BINDIR@', 'meme-chip'), @args);
    exit(1); #should not get here
  }
  # read the status updates from the child and update the index
  my ($line, $prog, $cmd, $run_time, $signal, $with_coredump, $error_code, $msg);
  while ($line = <READER>) {
    chomp($line);
    if ($line =~ /^Starting (\S+):\s+(.*)$/) {
      $prog = $1;
      $cmd = $2;
      $msg = 'MEME-ChIP is starting subprocess <b>'.$prog.'</b><br><code>' . $cmd . '</code>'
    } elsif ($line =~ /^(\S+) ran successfully in (\d+(?:\.\d+)?) seconds/) {
      $prog = $1;
      $run_time = $2;
      $msg = '<span style="color: #008000">MEME-ChIP subprocess <b>'.$prog.'</b> ran successfully in <b>'.sprintf('%.1f', $run_time) .'</b> seconds</span>';
    } elsif ($line =~ /^(\S+) failed to run/) {
      $prog = $1;
      $msg = '<span style="color: #800000">MEME-ChIP failed to start <b>'.$prog.'</b></span>';
    } elsif ($line =~ /^(\S+) process died with signal (\d+), (with|without) coredump$/) {
      $prog = $1;
      $signal = $2;
      $with_coredump = ($3 eq 'with');
      $msg = '<span style="color: #800000">MEME-ChIP subprocess <b>'.$prog.'</b> exited with signal '.$signal.'</span>';
    } elsif ($line =~ /^(\S+) exited with error code (\d+)$/) {
      $prog = $1;
      $error_code = $2;
      $msg = '<span style="color: #800000">MEME-ChIP subprocess <b>'.$prog.'</b> exited with error code '.$error_code.'</span>';
    } elsif ($line =~ /^Ran out of time! Stopping (\S+)$/) {
      $prog = $1;
      $msg = 'MEME-ChIP is stopping subprocess <b>'.$prog.'</b> because it used all the alloted time';
    } else {
      next;
    }
    add_status_msg($msg, $msg_list);
    update_status($page, $program, $refresh, $file_list, $msg_list, "Running MEME-ChIP");
  }
  waitpid($child_pid, 0); # wait for the child process to exit
  $status = $?;
  $t1 = [&gettimeofday()];
  $time = &tv_interval($t0, $t1);
  # remove the simlink
  unlink('db') if (-e 'db');
  &report_status('MEME-ChIP', $status, $time);
}

sub create_tar {
  my @tar_files = @_;
  my ($folder, $folder_dir, $tarname, $t0, $t1, $time, $status);
  # we want to tar including the containing directory
  # find the real name of the containing directory
  $folder = (splitdir(abs_path()))[-1];
  $folder_dir = abs_path('..');
  # append the folder to the tar_files
  for (my $i = 0; $i < scalar(@tar_files); $i++) {
    $tar_files[$i] = abs2rel(abs_path($tar_files[$i]), $folder_dir);
  }
  $tarname = $folder . ".tar.gz";
  my @args = ('-czf', $tarname, '-C', $folder_dir, @tar_files);
  &report_start('Tar', 'tar', @args);
  $t0 = [&gettimeofday()];
  # run tar
  $status = system('tar', @args);
  $t1 = [&gettimeofday()];
  $time = &tv_interval($t0, $t1);
  &report_status('Tar', $status, $time);
  return $tarname;
}

sub report_start {
  my ($name, $prog, @args) = @_;

  add_status_msg('Starting <b>'.$name.'</b><br><code>' . stringify_args($prog, @args) . '</code>', $msg_list);
  update_status($page, $program, $refresh, $file_list, $msg_list, "Running");
}

sub report_status {
  my ($name, $status_code, $time) = @_;

  my $status_msg;
  if ($status_code != 0) {
    if ($status_code == -1) {
      $status_msg = $name . " failed to run";
    } elsif ($status_code & 127) {
      $status_msg = '<b>' . $name . "</b> process exited with signal " . ($status_code & 127);
    } else {
      $status_msg = '<b>' . $name . "</b> exited with error code " . ($status_code >> 8);
    }
    $status_msg = '<span style="color: #800000">'.$status_msg.'</span>';
  } else {
    $status_msg = '<span style="color: #008000"><b>'. $name . '</b> ran successfully in <b>' . 
        (int($time * 100 + 0.5) / 100) . '</b> seconds</span>';
  }
  add_status_msg($status_msg, $msg_list);

  update_status($page, $program, 0, $file_list, $msg_list, 
      ($status_code ? "Error" : ""));

  if ($status_code) {
    write_invocation_log($log_file, $log_date, $log_args);
    exit(1);
  }
}

sub added_files {
  my ($before, $after) = @_;
  my @list = ();
  foreach my $file (keys %{$after}) {
    push(@list, $file) unless ($before->{$file});
  }
  return @list;
}

sub dir_listing_set {
  my ($dir) = @_;
  my @list = &dir_listing($dir);
  my %set = ();
  foreach my $file (@list) {
    $set{$file} = 1;
  }
  return \%set;
}

sub dir_listing {
  my ($dir) = @_;
  my @list = ();
  my $dirh;
  opendir($dirh, $dir);
  while (1) {
    my $fname = readdir($dirh);
    last unless defined($fname);
    next if ($fname eq '.' || $fname eq '..');
    my $file = catfile($dir, $fname);
    if (-d $file) {
      unless (-l $file) { # skip symlinks
        push(@list, &dir_listing($file));
      }
    } else {
      push(@list, $file);
    }
  }
  closedir($dirh);
  return @list;
}
