#!@WHICHPERL@
=head1 NAME

meme_webservice - Run meme and mast in a restricted mode and create an index webpage.

=head1 SYNOPSIS

meme_webservice [options] <sequences>

  Options: 
    -alpha [RNA|DNA|PROTEIN]  The standard alphabet of the sequences. Default: DNA
    -alphf <file>             The alphabet definition of the sequences.
    -objfun [classic|de]      The objective function
    -neg <file>               A negative sequence set, used for generating
                               position-specific priors or with -objfun de
    -mod [oops|zoops|anr]     The expected number of motif repeats per sequence.
                              Default: zoops
    -nmotifs <count>          The number of motifs to find. Default: 3
    -minw <width>             The minimum width of the motif. Default: 6
    -maxw <width>             The maximum width of the motif. Default: 50
    -minsites <num>           The minimum number of sites per motif.
    -maxsites <num>           The maximum number of sites per motif.
    -bfile <file>             A background file.
    -markov_order <num>       Order of background model to use or generate
    -norevcomp                Restrict sites to only given strand.
    -pal                      Only find palindromes.
    -shuffle                  Shuffle the input sequences to destroy any
                               existing patterns.
    -help                     Show this brief help message.

=cut

use strict;
use warnings;
# load standard perl libraries
use Getopt::Long qw(:config permute auto_help);
use Pod::Usage;
# load custom perl libraries
use lib qw(@PERLLIBDIR@);
use Alphabet qw(rna dna protein);
use StatusPage qw(arg_checks arg_end opt_uploaded opt_choice opt_integer opt_evalue);
use Globals;
# directories holding the programs that this script runs
my ($bin_dir, $libexec_dir) = ('@BINDIR@', '@LIBEXECDIR@');
# required positional argument: the (primary) sequences file
my $in_seqs;
# options that carry a default value
my $alpha   = 'DNA';
my $objfun  = 'classic';
my $mode    = 'zoops';
my $nmotifs = 3;
my $minw    = 6;
my $maxw    = 50;
# options that stay undefined unless given on the command line
my ($alphf, $minsites, $maxsites, $bfile, $markov_order, $negfile);
# boolean flags, all off (0) by default
my ($norevcomp, $pal, $shuffle) = (0, 0, 0);
# status page: collects the progress messages reported for this job
# (direct method call instead of the indirect-object form "new StatusPage")
my $status = StatusPage->new('MEME', \@ARGV);
$status->add_message('Parsing arguments');
# parse options
# Any warning emitted while the options are parsed (Getopt::Long reports
# bad options through warn) is captured in @arg_errors instead of being
# printed immediately.
my @arg_errors = ();
my $opts_ok = do {
  local $SIG{__WARN__} = sub { my ($msg) = @_; chomp($msg); push(@arg_errors, $msg); };
  GetOptions(
  # the single positional argument is the sequences file
  '<>' => arg_checks(opt_uploaded(\$in_seqs), arg_end()),
  'alpha=s' => opt_choice(\$alpha, 'DNA', 'RNA', 'PROTEIN'),
  'alphf=s' => opt_uploaded(\$alphf),
  'objfun=s' => opt_choice(\$objfun, "classic", "de"),
  'mod=s' => opt_choice(\$mode, "zoops", "oops", "anr"),
  'nmotifs=i' => opt_integer(\$nmotifs, 1),
  'minw=i' => opt_integer(\$minw, 2, 300),
  'maxw=i' => opt_integer(\$maxw, 2, 300),
  'minsites=i' => opt_integer(\$minsites, 2, 600),
  'maxsites=i' => opt_integer(\$maxsites, 2, 600),
  'bfile=s' => opt_uploaded(\$bfile),
  'markov_order=i' => opt_integer(\$markov_order, 0, 4),
  'neg=s' => opt_uploaded(\$negfile),
  'norevcomp' => \$norevcomp,
  'pal' => \$pal,
  'shuffle' => \$shuffle
  );
};
# add an additional error message when the sequences are missing
push(@arg_errors, "No sequences provided.") unless defined $in_seqs;
# display the error messages both on the status page and on stderr
foreach my $arg_error (@arg_errors) {
  print STDERR $arg_error, "\n";
  $status->add_message($arg_error);
}
# any collected error marks the arguments as bad, whatever GetOptions returned
$opts_ok = 0 if (scalar(@arg_errors) > 0);
# declare some derived file names
my $sequences = $in_seqs;
# $in_seqs is undefined when no sequences were given; skip the concatenation
# on that error path so no "uninitialized value" warning is printed and no
# bogus 'shuffled_' file name is registered before the usage exit below
my $shuffled_seqs = defined($in_seqs) ? 'shuffled_' . $in_seqs : undef;
# setup status page: register every file the job may read or produce
$status->add_file('meme_html', 'meme.html', 'MEME HTML output');
$status->add_file('meme_xml', 'meme.xml', 'MEME XML output');
$status->add_file('meme_text', 'meme.txt', 'MEME text output');
$status->add_file('mast_html', 'mast.html', 'MAST HTML output');
$status->add_file('mast_xml', 'mast.xml', 'MAST XML output');
$status->add_file('mast_text', 'mast.txt', 'MAST text output');
$status->add_file('pos_seqs', $in_seqs, '(Primary) Sequences');
$status->add_file('shu_seqs', $shuffled_seqs, 'Shuffled sequences');
# $bfile and $negfile are undefined unless the matching option was given
$status->add_file('bfile', $bfile, 'Uploaded Background');
$status->add_file('neg_seqs', $negfile, 'Control Sequences');
$status->add_file('psp', 'priors.psp', 'Position-specific priors');
$status->add_message($opts_ok ? 'Arguments ok' : "Error parsing arguments");
$status->update($opts_ok ? 'Starting' : 'Error');
# exit if there was an error reading the arguments
unless ($opts_ok) {
  $status->write_log();
  # print the usage text from the POD and exit with status 2
  pod2usage(2);
}
# load the alphabet
my $alphabet = $status->load_alphabet($alpha, $alphf);
# alphabet arguments shared by psp-gen and MEME: a custom definition file
# takes precedence over the standard alphabet name
my @alph_args;
if (defined($alphf)) {
  @alph_args = ('-alph', $alphf);
} else {
  @alph_args = ('-' . lc($alpha));
}
# Run fasta-shuffle-letters
if ($shuffle) {
  $status->run(
    PROG => 'fasta-shuffle-letters',
    BIN => $libexec_dir,
    ARGS => [$in_seqs, $shuffled_seqs]
  );
  # from here on the shuffled copy replaces the uploaded sequences
  $sequences = $shuffled_seqs;
}
# Run PSPGen
if (defined($negfile) && $objfun eq "classic") {
  # start from the MEME width settings and trim them to the range
  # allowed for PSPs ($MINPSPW .. $MAXPSPW)
  my $width_lo = $minw;
  $width_lo = $MINPSPW if ($width_lo < $MINPSPW);
  my $width_hi = $maxw;
  $width_hi = $MAXPSPW if ($width_hi > $MAXPSPW);
  my @psp_opts = (
    '-pos', $sequences,
    '-neg', $negfile,
    @alph_args,
    '-minw', $width_lo,
    '-maxw', $width_hi
  );
  # protein alphabets get two extra psp-gen flags
  if ($alphabet->equals(protein())) {
    push(@psp_opts, '-maxrange', '-triples');
  }
  # the output of psp-gen is stored in priors.psp
  $status->run(
    PROG => 'psp-gen',
    BIN => $libexec_dir,
    ARGS => \@psp_opts,
    OUT_FILE => 'priors.psp'
  );
}
# Run MEME (let MEME manage the timeout)
# MEME is given the time still available to the job through -time, and the
# run itself is started with TIMEOUT => 0.
my @meme_args = ($sequences, @alph_args, '-oc', '.', '-nostatus',
  '-time', $status->remaining_time(), #'-maxsize', $MAXDATASET, 
  '-mod', $mode, '-nmotifs', $nmotifs, '-minw', $minw, '-maxw', $maxw,
  '-objfun', $objfun);
# only pass -neg when a negative set was actually uploaded; otherwise an
# undefined value would end up in the argument list
push(@meme_args, '-neg', $negfile) if ($objfun eq "de" && defined($negfile));
push(@meme_args, '-minsites', $minsites) if (defined($minsites));
push(@meme_args, '-maxsites', $maxsites) if (defined($maxsites));
# strand and palindrome options only apply to alphabets with a complement
if ($alphabet->has_complement()) {
  push(@meme_args, '-revcomp') unless $norevcomp;
  push(@meme_args, '-pal') if $pal;
}
push(@meme_args, '-bfile', $bfile) if (defined($bfile));
push(@meme_args, '-markov_order', $markov_order) if (defined($markov_order));
# same condition as the psp-gen step: priors.psp is generated only then
push(@meme_args, '-psp', 'priors.psp') if (defined($negfile) && $objfun eq "classic");
$status->run(PROG => 'meme', BIN => $bin_dir, ARGS => \@meme_args, TIMEOUT => 0);
# Run MAST with meme.xml and the sequences, adding the background file
# only when one was uploaded
my @mast_args = (
  'meme.xml', $sequences, '-oc', '.', '-nostatus',
  (defined($bfile) ? ('-bfile', $bfile) : ())
);
$status->run(PROG => 'mast', BIN => $bin_dir, ARGS => \@mast_args);
# finished: record completion, refresh the status page and write the log
$status->add_message("Done");
$status->update();
$status->write_log();
1;
