#!@WHICHPERL@

use strict;
use warnings;

use lib qw(@PERLLIBDIR@);

use MotifUtils qw(matrix_to_intern intern_to_meme intern_to_iupac load_alphabet read_background_file);

use Getopt::Long;
use Pod::Usage;

=head1 NAME

matrix2meme - Converts count or probability matrix motifs into MEME motifs.

=head1 SYNOPSIS

matrix2meme [options]

 Options: 
  -dna
  -protein
  -alph <alphabet file>         Alphabet definition; default: DNA
  -orien auto|col|row           The matrix orientation; default: auto
  -numseqs <numseqs>            assume frequencies based on this many 
                                sequences when a probability matrix is used;
                                default: 20
  -bg <background file>         file with background frequencies of letters; 
                                default: uniform background
  -pseudo <total pseudocounts>  add <total pseudocounts> times letter 
                                background to each frequency; default: 0
  -logodds                      output the log-odds (PSSM) and frequency 
                                (PSPM) motifs; default: PSPM motif only
  -url <website>                website for the motif; The motif name is 
                                substituted for MOTIF_NAME; default: no url
 
 Converts matrix motifs into MEME motifs. 
 
 The motifs are named based on their order in the file but their consensus
 sequence is used as the alternate name.
 
 Reads matrices from standard input separated by empty lines.
 Writes MEME format to standard output.

 Example DNA count matrix:
  1 5 6 3
  5 5 5 0
 12 1 1 1

=cut

# Option defaults. Each of these file-level settings may be overridden on the
# command line and is read later by the conversion code.
my $is_dna = 1;          # cleared by -protein
my $alph_file = undef;   # -alph
my $orient = 'auto';     # -orientation (documented as -orien; Getopt::Long
                         # accepts unique abbreviations by default)
my $num_seqs = 20;       # -numseqs
my $bg_file;             # -bg; undefined when not given
my $pseudo_total = 0;    # -pseudo
my $print_logodds = 0;   # -logodds
my $url_pattern = "";    # -url
my $help = 0; #FALSE

# Parse the command line. A parsing failure prints the usage text and exits.
GetOptions(
  "dna"           => \$is_dna,
  "protein"       => sub { $is_dna = 0; },
  "alph=s"        => \$alph_file,
  "orientation=s" => \$orient,
  "numseqs=i"     => \$num_seqs,
  "bg=s"          => \$bg_file,
  "pseudo=f"      => \$pseudo_total,
  "logodds"       => \$print_logodds,
  "url=s"         => \$url_pattern,
  "help|?"        => \$help,
) or pod2usage(2);

pod2usage(1) if $help;

# -numseqs is a count of sequences, so zero is as meaningless as a negative
# value; the check now agrees with its message and requires at least one.
pod2usage("Option -numseqs must have a positive value.") if ($num_seqs <= 0);
# -pseudo defaults to zero, so zero is valid and only negatives are refused;
# the message is worded to match.
pod2usage("Option -pseudo must not have a negative value.") if ($pseudo_total < 0);

# Build the background model for the selected alphabet, using the background
# file when one was supplied. The explicit & call form is retained so the
# call semantics of the imported routines stay exactly as they were.
my %bg = &read_background_file(
  &load_alphabet($is_dna, $alph_file),
  $bg_file
);

# Read matrices from standard input. Rows of one matrix are accumulated in
# $matrix; a line without any non-whitespace character ends the current
# matrix. Motifs are numbered from 1 in the order they are emitted.
my $matrix = "";
my $motifnum = 1;

# Pass the accumulated rows (if there are any) to print_motif and start over.
my $emit_pending = sub {
  return if $matrix eq "";
  print_motif($motifnum++, $matrix);
  $matrix = "";
};

while (defined(my $line = <STDIN>)) {
  chomp $line;
  if ($line =~ /\S/) {
    # part of the current matrix
    $matrix .= "$line\n";
  } else {
    # separator line: runs of blank lines emit nothing extra
    $emit_pending->();
  }
}
# the last matrix need not be followed by a blank line
$emit_pending->();

# Convert one block of matrix text into a motif and print it in MEME format
# on standard output.
#
# Parameters:
#   $num  - number of the matrix in input order (the caller counts from 1);
#           used as the motif id and substituted for MOTIF_NAME in the -url
#           pattern.
#   $text - the matrix text, one row per newline-terminated line.
#
# Reads the file-level settings %bg, $orient, $num_seqs, $pseudo_total,
# $url_pattern and $print_logodds.
#
# Messages returned by matrix_to_intern are written to standard error. When
# no motif is returned the matrix is skipped, instead of an empty motif being
# created implicitly and handed to the formatting routines.
sub print_motif {
  my ($num, $text) = @_;
  my $url = $url_pattern;
  $url =~ s/MOTIF_NAME/$num/g;
  my ($motif, $errors) = matrix_to_intern(
    \%bg, $text, $orient, $num_seqs, $pseudo_total,
    id => $num, url => $url);

  # Surface conversion problems rather than silently discarding them.
  # NOTE(review): assumes $errors is an array reference of message strings;
  # anything else is ignored here - confirm against MotifUtils.
  my @messages = (ref($errors) eq 'ARRAY') ? @{$errors} : ();
  print STDERR "Motif $num: $_\n" for @messages;

  # Without this guard the hash assignment below would autovivify an empty
  # motif from an undefined value.
  unless (defined $motif) {
    print STDERR "Motif $num: matrix could not be converted; skipped.\n"
      unless @messages;
    return;
  }

  $motif->{alt} = intern_to_iupac($motif);
  # The last argument is true only for motif number 1.
  # NOTE(review): presumably this requests the MEME file header; if so, the
  # header is never printed when motif 1 is skipped - confirm.
  print STDOUT intern_to_meme($motif, $print_logodds, 1, ($num == 1));
}
