#!@WHICHPERL@ -w
#
# $Id:$
#
# FILE: iupac2meme
# AUTHOR: Timothy L. Bailey
# CREATE DATE: 3/06/2010
# DESCRIPTION: Convert a DNA IUPAC motif to MEME format 

use warnings;
use strict;


use lib qw(@PERLLIBDIR@);

use MotifUtils qw(seq_to_intern intern_to_meme load_alphabet read_background_file);

use Getopt::Long;
use Pod::Usage;

=head1 NAME

iupac2meme - Converts IUPAC motifs into MEME motifs.

=head1 SYNOPSIS

iupac2meme [options] <iupac motif>+

 Options: 
  -dna                          use DNA IUPAC alphabet
  -protein                      use protein IUPAC alphabet
  -alph <alphabet file>         file with alphabet definition;
                                default: use DNA IUPAC alphabet
  -numseqs <numseqs>            assume frequencies based on this many 
                                sequences; default: 20
  -bg <background file>         file with background frequencies of letters; 
                                default: uniform background
  -pseudo <total pseudocounts>  add <total pseudocounts> times letter 
                                background to each frequency; default: 0
  -logodds                      output the log-odds (PSSM) and frequency 
                                (PSPM) motifs; default: PSPM motif only
  -url <website>                website for the motif; The motif name is 
                                substituted for MOTIF_NAME; default: no url
  -nosort                       don't sort the order of motifs
  -named                        looks for a motif name after each IUPAC code;
                                default: use the IUPAC code as the motif name
 
 Converts IUPAC motifs into MEME motifs.
 
 Example IUPAC DNA motif: ACGGWNNYCGT
 Example IUPAC PROTEN motif: IKLVBZYXXHG

 Writes standard output.

=cut

# Set option defaults
my $is_dna = 1;
my $alph_file = undef;
my $num_seqs = 20;
my $bg_file;
my $pseudo_total = 0;
my $print_logodds = 0;
my $url_pattern = "";
my $no_sort = 0;
my $named = 0;
my @motif_strings = ();

GetOptions(
  "dna" => \$is_dna, "protein" => sub {$is_dna = 0},
  "alph=s" => \$alph_file, "numseqs=i" => \$num_seqs, 
  "bg=s" => \$bg_file, "pseudo=f" => \$pseudo_total, "logodds" => \$print_logodds,
  "url=s" => \$url_pattern, "nosort" => \$no_sort, "named" => \$named) or pod2usage(2);
@motif_strings = @ARGV;

#check num seqs
pod2usage("Option -numseqs must have a positive value.") if ($num_seqs < 0);
#check pseudo total
pod2usage("Option -pseudo must have a positive value.") if ($pseudo_total < 0);
#check IUPAC motifs
pod2usage("At least one IUPAC motif must be specified.") if (!@motif_strings);

# pair up with names
my @motif_pairs = ();
while (@motif_strings) {
  my $motif_iupac = shift @motif_strings;
  my $motif_name = $named ? shift @motif_strings : $motif_iupac;
  push(@motif_pairs, {iupac => $motif_iupac, name => $motif_name});
}

#sort the motifs
@motif_pairs = sort {$a->{iupac} cmp $b->{iupac}} @motif_pairs unless $no_sort;

# get the background model
my %bg = &read_background_file(&load_alphabet($is_dna, $alph_file), $bg_file);

#
# convert the IUPAC motifs to MEME format
#
my $num_motifs = 0;
foreach my $pair (@motif_pairs) {
  my $iupac_motif = $pair->{iupac};
  my $name = $pair->{name};
  my $url = $url_pattern;
  $url =~ s/MOTIF_NAME/$iupac_motif/g;
  my ($motif, $errors) = seq_to_intern(\%bg, $iupac_motif, $num_seqs, 
      $pseudo_total, url => $url, id => $name);
  print STDERR join("\n", @{$errors}), "\n" if @{$errors};
  print intern_to_meme($motif, $print_logodds, 1, !($num_motifs++)) if $motif;
} # convert motif to MEME format

