#!/usr/bin/env perl
#
#    Copyright (C) 2009 Genome Research Ltd.
#    Portions copyright (C) 2009, 2010 Broad Institute.
#
#    Author: Heng Li <lh3@sanger.ac.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

use strict;
use warnings;
use Getopt::Std;

&wgsim_eval;
exit;

sub wgsim_eval {
  my %opts = (g=>5);
  getopts('pcag:', \%opts);
  die("Usage: wgsim_eval.pl [-pca] [-g $opts{g}] <in.sam>\n") if (@ARGV == 0 && -t STDIN);
  my (@c0, @c1, %fnfp);
  my ($max_q, $flag) = (0, 0);
  my $gap = $opts{g};
  $flag |= 1 if (defined $opts{p});
  $flag |= 2 if (defined $opts{c});
  while (<>) {
    next if (/^\@/);
    my @t = split("\t");
    next if (@t < 11);
    my $line = $_;
    my ($q, $is_correct, $chr, $left, $rght) = (int($t[4]/10), 1, $t[2], $t[3], $t[3]);
    $max_q = $q if ($q > $max_q);
    # right coordinate
    $_ = $t[5]; s/(\d+)[MDN]/$rght+=$1,'x'/eg;
    --$rght;
    # correct for soft clipping
    my ($left0, $rght0) = ($left, $rght);
    $left -= $1 if (/^(\d+)[SH]/);
    $rght += $1 if (/(\d+)[SH]$/);
    $left0 -= $1 if (/(\d+)[SH]$/);
    $rght0 += $1 if (/^(\d+)[SH]/);
    # skip unmapped reads
    next if (($t[1]&0x4) || $chr eq '*');
    # parse read name and check
    if ($t[0] =~ /^(\S+)_(\d+)_(\d+)_/) {
      if ($1 ne $chr) { # different chr
        $is_correct = 0;
      } else {
        if ($flag & 2) {
          if (($t[1]&0x40) && !($t[1]&0x10)) { # F3, forward
            $is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap);
          } elsif (($t[1]&0x40) && ($t[1]&0x10)) { # F3, reverse
            $is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap);
          } elsif (($t[1]&0x80) && !($t[1]&0x10)) { # R3, forward
            $is_correct = 0 if (abs($3 - $left) > $gap && abs($3 - $left0) > $gap);
          } else { # R3, reverse
            $is_correct = 0 if (abs($2 - $rght) > $gap && abs($3 - $rght0) > $gap);
          }
        } else {
          if ($t[1] & 0x10) { # reverse
            $is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap); # in case of indels that are close to the end of a reads
          } else {
            $is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap);
          }
        }
      }
    } else {
      warn("[wgsim_eval] read '$t[0]' was not generated by wgsim?\n");
      next;
    }
    ++$c0[$q];
    ++$c1[$q] unless ($is_correct);
    @{$fnfp{$t[4]}} = (0, 0) unless (defined $fnfp{$t[4]});
    ++$fnfp{$t[4]}[0];
    ++$fnfp{$t[4]}[1] unless ($is_correct);
    print STDERR $line if (($flag&1) && !$is_correct && $q > 0);
  }
  # print
  my ($cc0, $cc1) = (0, 0);
  if (!defined($opts{a})) {
    for (my $i = $max_q; $i >= 0; --$i) {
      $c0[$i] = 0 unless (defined $c0[$i]);
      $c1[$i] = 0 unless (defined $c1[$i]);
      $cc0 += $c0[$i]; $cc1 += $c1[$i];
      printf("%.2dx %12d / %-12d  %12d  %.3e\n", $i, $c1[$i], $c0[$i], $cc0, $cc1/$cc0) if ($cc0);
    }
  } else {
    for (reverse(sort {$a<=>$b} (keys %fnfp))) {
      next if ($_ == 0);
      $cc0 += $fnfp{$_}[0];
      $cc1 += $fnfp{$_}[1];
      print join("\t", $_, $cc0, $cc1), "\n";
    }
  }
}
