#!/usr/bin/env perl

use strict;
use warnings;
use Carp;
use Getopt::Long qw(:config no_ignore_case bundling pass_through);
use POSIX qw (ceil);

my $help_flag;
my $rsem_expr_file;
my $num_quintiles = 10;
my $feature_list_file;
my $min_fpkm = 0.5;

my $usage = <<_EOUSAGE_;

##############################################################################
#
#  Required:
#
#  --rsem_expr_file <string>      gene or isoform.results file generated by RSEM
#
#  --feature_list_file <string>   file containing list of features (transcript or gene identifiers, corresponding to rsem_expr_file above
#
#  Optional:
#
#  --num_quintiles <int>          default: 10
#
#  --min_fpkm <float>             default: 0.5  (features with lower expression estimates are not included in the expr quantiles)
#
##############################################################################

_EOUSAGE_

    ;


&GetOptions ( 'h' => \$help_flag,
              'rsem_expr_file=s' => \$rsem_expr_file,
              'num_quintiles=i' => \$num_quintiles,
              'feature_list_file=s' => \$feature_list_file,
              'min_fpkm=f' => \$min_fpkm,
              );


if ($help_flag) {
    die $usage;
}

unless($rsem_expr_file && $feature_list_file) {
    die $usage;
}

main: {


    my %feature_to_quintile = &compute_expression_quintile($rsem_expr_file, $num_quintiles);

    my %seen;
    my %quintile_to_count;
    open (my $fh, $feature_list_file) or die "Error, cannot open file $feature_list_file";
    while (<$fh>) {
        chomp;
        my $acc = $_;
        if ($seen{$acc}) {
            #print STDERR "WARNING: $acc already counted from list. Ignoring.\n";
            next;
        }
        $seen{$acc} = 1;
        
        my $quintile = $feature_to_quintile{$acc};
        unless (defined $quintile) {
            print STDERR "WARNING: $acc not assigned to expr quintile.\n";
            next;
        }
        
        $quintile_to_count{$quintile}++;
    }
    close $fh;

    foreach my $quintile (sort {$a<=>$b} keys %quintile_to_count) {
        my $count = $quintile_to_count{$quintile};
        print join("\t", $quintile, $count) . "\n";
    }

    exit(0);
    
    

}

####
sub compute_expression_quintile {
    my ($rsem_expr_file, $num_quintiles) = @_;

    my @entries;

    open (my $fh, $rsem_expr_file) or die $!;
    my $header = <$fh>;
    while (<$fh>) {
        chomp;
        my @x = split(/\t/);
        my $acc = $x[0];
        my $fpkm = $x[6];

        if ($fpkm >= $min_fpkm) {

            push (@entries, { acc => $acc,
                              fpkm => $fpkm,
                          });
        }
    }
    close $fh;

    @entries = sort {$a->{fpkm}<=>$b->{fpkm}} @entries;

    my $num_entries = scalar(@entries);
   
    my %acc_to_quintile;
    my $quintile_size = sprintf("%.1f", 100/$num_quintiles);
    
    my $num_entries_per_quintile = $num_entries/$num_quintiles;

    for (my $i = 0; $i <= $#entries; $i++) {

        my $entry = $entries[$i];

        my $quintile_num = ceil( ($i+1) / $num_entries_per_quintile);
        
        my $acc = $entry->{acc};
        $acc_to_quintile{$acc} = $quintile_num;
    }

    return(%acc_to_quintile);
}


              
