#!@WHICHPERL@

# Defaults
my $mouse = 0;
my $clobber = 1;
my $out_dir = 'cismapper_out';
my $locus_file = undef;			# required
my $rna = undef;			# required
my $tissue = 'None';			# not an input argument
my $tissues = $mouse ? 'Bat_MAdult24wks,Bcellcd43n_MAdlt8w,Bmarrow_MAdult8wks,Bmdm_FAdult8wks,Cbellum_MAdult8wks,Ch12_FImmortal,Cortex_MAdult8wks,Esb4_ME0,Ese14_ME0,Heart_MAdult8wks,Heart_UE14half,Kidney_MAdult8wks,Limb_UE14half,Liver_MAdult8wks,Liver_UE14half,Lung_MAdult8wks,Mef_MAdult8wks,Mel_MImmortal,Olfact_MAdult8wks,Plac_FAdult8wks,Smint_MAdult8wks,Spleen_MAdult8wks,Testis_MAdult8wks,Thymus_MAdult8wks,Wbrain_UE14half' : 'Ag04450,Gm12878,H1hesc,Helas3,Hepg2,Huvec,K562';
my $hrd = $mouse ? 'MappingData/Mouse/Encode/Histone' : 'MappingData/Human/Histone';
my $hnames = $mouse ? 'H3k27ac' : 'H3k27ac,H3k4me3';
my $mlds = $mouse ? '500000' : '500000,1000';
my $erd = $mouse ? 'MappingData/Mouse/Encode/Expression' : 'MappingData/Human/Expression';
my $eft = $mouse ? 'bed' : 'gtf';
my $rtype_list = 'LongPap LongPapMouse LongPam Short Cage';	# legal RNA expression types
my $afile = $mouse ? 'MappingData/Mouse/Encode/mm9RefSeq.transcripts.named.gtf' : 'MappingData/Human/gencode.v7.transcripts.gtf';
my $atype = 'Gencode';
my $atype_list = 'Gencode RefSeq';	# not an input argument
#my $afield_list = 'gene_id,transcript_id';
my $ttypes = 'protein_coding,processed_transcript';
my $use_genes = 0;
my $mfc = 7;
my $mme = 2;
my $mhs = 0.05;
my $descr = undef;
my $echo = 1;
my $nostatus = 0;
my $no_map = 0;
my $remove_map = 0;

# ProcessedFileFormat (Constants)
my $transcript_location = 'TrLoc.bed';
my $loci = 'CRM.bed';
my $histone_level = 'HistLev.{2}.tsv';
my $transcript_expression = 'TrEx.tsv';

# Other stuff
my $config_file = "BuildMap.xml";

=head1 NAME

cismapper -- does an analysis of the targets of regulatory elements

=cut

my $usage = <<USAGE;               # usage message
cismapper [options] <locus_file> <rna_source>

 Required Arguments:
  <locus_file>			the name of a file of potential regulatory elements (BED format)
  <rna_source>			the type of RNA expression data from the list
					$rtype_list
 Options:
  -o <dir>   		 	output to the specified directory, failing if the directory exists
  -oc <dir>   			output to the specified directory, overwriting if the directory exists
  -tissues <tissues>		comma-separated list (no spaces) of tissue names that are the 
				sources of the histone and expression data; 
					default: $tissues
  -histone-root <hrd>		histone root directory; default: $hrd
					histone files should be in subfolders '<hrd>/<t>'
					where <t> is one of the tissue names in <tissues> 
  -histone-names <hnames>	comma-separated list (no spaces) of histone names; default: $hnames
    					Note: histone file names must match '<hrd>/<t>/*<hname>*broadPeak'
					where <t> is one of the tissue names in <tissues> 
					and <hname> is one of the histone names listed in <hnames>
  -max-link-distances <mlds>	comma-separated list (no spaces) of maximum distances 
					between an RE and its target;
					default: $mlds
					Note: there must be one distance for each histone name in <hnames>
  -expression-root <erd>	expression root directory;
					default: $erd
					Note: expression files should be in subfolders '<erd>/<t>'
					where <t> is one of the tissue names in <tissues> 
  -expression-file-type <eft>	file type of expression files; default: $eft
    					expression file names must match '<erd>/<t>/<rna_source>.<eft>'
					where <t> is one of the tissue names in <tissues> 
  -annotation-file-name <afile>	annotation file name; 
					default: $afile
  -annotation-type <atype>	type of annotation [Gencode|RefSeq]; default: $atype
  -transcript-types <ttypes>	types of transcript to use from annotation file; 
					default: $ttypes
  -min-feature-count <mfc>	only consider links where there is both histone and 
				expression data for at least this many tissues: default: $mfc
  -min-max-expression <mme>	maximum expression of a target must be at least <mme> for
				the target to be included in the map; default: $mme
  -max-html-score <mhs>		only include links with this score or better in the HTML
					output; default: $mhs
  -desc <text>  		plain text description of the job
  -fdesc <file>  		file containing plain text description of the job
  -nomap			use existing map file in output directory; default: build map
  -remove-map			delete the map file when done
  -noecho                   	don't echo the commands run; default: echo the commands
  -nostatus			don't print progress reports to the terminal
  -version			show version and exit
  -h				show this message
USAGE

use strict;
use warnings;

use Carp;
$SIG{ __DIE__ } = sub { Carp::confess( @_ ) };

use Cwd qw(abs_path);
use Data::Dumper;
use Fcntl qw(O_CREAT O_RDONLY O_WRONLY O_TRUNC SEEK_SET);
use File::Basename qw(fileparse);
use File::Path qw(mkpath);
use File::Temp qw(tempfile);
use File::Spec::Functions qw(catfile catdir tmpdir abs2rel);
use Getopt::Long;
use List::Util qw(min max);
use Time::HiRes qw(gettimeofday tv_interval);
#use XML::Simple;

use lib qw(@PERLLIBDIR@);
use Globals;
use ExecUtils qw(invoke stringify_args2);
use JsonRdr;
use HtmlMonolithWr;

# Globals
my $t0 = [&gettimeofday()];
my $logger = undef;
my $bin_dir = undef;
my $secondary_bin_dir = undef;
my $etc_dir = undef;
my $script_dir = undef;
my $temp_dir = undef;
my $version = undef;
my $revision = undef;
my $release = undef;
my $source_url = undef;
my $progress_log = undef;
my @cmd = (scalar fileparse($0), @ARGV);

my @jobs = ();

# Run CisMapper
&main();

#
# Setup some globals
#
sub initialise {
  # setup logging
  eval {
    require Log::Log4perl;
    Log::Log4perl->import();
    Log::Log4perl::init('@APPCONFIGDIR@/logging.conf');
    $logger = Log::Log4perl->get_logger('meme.service.cismapper');
    $SIG{__DIE__} = sub {
      return if ($^S);
      $logger->fatal(@_);
      die @_;
    };
    $logger->trace("call initialise");
  };

  # setup MONO_PATH
  #my $mono_path = $ENV{'MONO_PATH'} ? $ENV{'MONO_PATH'} : '@LIBEXECDIR@';
  $ENV{'MONO_PATH'} = '@LIBEXECDIR@';

  # disable buffering on stdout (it should be selected by default)
  $| = 1;

  # setup primary binary directory
  $bin_dir = $ENV{'MEME_BIN_DIR'} ? $ENV{'MEME_BIN_DIR'} : '@BINDIR@';

  # setup secondary bin directory
  $secondary_bin_dir = $ENV{'MEME_SECONDARY_BIN_DIR'} ? $ENV{'MEME_SECONDARY_BIN_DIR'} : '@LIBEXECDIR@';

  # setup script directory
  $script_dir = $ENV{'MEME_SCRIPT_DIR'} ? $ENV{'MEME_SCRIPT_DIR'} : '@LIBEXECDIR@';

  # setup etc directory
  $etc_dir = $ENV{'MEME_DATA_DIR'} ? $ENV{'MEME_DATA_DIR'} : '@APPCONFIGDIR@';

  # setup temporary directory
  $temp_dir = '@TMP_DIR@';
  # use the perl default if none is supplied or the replace fails
  $temp_dir = &tmpdir() if ($temp_dir eq '' || $temp_dir =~ m/^\@TMP[_]DIR\@$/);

  # store program info
  $version = '@ARCHIVE_VERSION@';
  $revision = '@ARCHIVE_REVISION@';
  $release = '@ARCHIVE_DATE@';
  $source_url = '@SOURCE_URL@';
} # initialize

sub init_progress_log {
  my ($out_dir) = @_;
  my $progress_log_filename = catfile($out_dir, 'progress_log.txt');
  sysopen($progress_log, $progress_log_filename, 
    O_CREAT | O_TRUNC | O_WRONLY) or die("Failed to create progress log file");
  # disable output buffering as this is meant to document when things go wrong
  my $old_fh = select($progress_log); $| = 1; select($old_fh);
}

#
# Parse the arguments
#
sub arguments {
  $logger->trace("call arguments") if $logger;
  # Set Option Defaults
  my $options = {
    NO_MAP => $no_map, 
    REMOVE_MAP => $remove_map, 
    ECHO => $echo, 
    STATUS => $nostatus, 
    CLOBBER => $clobber, 
    OUT_DIR => $out_dir,
    LOCUS_FILE => $locus_file, 
    RNA_SOURCE => $rna, 
    TISSUE => $tissue,
    TISSUES => $tissues, 
    HISTONE_ROOT => $hrd,
    HISTONE_NAMES => $hnames,
    MAX_LINK_DISTANCES => $mlds, 
    EXPRESSION_ROOT => $erd, 
    EXPRESSION_FILE_TYPE => $eft, 
    ANNOTATION_FILE_NAME => $afile,
    ANNOTATION_TYPE => $atype,
    TRANSCRIPT_TYPES => $ttypes, 
    USE_GENES => $use_genes,	# removed from input
    MIN_FEATURE_COUNT => $mfc, 
    MIN_MAX_EXPRESSION => $mme, 
    MAX_HTML_SCORE => $mhs, 
    DESCRIPTION => $descr, 
    ECHO => $echo,
    CONFIG => undef	# not an input argument
  };

  # General Options
  my $help = 0; # FALSE
  my $show_version = 0; # FALSE
  my $make_tsv_files  = 0;	# hidden
  my $map_file = undef;         # hidden
  my $output_file = undef;      # hidden
  my $annot_file = undef;       # hidden

  my @errors = ();
  my @dbs = ();

  # get the options from the arguments
  my $options_success = 0; # FALSE
  my $options_err = undef;
  # redirect stderr to a temp file so we can get the error message from GetOptions
  my $olderr;
  my $tmperr = tempfile('GetOptions_XXXXXXXXXX', DIR => $temp_dir, UNLINK => 1);
  open($olderr, ">&STDERR") or die("Can't dup STDERR: $!");
  open(STDERR, '>&', $tmperr) or die("Can't redirect STDERR to temp file: $!");
  # parse options
  $options_success = GetOptions(
    'h'          		=> \$help,
    'version'         		=> \$show_version,
    'make-tsv-files'		=> \$make_tsv_files,
    'map-file=s'		=> \$map_file,
    'output-file=s'		=> \$output_file,
    'annot-file=s'              => \$annot_file,
    'nomap'          		=> \$options->{NO_MAP},
    'remove-map'       		=> \$options->{REMOVE_MAP},
    'noecho'          		=> sub {$options->{ECHO} = 0},
    'nostatus'         		=> \$options->{NOSTATUS},
    'o=s'             		=> sub {$options->{CLOBBER} = 0; shift; $options->{OUT_DIR} = shift},
    'oc=s'            		=> \$options->{OUT_DIR},
    'desc=s'          		=> \$options->{DESCRIPTION},
    'fdesc=s'         		=> # slurp the description into a scalar
      sub {
	my ($param, $file) = @_;
	open(my $fh, '<', $file) or die("Couldn't open description file: $!\n");
	$options->{DESCRIPTION} = do { local( $/ ); <$fh>};
	close($fh);
      },
    'tissues=s'			=> \$options->{TISSUES},
    'histone-root=s'		=> \$options->{HISTONE_ROOT},
    'histone-names=s' 		=> \$options->{HISTONE_NAMES},
    'max-link-distances=s'	=> \$options->{MAX_LINK_DISTANCES},
    'expression-root=s'		=> \$options->{EXPRESSION_ROOT},
    'expression-file-type=s' 	=> \$options->{EXPRESSION_FILE_TYPE},
    'annotation-file-name=s'	=> \$options->{ANNOTATION_FILE_NAME},
    'annotation-type=s'		=> \$options->{ANNOTATION_TYPE},
    'annotation-add-fields=s'	=> \$options->{ANNOTATION_ADD_FIELDS},
    'transcript-types=s'	=> \$options->{TRANSCRIPT_TYPES},
    'use-genes' 		=> \$options->{USE_GENES},
    'min-feature-count=i'	=> \$options->{MIN_FEATURE_COUNT},
    'min-max-expression=f'	=> \$options->{MIN_MAX_EXPRESSION},
    'max-html-score=f'		=> \$options->{MAX_HTML_SCORE}
  );
  # get required options
  $options->{LOCUS_FILE} = $ARGV[0];
  $options->{RNA_SOURCE} = $ARGV[1];

  # Short-circuit recursive call (hack)
  if ($make_tsv_files) {
    &make_tsv_files_helper($options->{OUT_DIR}, $map_file, $annot_file);
    exit(0)
  }

  # display version
  if ($show_version) {
    print STDOUT $version, "\n";
    exit(0);
  }

  # display help
  if ($help) {
    print STDOUT $usage;
    exit(0);
  }

  # reset STDERR
  open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!");
  # read argument parsing errors
  seek($tmperr, 0, SEEK_SET);
  while (<$tmperr>) {chomp; push(@errors, $_);}
  close($tmperr);

  # check that we're not clobbering something we're not meant to
  if (-e $options->{OUT_DIR}) {
    unless (-d $options->{OUT_DIR}) {
      push(@errors, "Can't output to \"$options->{OUT_DIR}\" as it is not a directory.\n");
    } elsif (!$options->{CLOBBER}) {
      push(@errors, "Can't output to \"$options->{OUT_DIR}\" as the directory already exists.\n");
    }
  }

  # locus file
  unless (defined($options->{LOCUS_FILE})) {
    push(@errors, "You must specify a BED file with chromosomal loci on the command line with <locus_file>.");
  } elsif (! -e $options->{LOCUS_FILE}) {
    push(@errors, "RE locus file '$options->{LOCUS_FILE}' not found.");
    push(@errors, "You must specify a valid locus file on the command line with <locus_file>.");
  }

  # -rna-source
  unless (defined $options->{RNA_SOURCE} and $rtype_list =~ /\b$options->{RNA_SOURCE}\b/) {
    push(@errors, "You must specify a valid RNA source type on the command line.");
    push(@errors, "Valid RNA source types are: $rtype_list");
    $options->{RNA_SOURCE} = undef;
  }

  # -tissues
  if ($options->{TISSUES} eq "") {
    push(@errors, "You must specify a (comma-separated) list of tissues with -tissues <tissues> (e.g., '$tissues').");
  }

  # -histone-root
  $options->{HISTONE_ROOT} =~ s/\/$//;   # remove trailing slash
  unless (-d abs_path($options->{HISTONE_ROOT})) {
    push(@errors, "Histone root directory '$options->{HISTONE_ROOT}' does not exist or is not a directory.");
    push(@errors, "You must specify a valid histone root directory with -histone-root <hrd>.");
  }

  # -histone-names
  if ($options->{HISTONE_NAMES} eq "") {
    push(@errors, "You must specify a valid list of histone names with -histone-names <hnames> (e.g., '$hnames').");
  }

  # -expression-root 
  $options->{EXPRESSION_ROOT} =~ s/\/$//;   # remove trailing slash
  unless (-d abs_path($options->{EXPRESSION_ROOT})) {
    push(@errors, "Expression root directory '$options->{EXPRESSION_ROOT}' does not exist or is not a directory.");
    push(@errors, "You must specify a valid expression root directory with -expression-root <erd>.");
  }

  # -expression-file-type
  if ($options->{EXPRESSION_FILE_TYPE} eq "") {
    push(@errors, "You must specify an expression file type with -expression-file-type <eft> (e.g., '$eft').");
  }

  # check that there are enough histone and expression files for the given tissues
  my @histones = split ",", $options->{HISTONE_NAMES};
  if (defined $options->{RNA_SOURCE}) {
    foreach my $histone (@histones) {
      my $n_both = 0;
      unless($options->{NOSTATUS}) {printf(STDOUT "Checking for histone ($histone) and expression data for the given tissues...\n\n")};
      if (-d abs_path($options->{HISTONE_ROOT}) && -d abs_path($options->{EXPRESSION_ROOT})) {
	my $histone_glob_path = $options->{HISTONE_ROOT} . "/*/*" . $histone . "*broadPeak";
	my $expression_glob_path = $options->{EXPRESSION_ROOT} . "/<t>/" . $options->{RNA_SOURCE} . "." . $options->{EXPRESSION_FILE_TYPE};
	my @tissues = split ',', $options->{TISSUES};
        unless($options->{NOSTATUS}) {
	  printf(STDOUT "%-20s %10s %10s %10s %10s\n", "TISSUE", "HISTONE", "EXPRESSION", "BOTH", "N_BOTH");
	  printf(STDOUT "%-20s %10s %10s %10s %10s\n", '='x20, '='x10, '='x10, '='x10, '='x10);
        }
	foreach my $tissue (@tissues) {
	  my $histone_glob_path = "$options->{HISTONE_ROOT}/$tissue/*$histone*broadPeak";
	  my @histone_files = glob($histone_glob_path);
	  my $expression_file = $options->{EXPRESSION_ROOT} . "/" . $tissue . "/" . $options->{RNA_SOURCE} . "." . $options->{EXPRESSION_FILE_TYPE};
	  my $found_histone = (@histone_files >0) ? "yes" : "no";
	  my $found_expression = (-e $expression_file) ? "yes" : "no";
	  my $found_both = ($found_histone eq "yes" && $found_expression eq "yes") ? "yes" : "no";
	  $n_both++ if $found_both eq "yes";
          unless($options->{NOSTATUS}) {
	    printf(STDOUT "%-20s %10s %10s %10s %10d\n", 
	      $tissue, $found_histone, $found_expression, $found_both, $n_both);
          }
	}
	unless($options->{NOSTATUS}) {printf(STDOUT "\n");}
	if ($n_both < $options->{MIN_FEATURE_COUNT}) {
	  push(@errors, "The number of tissues with both histone and expression data is too small ($n_both vs. $options->{MIN_FEATURE_COUNT}).");
	  push(@errors, "Check that your files are in the correct locations or lower -min-feature-count.\n");
	  push(@errors, "Histone files should be in the path: $histone_glob_path");
	  push(@errors, "Expression files should be in the path: $expression_glob_path\n  where <t> is one of <tissues> = '$options->{TISSUES}'");
	}
      }
    } # histone
  } # valid RNA source

  # max-link-distance
  my $i;
  my @distances = split ",", $options->{MAX_LINK_DISTANCES};
  foreach my $distance (@distances) {
    $i++;
    $distance += 0;
    unless ($distance > 1) {
      push(@errors, "Illegal value for value-$i in -max-link-distances <mlds>: $distance.  Must be > 0.");
    }
  }
  unless ($#distances == $#histones) {
    push(@errors, "There must be the same number of (comma-separated) values in -max-link-distances as -histone-names:\n" .
      "  '$options->{MAX_LINK_DISTANCES}' vs. '$options->{HISTONE_NAMES}'");
  }

  # annotation-file-name
  unless (-e $options->{ANNOTATION_FILE_NAME}) {
    push(@errors, "Annotation file '$options->{ANNOTATION_FILE_NAME}' not found.");
    push(@errors, "You must specify a valid annotation file with -annotation-file-name <afile>.");
  }

  # annotation-type
  unless ($atype_list =~ /\b$options->{ANNOTATION_TYPE}\b/) {
    push(@errors, "Invalid annotation type: '$options->{ANNOTATION_TYPE}'.  Valid types are: $atype_list");
    push(@errors, "You must specify a valid annotation type with -annotation-type <atype>.");
  }

  # min-feature-count
  unless ($options->{MIN_FEATURE_COUNT} > 2) {
    push(@errors, "Illegal value for -min-feature-count <mfc>: $options->{MIN_FEATURE_COUNT}.  Must be > 2.");
  }

  # min-max-expression
  unless ($options->{MIN_MAX_EXPRESSION} >= 1) {
    push(@errors, "Illegal value for -min-max-expression <mme>: $options->{MIN_MAX_EXPRESSION}.  Must be >= 1.");
  }

  # max-html-score
  unless ($options->{MAX_HTML_SCORE} > 0 and $options->{MAX_HTML_SCORE} <= 1) {
    push(@errors, "Illegal value for -max-html-score <mhs>: $options->{MAX_HTML_SCORE}.  Must be >0 and <=1.");
  }

  # print errors
  foreach my $error (@errors) {
    print STDERR $error, "\n";
  }
  if (@errors) {
    print STDOUT "\nType 'cismapper -h' for more help on using cismapper.\n";
    exit(0);
  }

  # return options
  return $options;
} # arguments

sub job_ok {
  $logger->trace("call job_ok") if $logger;
  my ($registry, $job) = @_;
  my $entry = $registry->{$job};
  return defined($entry) && $entry->{status} == 0;
}

#
# Run a job and do something with the command line and stuff...
#
sub run_job {
  $logger->trace("call run_job") if $logger;
  my ($opts, $registry, $name, $required, $job, $suppress_messages, $outputs) = @_;
  # check that all requirements are avaliable
  foreach my $require (@{$required}) {
    my $reg = $registry->{$require};
    unless (defined($reg) && $reg->{status} == 0) {
      $logger->warn("skipped $name due to missing requirement $require") if $logger;
      print $progress_log "Skipped $name due to missing requirement $require\n";
      print "WARNING: skipped $name due to missing requirement $require.\n";
      return; # missing requirement
    }
  }
  my $msg_file = catfile($opts->{OUT_DIR}, $name . "_msgs.txt");
  # remove any previous message file
  unlink $msg_file if (-e $msg_file);
  # store any unstored messages
  my %msg = ();
  if (!(defined($job->{ALL_FILE}) || defined($job->{ALL_VAR}))) {
    if (!(defined($job->{ERR_FILE}) || defined($job->{ERR_VAR})) && 
      !(defined($job->{OUT_FILE}) || defined($job->{OUT_VAR}))) {
      $msg{ALL_FILE} = $msg_file;
    } elsif (!(defined($job->{ERR_FILE}) || defined($job->{ERR_VAR}))) {
      $msg{ERR_FILE} = $msg_file;
    } elsif (!(defined($job->{OUT_FILE}) || defined($job->{OUT_VAR}))) {
      $msg{OUT_FILE} = $msg_file;
    }
  }
  # convert the command to a string
  my $mono = $job->{MONO} ? "@WHICHMONO@" : "";
  my $cmd .= $mono . " " . &ExecUtils::stringify_args2(%{$job});
  # echo the command
  $logger->debug("About to invoke: $cmd") if $logger;
  print $progress_log "Invoking:\n  $cmd\n";
  unless($opts->{NOSTATUS}) {
    print STDOUT "Invoking:\n  $cmd\n";
    print "Starting $job->{PROG}: $cmd\n" if ($opts->{ECHO});
  }
  # run the command
  my $time;
  my $oot; # out of time flag
  my $status = &ExecUtils::invoke(%{$job}, %msg, TMPDIR => $temp_dir,
    TIME => \$time, OOT => \$oot, VALGRIND => $mono, LOGGER => $logger);  # HACK!!
  $logger->debug("Finished invoke of $name with status $status") if $logger;
  unless($opts->{NOSTATUS}) {
    print $progress_log "Finished invoke:\n".
      "  name: $name  status: $status  time: $time" . 
      ($oot ? "  (ran out of time!)\n" : "\n");
  }
  if ($opts->{ECHO}) {
    # echo program messages
    if (-s $msg_file && $opts->{ECHO} && !$suppress_messages) {
      my $fh;
      sysopen($fh, $msg_file, O_RDONLY);
      while (<$fh>) { print $_ };
      close($fh);
    }
    # print timeout warnings
    if ($oot) {
      print "Ran out of time! Stopping $job->{PROG}\n"
    }
    # print sucess or failure to stdout
    if ($status == 0) {
      unless($opts->{NOSTATUS}) { print "$job->{PROG} ran successfully in $time seconds\n"; }
    } else {
      if ($status == -1) {
        print "$job->{PROG} failed to run\n";
      } elsif ($status & 127) {
        print "$job->{PROG} process died with signal " . ($status & 127) . 
            ", " . (($status & 128) ? 'with' : 'without') . " coredump\n";
      } else {
        print "$job->{PROG} exited with error code " . ($status >> 8) . "\n";
      }
    }
  }
  # remove messages file if it's empty
  unlink($msg_file) unless (-s $msg_file);
  # find which directory it used
  my $out_dir;
  for (my $i = 0; $i < scalar(@{$job->{ARGS}})-1; $i++) {
    my $arg = $job->{ARGS}->[$i];
    if ($arg eq '-oc' || $arg eq '--oc') {
      $out_dir = $job->{ARGS}->[$i+1] if (-d $job->{ARGS}->[$i+1]);
      last;
    }
  }
  # store the result
  my $entry = {name => $name, cmd => $cmd, dir => $out_dir, 
    messages => $msg_file, status => $status, time => $time, 
    oot => $oot, outputs => $outputs};
  $registry->{$name} = $entry;
  push(@jobs, $entry); 
} # run_job

sub remove_commas {
  my ($string) = @_;
  $string =~ tr/,/ /;
  return($string);
} # remove_commas

sub output_html {
  $logger->trace("call output") if $logger;
  print $progress_log "Writing output\n";
  my ($opts, $registry) = @_;

  my $html_file = catfile($opts->{OUT_DIR}, 'cismapper.html');
  my $htmlwr = new HtmlMonolithWr($etc_dir, 'cismapper_template.html',
    $html_file, 'cismapper_data.js' => 'data');
  $htmlwr->set_logger($logger) if $logger;
  my $jsonwr = $htmlwr->output();
  my $out_dir = abs_path($opts->{OUT_DIR});
  $jsonwr->str_prop('version', $version);
  $jsonwr->str_prop('revision', $revision);
  $jsonwr->str_prop('release', $release);
  $jsonwr->str_array_prop('cmd', @cmd);

  $jsonwr->property('options');
  $jsonwr->start_object_value();
    $jsonwr->str_prop('locus_file', $opts->{LOCUS_FILE});
    $jsonwr->str_prop('rna_source', $opts->{RNA_SOURCE});
    $jsonwr->str_prop('tissues', &remove_commas($opts->{TISSUES}));
    $jsonwr->str_prop('histone_root', $opts->{HISTONE_ROOT});
    $jsonwr->str_prop('histone_names', &remove_commas($opts->{HISTONE_NAMES}));
    $jsonwr->str_prop('max_link_distances', &remove_commas($opts->{MAX_LINK_DISTANCES}));
    $jsonwr->str_prop('expression_root', $opts->{EXPRESSION_ROOT});
    $jsonwr->str_prop('expression_file_type', $opts->{EXPRESSION_FILE_TYPE});
    $jsonwr->str_prop('annotation_file_name', $opts->{ANNOTATION_FILE_NAME});
    $jsonwr->str_prop('annotation_type', $opts->{ANNOTATION_TYPE});
    $jsonwr->str_prop('transcript_types', &remove_commas($opts->{TRANSCRIPT_TYPES}));
    $jsonwr->str_prop('min_feature_count', $opts->{MIN_FEATURE_COUNT});
    $jsonwr->str_prop('min_max_expression', $opts->{MIN_MAX_EXPRESSION});
    $jsonwr->str_prop('max_html_score', $opts->{MAX_HTML_SCORE});
    $jsonwr->str_prop('description', $opts->{DESCRIPTION}) if $opts->{DESCRIPTION};
  $jsonwr->end_object_value();

  $jsonwr->property('programs');
  $jsonwr->start_array_value();
  for (my $i = 0; $i < scalar(@jobs); $i++){
    my $job = $jobs[$i];
    $jsonwr->start_object_value();
    $jsonwr->str_prop('name', $job->{name});
    $jsonwr->str_prop('cmd', $job->{cmd});
    $jsonwr->num_prop('status', $job->{status});
    $jsonwr->bool_prop('oot', $job->{oot});
    $jsonwr->num_prop('time', $job->{time});
    if (-e $job->{messages}) {
      $jsonwr->str_prop('messages_file', abs2rel(abs_path($job->{messages}), $out_dir))
    }
    $jsonwr->property('outputs');
    $jsonwr->start_array_value();
    if ($job->{outputs}) {
      for (my $j = 0; $j < scalar(@{$job->{outputs}}); $j++) {
        my $output = $job->{outputs}->[$j];
        if (-s $output->{FILE}) {
          $jsonwr->start_object_value();
          if ($output->{NAME}) {
            $jsonwr->str_prop('name', $output->{NAME});
          } else {
            my ($name) = fileparse($output->{FILE});
            $jsonwr->str_prop('name', $name);
          }
          $jsonwr->str_prop('file', abs2rel(abs_path($output->{FILE}), $out_dir));
          $jsonwr->end_object_value();
        }
      }
    }
    $jsonwr->end_array_value();
    $jsonwr->end_object_value();
  } # programs
  $jsonwr->end_array_value();

  # slurp the links file
  my $links_file = catdir($opts->{OUT_DIR}, 'links.tsv');
  my @lines;
  #open my $fh, '<', $links_file or die("Can't open links file $links_file: $!"); 
  my $fh;
  if (open($fh, '<', $links_file)) { 
    my $hdr = <$fh>;		# read the header line
    while (<$fh>) {
      next if (/^#/ || /^\s*$/);  # skip comment, blank lines
      push(@lines, $_);
    }
    close $fh;
  }

  # determine if we have TSS info
  my $have_tss_info = 0;
  my $num_tsses = 0;
  my $num_genes = 0;
  for (@lines) {
    chomp;
    my @fields = split '\t';
    my $tss_id = $fields[0];
    my $gene_id = $fields[14];
    $num_tsses = $fields[17] if ($num_tsses == 0);
    $num_genes = $fields[19] if ($num_genes == 0);
    if ($tss_id ne $gene_id) {
      $have_tss_info = 1;
      last;
    }
  }
  $jsonwr->bool_prop('have_tss_info', $have_tss_info);
  $jsonwr->num_prop('num_tsses', $num_tsses);
  $jsonwr->num_prop('num_genes', $num_genes);

  $jsonwr->property('regulatory_links');
  $jsonwr->start_array_value();
  for (@lines) {
    my @fields = split '\t';
    next if ($fields[5] > $opts->{MAX_HTML_SCORE});
    $jsonwr->start_object_value();
    $jsonwr->str_prop('gene_id', $fields[14]);
    $jsonwr->str_prop('gene_name', $fields[15]);
    $jsonwr->str_prop('tss_id', $fields[0]);
    $jsonwr->str_prop('tss_locus', $fields[1]);
    $jsonwr->str_prop('strand', $fields[2]);
    $jsonwr->str_prop('re_locus', $fields[3]);
    $jsonwr->num_prop('distance', $fields[11]);
    $jsonwr->str_prop('histone', $fields[12]);
    $jsonwr->str_prop('best_histone', $fields[13]);
    $jsonwr->num_prop('correlation', $fields[4]);
    $jsonwr->num_prop('score', $fields[5]);
    $jsonwr->num_prop('re_score', $fields[6]);
    $jsonwr->str_prop('best_re', $fields[7]);
    $jsonwr->num_prop('tss_score', $fields[8]);
    $jsonwr->str_prop('best_tss', $fields[9]);
    $jsonwr->num_prop('gene_score', $fields[10]);
    $jsonwr->num_prop('loci_per_tss', $fields[16]);
    $jsonwr->num_prop('tsses_per_gene', $fields[18]);
    $jsonwr->end_object_value();
  }
  $jsonwr->end_array_value();

  $htmlwr->output();
} # output_html

sub create_config_file{
  my ($opts, $max_link_distance) = @_;
  my $file = catdir($opts->{OUT_DIR}, $config_file);
  $opts->{CONFIG} = $file;
  open(my $fh, ">", $file) || die("Unable to open configuration file $file for writing: $!");
  my $contents = <<"END";
<document>
  <HistoneRoot>$opts->{HISTONE_ROOT}</HistoneRoot>
  <ExpressionRoot>$opts->{EXPRESSION_ROOT}</ExpressionRoot>
  <Tissues>$opts->{TISSUES}</Tissues>
  <ExpressionFileType>$opts->{EXPRESSION_FILE_TYPE}</ExpressionFileType>
  <MinFeatureCount>$opts->{MIN_FEATURE_COUNT}</MinFeatureCount>
  <MinimaxExpression>$opts->{MIN_MAX_EXPRESSION}</MinimaxExpression>
  <Annotation>
    <Type>$opts->{ANNOTATION_TYPE}</Type>
    <FileName>$opts->{ANNOTATION_FILE_NAME}</FileName>
  </Annotation>
  <TranscriptTypes>$opts->{TRANSCRIPT_TYPES}</TranscriptTypes>
  <ProcessedFileFormat>
    <Loci>$opts->{OUT_DIR}/$loci</Loci>
    <Histone>$opts->{OUT_DIR}/$histone_level</Histone>
    <TranscriptLocation>$opts->{OUT_DIR}/$transcript_location</TranscriptLocation>
    <TranscriptExpression>$opts->{OUT_DIR}/$transcript_expression</TranscriptExpression>
  </ProcessedFileFormat>
  <MaximumLinkDistance>$max_link_distance</MaximumLinkDistance>
</document>
END
  print $fh $contents;
  close($fh);
} # create_config_file

sub build_map {
  $logger->trace("call CorrelationMapBuilder") if $logger;
  my ($opts, $registry) = @_;
  my $t_build_map_start = [&gettimeofday()];
  #my $full_timeout = &time_allocate($opts, "build_map");
  my $full_timeout = undef;

  my $timeout = undef;
  if (defined($full_timeout)) {
    $timeout = $full_timeout - int(&tv_interval($t_build_map_start, [&gettimeofday()]) + 0.5);
    last if $timeout <= 0;
  }

  # delete any existing map file because cismapper.exe will append
  my $map_file = catdir($opts->{OUT_DIR}, "map.tsv");
  if (-e $map_file) {
    unlink $map_file || die("Unable to unlink the old map file $map_file: $!");
  }

  # run cismapper.exe on each type of histone, appending to the map file
  my @histones = split ",", $opts->{HISTONE_NAMES};
  my @distances = split ",", $opts->{MAX_LINK_DISTANCES};
  my $i = 0;
  foreach my $histone (@histones) {
    my $job_id = 'build_map';
    my $max_link_distance = $distances[$i++] + 0;
    &create_config_file($opts, $max_link_distance);
    my @args = (
      '-Mode', 'CorrelationMapBuilder', 
      '-Stage', 'RunPipeline',
      '-Config', $opts->{CONFIG},
      '-HistoneName', $histone,
      '-RnaSource', $opts->{RNA_SOURCE},
      '-Tissue', $opts->{TISSUE},
      '-LocusFileName', $opts->{LOCUS_FILE},
      '-MapFileName', $map_file
    );
    push (@args, '-UseGenes') if ($opts->{USE_GENES});
    &run_job($opts, $registry, $job_id, [], {
      MONO => 1, PROG => 'cismapper.exe', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout}, 1, [ ]);
  } # histone
} # build_map

sub make_tsv_files {
  $logger->trace("call make_tsv_files") if $logger;
  my ($opts, $registry) = @_;
  my $t_make_tsv_files_start = [&gettimeofday()];
  #my $full_timeout = &time_allocate($opts, "predict_links");
  my $full_timeout = undef;

  my $timeout = undef;
  if (defined($full_timeout)) {
    $timeout = $full_timeout - int(&tv_interval($t_make_tsv_files_start, [&gettimeofday()]) + 0.5);
    last if $timeout <= 0;
  }

  my $job_id = 'make_tsv_files';
  my $mono = 0;
  my $prog = 'cismapper';
  my $map_file = catdir($opts->{OUT_DIR}, 'map.tsv');
  my $annot_file = $opts->{ANNOTATION_FILE_NAME};
  my @args = (
    '-make-tsv-files',
    '-oc', $opts->{OUT_DIR},
    '-map-file', $map_file,
    '-annot-file', $annot_file
  );
  &run_job($opts, $registry, $job_id, [], {
    MONO => $mono, PROG => $prog, BIN => $secondary_bin_dir, ARGS => \@args, TIMEOUT => $timeout}, 1, [ ]);
} # make_tsv_files

sub predict_links {
  $logger->trace("call PredictLinks") if $logger;
  my ($opts, $registry) = @_;
  my $t_predict_links_start = [&gettimeofday()];
  #my $full_timeout = &time_allocate($opts, "predict_links");
  my $full_timeout = undef;

  my $timeout = undef;
  if (defined($full_timeout)) {
    $timeout = $full_timeout - int(&tv_interval($t_predict_links_start, [&gettimeofday()]) + 0.5);
    last if $timeout <= 0;
  }

  my $job_id = 'predict_links';
  my $map_file = catdir($opts->{OUT_DIR}, 'map.tsv');
  my $links_file = catdir($opts->{OUT_DIR}, $opts->{USE_GENES} ? "gene_links.tsv" : "tss_links.tsv");
  my @args = (
    '-Mode', 'PredictLinks', 
    '-MapFileName', $map_file,
    '-OutputFile', $links_file
  );
  push (@args, '-UseGenes') if ($opts->{USE_GENES});
  &run_job($opts, $registry, $job_id, [], {
    MONO => 1, PROG => 'cisMapper.exe', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout}, 1, [ ]);
} # predict_links

sub predict_targets{
  $logger->trace("call PredictTargets") if $logger;
  my ($opts, $registry) = @_;
  my $t_predict_targets_start = [&gettimeofday()];
  #my $full_timeout = &time_allocate($opts, "predict_targets");
  my $full_timeout = undef;

  my $timeout = undef;
  if (defined($full_timeout)) {
    $timeout = $full_timeout - int(&tv_interval($t_predict_targets_start, [&gettimeofday()]) + 0.5);
    last if $timeout <= 0;
  }

  my $job_id = 'predict_targets';
  my $map_file = catdir($opts->{OUT_DIR}, 'map.tsv');
  my $targets_file = catdir($opts->{OUT_DIR}, $opts->{USE_GENES} ? 'gene_targets.tsv' : 'tss_targets.tsv');
  my @args = (
    '-Mode', 'PredictTargets', 
    '-MapFileName', $map_file,
    '-OutputFile', $targets_file
  );
  push (@args, '-UseGenes') if ($opts->{USE_GENES});
  &run_job($opts, $registry, $job_id, [], {
    MONO => 1, PROG => 'cismapper.exe', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout}, 1, [ ]);
} # predict_targets

sub predict_elements {
  $logger->trace("call PredictElements") if $logger;
  my ($opts, $registry) = @_;
  my $t_predict_elements_start = [&gettimeofday()];
  #my $full_timeout = &time_allocate($opts, "predict_elements");
  my $full_timeout = undef;

  my $timeout = undef;
  if (defined($full_timeout)) {
    $timeout = $full_timeout - int(&tv_interval($t_predict_elements_start, [&gettimeofday()]) + 0.5);
    last if $timeout <= 0;
  }

  my $job_id = 'predict_elements';
  my $map_file = catdir($opts->{OUT_DIR}, 'map.tsv');
  my $elements_file = catdir($opts->{OUT_DIR}, $opts->{USE_GENES} ? 'gene_elements.tsv' : 'tss_elements.tsv');

  my $prog;
  my @args;
  my $mono;
  if ($opts->{USE_GENES}) {
    $mono = 0;
    $prog = 'cismapper';
    @args = (
      '-output-tsv-files',
      '-oc', $opts->{OUT_DIR},
      '-map-file', $map_file,
      '-output-file', $elements_file
    );
  } else {
    $mono = 1;
    $prog = 'cismapper.exe';
    @args = (
      '-Mode', 'PredictElements',
      '-MapFileName', $map_file,
      '-OutputFile', $elements_file
    );
  }
  &run_job($opts, $registry, $job_id, [], {
    MONO => $mono, PROG => $prog, BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout}, 1, [ ]);

  return 1;
} # predict_elements

sub browser_track {
  $logger->trace("call BrowserTrackFromMap") if $logger;
  my ($opts, $registry) = @_;
  my $t_browser_track_start = [&gettimeofday()];
  #my $full_timeout = &time_allocate($opts, "browser_track");
  my $full_timeout = undef;

  my $timeout = undef;
  if (defined($full_timeout)) {
    $timeout = $full_timeout - int(&tv_interval($t_browser_track_start, [&gettimeofday()]) + 0.5);
    last if $timeout <= 0;
  }

  my $job_id = 'browser_track';
  my $map_file = catdir($opts->{OUT_DIR}, 'map.tsv');
  my $tracks_file = catdir($opts->{OUT_DIR}, 'tracks.bed');
  my @args = (
    '-Mode', 'BrowserTrackFromMap', 
    '-MapFileName', $map_file,
    '-OutputFile', $tracks_file
  );
  push (@args, '-UseGenes') if ($opts->{USE_GENES});
  # remove the browser track in case the program fails and creates an empty one
  unlink $tracks_file if (-e $tracks_file);
  open(my $fh, ">", $tracks_file);
  close($fh);
  &run_job($opts, $registry, $job_id, [], {
    MONO => 1, PROG => 'cismapper.exe', BIN => $bin_dir, ARGS => \@args, TIMEOUT => $timeout}, 1, [ ]);
} # browser_track

#
# Compute the p-value of the minimum of n p-values.
#
sub ev {
  my ($p, $n) = @_;
  my $pv = $p*$n;
  return($pv <= 0.001 ? $pv : 1 - (1-$p)**$n);
}

sub print_command_line {
  my ($fh) = @_;
  printf($fh "\n# CisMapper (Prediction of Regulatory Links): Version $version compiled on $release\n");
  printf($fh "# The format of this file is described in $source_url/cismapper-output-format.html.\n");
  printf($fh "# %s\n", join(" ", @cmd));
} # print_command_line

sub make_tsv_files_helper {
  my ($outdir, $map_file, $annot_file) = @_;
  my ($fh, @lines);

  # Create a gene_id to gene_name dictionary
  my %gene_id_to_name = {};
  open $fh, '<', $annot_file or die $!;
  while (<$fh>) {
    my @fields = split '\t';
    my $data = $fields[8];
    @fields = split ';', $data;
    my ($gene_id, $gene_name) = ("", "");
    for (@fields) {
      if (/^\s*(\S+)\s+"(\S+)"/) {
        if ($1 eq "gene_id") {
          my $id = $2;
          $id =~ /^([^\.]+)/;
          $gene_id = $1;
        }
        $gene_name = $2 if ($1 eq "gene_name");
      }
    }
    #print "ID: $gene_id NAME: $gene_name\n\n";
    if ($gene_id) { $gene_id_to_name{$gene_id} = $gene_name ? $gene_name : "."; }
  }
  close $fh;

  # slurp the map file
  #open $fh, '<', $map_file or die("Can't open map file $map_file: $!"); 
  if (open($fh, '<', $map_file)) {
    @lines = <$fh>;
    close $fh;
  }
  
  # Get the TSS and gene score: minimum score over all its links
  # and save the other value in arrays.
  # Adjust the TSS scores for the number of loci.

  # @scores		the raw link score, indexed by line number in the map file
  # %re_scores		minimum TSS score adjusted by number of REs linked to it
  # %tss_scores		minimum re_score of a TSS, adjusted by the number TSSes
  # %gene_scores	minimum re_score of a gene, adjusted by the number genes
  # %re_count		number of REs linked to the TSS
  # %best_re		the best RE for the given TSS
  # %best_tss		the best TSS for the given gene
  # distance		adjust distance for strand

  my $file;
  my $have_tss_info = 0;
  my (@tsses, @genes, @new, @loci, @corrs, @scores);
  my (%re_scores, %tss_scores, %gene_scores);
  my (%re_count, %tss_count, %tss_to_gene, %tsses, %genes);
  my (%best_re, %best_tss, %best_histone);
  for (@lines) {
    chomp;
    my @fields = split /\t/;
    my $tss = $fields[3];
    my $locus = $fields[6];
    my $corr = $fields[7] > 0 ? 0 : 1;
    my $score = $fields[8];
    my $histone = $fields[10];
    my $gene = $fields[11];
    push @loci, $locus;
    push @corrs, $corr;
    push @scores, $score;
    push @tsses, $tss;
    push @genes, $gene;
    # Update the count of the number of Loci for this TSS.
    $re_count{$tss} = defined $re_count{$tss} ? $re_count{$tss}+1 : 1;
    # Save minimum link score for TSS.
    if (! defined($re_scores{$tss}) || $score < $re_scores{$tss}) {
      $re_scores{$tss} = $score;
      $best_re{$tss} = $locus;
      $best_histone{$tss} = $histone;
    }
    # Keep track of gene for this TSS.
    $tss_to_gene{$tss} = $gene;
    # Keep track of distinct tsses and genes.
    $tsses{$tss} = 1;
    $genes{$gene} = 1;
    # See if we have TSS info.
    $have_tss_info = 1 if (!$have_tss_info && $tss ne $gene);
  }

  # Adjust the re_score for the number of RE loci linked to the TSS.
  my $n_tsses = (keys %re_scores);
  foreach my $tss (keys %re_scores) {
    my $n_loci = $re_count{$tss};
    my $re_score = $re_scores{$tss};
    $re_score = $re_scores{$tss} = &ev($re_score, $n_loci);
    # Adjust the re_score for the number of TSSes to get the tss_score.
    $tss_scores{$tss} = &ev($re_score, $n_tsses);
    # Save minimum RE score for this gene.
    my $gene = $tss_to_gene{$tss};
    if (! defined($gene_scores{$gene}) || $re_score < $gene_scores{$gene}) {
      $gene_scores{$gene} = $re_score;
      $best_tss{$gene} = $tss;
    }
    # Update the count of the number TSSes for this Gene.
    $tss_count{$gene} = defined $tss_count{$gene} ? $tss_count{$gene}+1 : 1;
  }

  # Adjust the gene_score for the number of its TSSes
  # and then for the number of genes.
  my $n_genes = (keys %gene_scores);
  foreach my $gene (keys %gene_scores) {
    my $gene_score = $gene_scores{$gene};
    $gene_scores{$gene} = &ev($gene_score, $tss_count{$gene});
    $gene_score = $gene_scores{$gene};
    $gene_scores{$gene} = &ev($gene_score, $n_genes);
  }

  # Put the adjusted scores into arrays to make sorting faster.
  my (@re_scores, @tss_scores, @gene_scores);
  for (@lines) {
    my @fields = split /\t/;
    my $tss = $fields[3];
    my $score = $fields[8];
    push @re_scores, $re_scores{$tss};
    push @tss_scores, $tss_scores{$tss};
    push @gene_scores, $gene_scores{$tss};
  }

  # Links List
  #
  # Sort links by TSS score, 
  # then by TSS ID,
  # then by correlation direction,
  # then by locus.
  @new = @lines[ 
      sort {
        $tss_scores[$a] <=> $tss_scores[$b] ||
        $tsses[$a] cmp $tsses[$b] ||
        $corrs[$a] <=> $corrs[$b] ||
        $loci[$a] cmp $loci[$b]
      }0..$#lines
    ];
  my $links_file = catdir($outdir, 'links.tsv');
  open $fh, '>', $links_file or die("Can't open links file $links_file: $!"); 
  printf($fh "TSS_ID\tTSS_Locus\tStrand\tRE_Locus\tCorrelation\t" .
    "Score\tRE_Score\tBest_RE\t" .
    "TSS_Score\tBest_TSS\tGene_Score\t" .
    "Distance\tHistone\tBest_Histone\tGene_ID\tGene_Name\t" .
    "Loci_per_TSS\tNum_TSSes\tTSSes_per_Gene\tNum_Genes\n"
  );
  for (@new) {
    my @fields = split /\t/;
    my $tss = $fields[3];
    my $strand = $fields[5];
    my $re_locus = $fields[6];
    my $correlation = $fields[7];
    my $score = $fields[8];
    my $distance = ($strand eq "+") ? -$fields[9] : $fields[9];
    my $histone = $fields[10];
    my $gene = $fields[11];
    my $gene_name = $gene_id_to_name{$gene} ? $gene_id_to_name{$gene} : ".";
    my $n_loci = $re_count{$tss};
    my $best_histone = ($histone eq $best_histone{$tss}) ? "T" : "F";
    my $best_re = ($best_histone eq "T" and $re_locus eq $best_re{$tss}) ? "T" : "F";
    my $best_tss = ($best_re eq "T" and $tss eq $best_tss{$gene}) ? "T" : "F";
    printf($fh "%s\t%s:%d-%d\t%s\t%s\t%.5f\t", $tss, @fields[0..2], $strand, $re_locus, $correlation);
    printf($fh "%.3e\t%.3e\t%1s\t", $score, $re_scores{$tss}, $best_re);
    printf($fh "%.3e\t%1s\t%.3e\t", $tss_scores{$tss}, $best_tss, $gene_scores{$gene});
    printf($fh "%d\t%s\t%s\t%s\t%s\t", $distance, $histone, $best_histone, $gene, $gene_name);
    printf($fh "%d\t%d\t%d\t%d\n", $re_count{$tss}, $n_tsses, $tss_count{$gene}, $n_genes);
  }
  &print_command_line($fh);
  close $fh;

  # TSS Targets and Elements List
  my ($efh, $tfh);
  my $targets_file = catdir($outdir, 'tss_targets.tsv');
  my $elements_file = catdir($outdir, 'tss_elements.tsv');
  if ($have_tss_info) {
    # Sort links by the adjusted minimum TSS score,
    # then by score,
    # then by TSS ID
    # then by locus.
    @new = @lines[ 
	sort {
	  $tss_scores[$a] <=> $tss_scores[$b] ||
	  $scores[$a] <=> $scores[$b] ||
	  $tsses[$a] cmp $tsses[$b] ||
	  $loci[$a] cmp $loci[$b]
	}0..$#lines
      ];
    open $tfh, '>', $targets_file or die("Can't open targets file $targets_file: $!"); 
    open $efh, '>', $elements_file or die("Can't open elements file $elements_file: $!"); 
    printf($efh "TSS_ID\tGene_ID\tGene_Name\tStrand\tRE_Locus\tDistance\tHistone\tCorrelation_Sign\tScore\tTSS_Score\n");
    printf($tfh "TSS_ID\tGene_ID\tGene_Name\tStrand\tRE_Locus\tDistance\tHistone\tCorrelation_Sign\tScore\tTSS_Score\n");
    for (@new) {
      chomp;
      my @fields = split /\t/;
      my $tss = $fields[3];
      my $strand = $fields[5];
      my $locus = $fields[6];
      my $corr = $fields[7] > 0 ? 0 : 1;
      my $score = $fields[8];
      my $distance = ($strand eq "+") ? -$fields[9] : $fields[9];
      my $histone = $fields[10];
      my $gene = $fields[11];
      my $gene_name = $gene_id_to_name{$gene} ? $gene_id_to_name{$gene} : ".";
      # targets
      if ($tsses{$tss}) {
	 printf($tfh "%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%.3e\t%.3e\n", 
	   $tss, $gene, $gene_name, $strand, $locus, $distance, $histone, $corr?'-':'+', $score, $tss_scores{$tss});
         $tsses{$tss} = 0;
      }
      # elements
      printf($efh "%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%.3e\t%.3e\n", 
	$tss, $gene, $gene_name, $strand, $locus, $distance, $histone, $corr?'-':'+', $score, $tss_scores{$tss});
    }
    &print_command_line($tfh);
    &print_command_line($efh);
    close $tfh;
    close $efh;
  } else {
    unlink $targets_file if (-e $targets_file);
    unlink $elements_file if (-e $elements_file);
  }

  # Gene Targets and Elements Lists
  #
  # Sort links by the adjusted minimum gene score, 
  # then by link score,
  # then by gene ID,
  # then by locus.
  @new = @lines[ 
      sort {
        $gene_scores[$a] <=> $gene_scores[$b] ||
        $scores[$a] <=> $scores[$b] ||
        $genes[$a] cmp $genes[$b] ||
	$loci[$a] cmp $loci[$b]
      }0..$#lines
    ];
  $targets_file = catdir($outdir, 'gene_targets.tsv');
  $elements_file = catdir($outdir, 'gene_elements.tsv');
  open $tfh, '>', $targets_file or die("Can't open targets file $targets_file: $!"); 
  open $efh, '>', $elements_file or die("Can't open elements file $elements_file: $!"); 
  printf($tfh "Gene_ID\tGene_Name\tTSS_ID\tStrand\tRE_Locus\tDistance\tHistone\tCorrelation_Sign\tScore\tGene_Score\n");
  printf($efh "Gene_ID\tGene_Name\tTSS_ID\tStrand\tRE_Locus\tDistance\tHistone\tCorrelation_Sign\tScore\tGene_Score\n");
  for (@new) {
    chomp;
    my @fields = split /\t/;
    my $tss = $fields[3];
    my $strand = $fields[5];
    my $locus = $fields[6];
    my $corr = $fields[7] > 0 ? 0 : 1;
    my $score = $fields[8];
    my $distance = ($strand eq "+") ? -$fields[9] : $fields[9];
    my $histone = $fields[10];
    my $gene = $fields[11];
    my $gene_name = $gene_id_to_name{$gene} ? $gene_id_to_name{$gene} : ".";
    # targets
    if ($genes{$gene}) {
      printf($tfh "%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%.3e\t%.3e\n", 
        $gene, $gene_name, $tss, $strand, $locus, $distance, $histone, $corr?'-':'+', $score, $gene_scores{$gene});
      $genes{$gene} = 0;
    }
    # elements
    printf($efh "%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%.3e\t%.3e\n", 
      $gene, $gene_name, $tss, $strand, $locus, $distance, $histone, $corr?'-':'+', $score, $gene_scores{$gene});
  }
  &print_command_line($tfh);
  &print_command_line($efh);
  close $tfh;
  close $efh;

} # make_tsv_files_helper

sub main {
  my $commandline = join " ", @cmd;
  &initialise();
  my $opts = &arguments();
  mkpath($opts->{OUT_DIR}) unless (-e $opts->{OUT_DIR});
  &init_progress_log($opts->{OUT_DIR});
  my $registry = {};
  my $transcript_file = catdir($opts->{OUT_DIR}, $transcript_location);
  my $loci_file = catdir($opts->{OUT_DIR}, $loci);
  my $map_file = catdir($opts->{OUT_DIR}, 'map.tsv');

  &build_map($opts, $registry) unless ($opts->{NO_MAP});
  &make_tsv_files($opts, $registry);
  #&browser_track($opts, $registry);
  &output_html($opts, $registry);

  # remove some unnecessary files
  unlink($transcript_file) if (-e $transcript_file);
  unlink($loci_file) if (-e $loci_file);
  my $config = catdir($opts->{OUT_DIR}, $config_file);
  unlink($config) if (-e $config);
  unlink($map_file) if (-e $map_file && $opts->{REMOVE_MAP});

  print $progress_log "Done\n";
  close($progress_log);
  $logger->trace("done") if $logger;
}
