#!/usr/bin/ruby -w

# doEncodeLoad.rb - load ENCODE data submission generated by the
#                       automated submission pipeline
# Reads load.ra for information about what to do

# Writes progress messages to STDOUT and error information to STDERR
# Exits with status 0 if the load succeeds.

# DO NOT EDIT the /cluster/bin/scripts copy of this file -- 
# edit the CVS'ed source at:
# $Header: /projects/compbio/cvsroot/kent/src/hg/encode/encodeLoad/doEncodeLoad.rb,v 1.10 2008/03/05 23:08:34 galt Exp $

$scripts = "/cluster/bin/scripts"

require "#{$scripts}/err.rb"
require "#{$scripts}/verbose.rb"
require "#{$scripts}/ra.rb"
#require "FileUtils"
require "tempfile"


# Global constants
# (none are defined at present)

# Global variables
# Load instructions read by the main section; the path is relative to the
# submission directory because the script changes into it before reading.
$loadRa = 'out/load.ra'
# Copy of load.ra that the main section writes before loading anything.
$unloadRa = 'out/unload.ra'
# Submission directory, set from the second command-line argument.
$submitDir = ""
# Submission type, set from the first command-line argument.
$submitType = ""
# Passed to hgLoadWiggle as its -tmpDir option.
$tempDir = "/data/tmp"
# Database name handed to the loader programs; this is only the initial
# value, the main loop overwrites it with each stanza's "assembly" setting.
$encodeDb = "hg18"
# Suffix taken from the parent directory name of the submission directory
# (from its last underscore onward); empty when there is no underscore.
$encInstance = ""
# Final path component of the submission directory.
$encProject = ""
# Directory searched for <tableType>.sql files by loadBed5Plus.
$sqlCreate = "/cluster/bin/sqlCreate"

# Hand the command-line synopsis to errAbort; called when the script is not
# given exactly two arguments.
def usage
  synopsis = "usage: doEncodeLoad.rb submission_type project_submission_dir\n"
  errAbort(synopsis)
end

# Load gene prediction file(s) into a table with ldHgGene -genePredExt.
#
# tableName - name of the table handed to ldHgGene
# fileList  - file name(s); interpolated unquoted into the shell command, so
#             a space-separated list is treated as several input files
#
# Lines beginning with "track" or "browser" are filtered out and the rest is
# piped into ldHgGene for the database named by $encodeDb.  Loader output is
# captured in out/loadGene.out; when the pipeline fails that output is passed
# to errAbort.
def loadGene(tableName, fileList)

  #TEST by replacing "cat" with  "head -1000 -q"

  # The whole of every file is loaded (previously a leftover test limiter,
  # "head -1000", truncated each file).  The egrep alternation is grouped so
  # the ^ anchor applies to both words; ungrouped, '^track|browser' also drops
  # any data line that merely contains "browser" somewhere.
  cmd = "cat #{fileList} | egrep -v '^(track|browser)' | ldHgGene -genePredExt #{$encodeDb} #{tableName} stdin > out/loadGene.out 2>&1"

  # system reports the exit status of the last command in the pipeline.
  if system( cmd )
    print "#{fileList} Loaded\n"
    #debug restore: File.delete "genePred.tab"
  else
    STDERR.print "ERROR: File(s) '#{fileList}' failed gene load.\n"
    errAbort File.read("out/loadGene.out")
  end
end




# Encode wiggle file(s) and load them into a table with hgLoadWiggle.
#
# tableName - name of the table; also the base name of the .wib file that
#             wigEncode writes into the current directory
# fileList  - file name(s); interpolated unquoted into the shell command, so
#             a space-separated list is treated as several input files
#
# After a successful load, /gbdb/<db>/wib/<tableName>.wib is (re)created as a
# symbolic link to the freshly written .wib file.  Loader output is captured
# in out/loadWig.out; when the pipeline fails that output is passed to
# errAbort.
def loadWig(tableName, fileList)

  #TEST by replacing "cat" with  "head -1000 -q"

  cmd = "cat #{fileList} | wigEncode stdin stdout #{tableName}.wib | hgLoadWiggle -pathPrefix=/gbdb/#{$encodeDb}/wib -tmpDir=#{$tempDir} #{$encodeDb} #{tableName} stdin >out/loadWig.out 2>&1"

  if system( cmd )
      # A symlink's relative target is resolved from the directory holding the
      # link, so linking to the bare "<tableName>.wib" made the link point at
      # itself.  Link to the absolute path of the file just written instead.
      wibFile = File.expand_path("#{tableName}.wib")
      wibLink = "/gbdb/#{$encodeDb}/wib/#{tableName}.wib"

      # Argument-list form: no shell is involved, so unusual characters in the
      # paths cannot be misread.  Removing a stale link stays best-effort.
      system( "rm", "-f", wibLink )
      unless system( "ln", "-s", wibFile, wibLink )
          errAbort "unexpected error: could not link #{wibLink} to #{wibFile}\n"
      end
      print "#{fileList} Loaded\n"
  else
      STDERR.print "ERROR: File(s) #{fileList} failed wiggle load.\n"
      errAbort File.read("out/loadWig.out")
  end
end


# Load BED file(s) into a table with hgLoadBed.
#
# tableName - name of the table handed to hgLoadBed
# fileList  - file name(s); interpolated unquoted into the shell command, so
#             a space-separated list is treated as several input files
#
# Lines beginning with "track" or "browser" are filtered out before loading
# into the database named by $encodeDb.  Loader output is captured in
# out/loadBed.out; when the pipeline fails that output is passed to errAbort.
def loadBed(tableName, fileList)

  #TEST by replacing "cat" with  "head -1000 -q"

  # The egrep alternation is grouped so the ^ anchor applies to both words;
  # ungrouped, '^track|browser' also drops any data line that merely contains
  # "browser" somewhere.
  cmd = "cat #{fileList} | egrep -v '^(track|browser)' | hgLoadBed #{$encodeDb} #{tableName} stdin -tmpDir=out >out/loadBed.out 2>&1"

  #STDERR.puts "debug: cmd = [#{cmd}]" #debug

  # system reports the exit status of the last command in the pipeline.
  if system( cmd )
      print "#{fileList} Loaded\n"
      #debug restore: File.delete "out/bed.tab"
  else
      STDERR.print "ERROR: File(s) #{fileList} failed bed load.\n"
      errAbort File.read("out/loadBed.out")
  end

end


# Load "bed 5 +" file(s) into a table with hgLoadBed, using a custom table
# definition.
#
# tableName - name of the table to create and load
# fileList  - file name(s); interpolated unquoted into the shell command, so
#             a space-separated list is treated as several input files
# sqlTable  - base name of the definition file <$sqlCreate>/<sqlTable>.sql;
#             every occurrence of this name inside the file is replaced by
#             tableName before the definition is handed to hgLoadBed
#
# Calls errAbort when the .sql file is missing, when it does not mention
# sqlTable at all, or when the load pipeline fails (passing the contents of
# out/loadBed.out in the last case).
def loadBed5Plus(tableName, fileList, sqlTable)

  sqlFile = "#{$sqlCreate}/#{sqlTable}.sql"
  unless File.exist? sqlFile
    errAbort "#{sqlFile} not found "
  end
  sql = File.read sqlFile
  # gsub! returns nil when it replaced nothing, i.e. the definition never
  # mentions sqlTable.  (The message used to interpolate the undefined global
  # $sqlTable, which printed as an empty string.)
  unless sql.gsub!(sqlTable, tableName)
    errAbort "sql names do not match for substitution: #{sqlTable} #{tableName}"
  end

  #STDERR.puts "sql=[#{sql}]"  #debug

  temp_file = Tempfile.new('sql')
  begin
    temp_file.print sql
    # Flush so hgLoadBed sees the complete definition when it opens the path.
    temp_file.flush

    #TEST by replacing "cat" with  "head -1000 -q"

    # The egrep alternation is grouped so the ^ anchor applies to both words;
    # ungrouped, '^track|browser' also drops any data line that merely
    # contains "browser" somewhere.
    cmd = "cat #{fileList} | egrep -v '^(track|browser)' | hgLoadBed #{$encodeDb} #{tableName} stdin -tmpDir=out -sqlTable=#{temp_file.path} >out/loadBed.out 2>&1"

    #STDERR.puts "debug: cmd = [#{cmd}]" #debug

    if system( cmd )
        print "#{fileList} Loaded\n"
        #debug restore: File.delete "out/bed.tab"
    else
        STDERR.print "ERROR: File(s) #{fileList} failed bed load.\n"
        errAbort File.read("out/loadBed.out")
    end
  ensure
    # Close the temporary file on the failure path as well.
    temp_file.close
  end

end


############################################################################
# Main

# Change dir to submission directory obtained from command-line

# Exactly two arguments are required: submission type and submission directory.
usage if ARGV.length != 2

$submitType = ARGV[0]   # only forwarded to doEncodeUnload.rb below
$submitDir = ARGV[1]

# The project is the last path component of the submission directory.  The
# instance suffix is the tail of the parent directory name starting at its
# last underscore (underscore included), or empty when there is none.
$encInstance = File.dirname($submitDir)
$encProject = File.basename($submitDir)

und = $encInstance.rindex('_')
if und
  $encInstance = $encInstance[und..-1]
else
  $encInstance = ""
end

# All relative paths used from here on (out/..., *.wib) are inside the
# submission directory.
Dir.chdir $submitDir

# clean out any stuff from previous load
# Argument-list form: the arguments reach the unload script verbatim, without
# being re-parsed by a shell.
unless system("doEncodeUnload.rb", $submitType, $submitDir)
  errAbort "unexpected error running doEncodeUnload.rb cleanup script"
end

unless File.exist? $loadRa
  errAbort "unexpected error: load.ra not found\n"
end

# Keep a copy of the load instructions as unload.ra, and stop if the copy
# cannot be made rather than loading tables without it.
#TODO change to : FileUtils.cp $loadRa, $unloadRa
unless system("cp", $loadRa, $unloadRa)
  errAbort "unexpected error: could not copy #{$loadRa} to #{$unloadRa}\n"
end

verbose 2, "$encInstance=[#{$encInstance}] und=#{und}\n"

verbose 1, "Loading project in directory #{$submitDir}\n"


# Load files listed in load.ra

#debug: to trace parsing, raise $opt_verbose to 2 before readRaFile;
# the saved value is restored right after.
savev = $opt_verbose

ra = readRaFile $loadRa

$opt_verbose = savev  #debug

#STDERR.puts "debug: ra.length=#{ra.length}\n"  #debug
#STDERR.puts "debug: #{ra.inspect}\n"  #debug

STDERR.puts "\n"  #debug

# Each entry is used as a pair: x[0] is printed as the stanza label and x[1]
# is the hash of that stanza's settings.
ra.each do |x|
  h = x[1]
  tablenameExt = "#{h["tablename"]}#{$encInstance}_#{$encProject}"
  verbose 2, "debug: #{x[0]}\n"
  verbose 2, "  tablename #{h["tablename"]}\n"
  verbose 2, "  type      #{h["type"]}\n"
  verbose 2, "  tableType #{h["tableType"]}\n"
  verbose 2, "  assembly  #{h["assembly"]}\n"
  verbose 2, "  files     #{h["files"]}\n"
  verbose 2, "  tablenameExt #{tablenameExt}\n"

  # temporary work-around: the loaders read the target database from the
  # global $encodeDb, so point it at this stanza's assembly.
  $encodeDb = h["assembly"]

  case h["type"]
  when "genePred"
    loadGene tablenameExt, h["files"]
  when "wig"
    loadWig tablenameExt, h["files"]
  when "bed 5 +"
    loadBed5Plus tablenameExt, h["files"], h["tableType"]
  when "bed 3", "bed 4", "bed 5", "bed 6"
    loadBed tablenameExt, h["files"]
  else
    errAbort "unexpected error: unknown type #{h["type"]} in load.ra\n"
  end
  STDERR.puts "\n"  #debug
end

exit 0

