########################################
# The contents of this file are subject to the MLX PUBLIC LICENSE version
# 1.0 (the "License"); you may not use this file except in
# compliance with the License.
# 
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
# the License for the specific language governing rights and limitations
# under the License.
# 
# The Original Source Code is "compClust", released 2003 September 03.
# 
# The Original Source Code was developed by the California Institute of
# Technology (Caltech).  Portions created by Caltech are Copyright (C)
# 2002-2003 California Institute of Technology. All Rights Reserved.
########################################
#
#       Author:  Lucas Scharenbroich
#
# Last Modified: Sept. 8, 2001 
#

"""
 The Launcher wrapper is intended to process all global options common
 to every wrapper.  Parameter checking, file permission checks and other
 common housekeeping operations are performed here rather than in the
 wrappers themselves.
"""

import os
import re
import sys
import string
import inspect

from compClust.util import Usage
from compClust.util import Verify
from compClust.util import WrapperUtil
from compClust.util.TimeStampedPrintStream import TimeStampedPrintStream

from compClust.mlx.datasets import Dataset
from compClust.mlx.ML_Algorithm import ML_Algorithm

# Stream used for every Launcher log message.  The prefix uses strftime-style
# directives (presumably expanded by TimeStampedPrintStream -- confirm).
# '%M' is the minute; the previous '%m' printed the month number after the
# hour.
MESSAGE_STREAM = TimeStampedPrintStream("%Y-%b-%d %H:%M: Launcher: ")

def parseCmdline(opts):
  """(parms, args) = parseCmdline(opts)

  Split a list of command line tokens into options and positional arguments.

  Tokens beginning with '--' are options of the form '--xxxx=yyyy' and are
  collected into the dictionary `parms` (key xxxx, value yyyy).  Only the
  first '=' separates the key from the value, so a value may itself contain
  '=' characters.  An option given without a value (e.g. '--help') maps to
  the empty string.

  A value whose first character is a decimal digit is converted to a number
  when it parses as one: first as an integer literal (base inferred from
  its prefix, so e.g. 0x1F is accepted), then as a float.  A value that
  parses as neither is kept as a string.

  Tokens not beginning with '--' are appended, in order, to the list `args`.
  """

  parms = {}
  args  = []

  for token in opts:

    #
    # anything without a leading '--' is a positional argument
    #

    if token[:2] != '--':
      args.append(token)
      continue

    #
    # split on the first '=' only; later '=' characters belong to the value
    #

    kvpair = token[2:].split('=', 1)
    key    = kvpair[0]
    value  = ''

    if len(kvpair) == 2:
      value = kvpair[1]

    if len(value) > 0 and value[0] in string.digits:

      #
      # could be a float or (probably) an integer.  Parse it explicitly
      # instead of calling eval(): eval() executes arbitrary expressions
      # taken from the command line and raises on values that merely start
      # with a digit (e.g. '3rd' or '2001-09-08').
      #

      try:
        value = int(value, 0)
      except ValueError:
        try:
          value = float(value)
        except ValueError:
          pass

    parms[key] = value

  return (parms, args)

  
def verify(parameterFilename, datasetFilename, resultsFilename, wrapper):
  """ok = verify(parameterFilename, datasetFilename, resultsFilename, wrapper)

  Run the pre-flight checks for a launch.  Every check is always performed
  and each failure is reported on MESSAGE_STREAM, so a single call lists
  all of the problems found:

    - the parameter file passes the os.R_OK check
    - the dataset file passes the os.R_OK check
    - the directory that will hold the results file passes the os.W_OK check
    - the results file does not already exist (os.F_OK check)
    - wrapper is an instance of ML_Algorithm

  Returns a true value when every check passes and a false value otherwise.
  """

  fail = 0

  #
  # A bare filename such as 'out.txt' has an empty dirname; check the
  # current directory in that case rather than the empty path.
  #

  resultsDir = os.path.dirname(resultsFilename) or os.curdir

  if not Verify.fs_objects_have_permissions(parameterFilename, os.R_OK):
    MESSAGE_STREAM.write("Parameter file is not readable\n")
    fail = 1

  if not Verify.fs_objects_have_permissions(datasetFilename, os.R_OK):
    MESSAGE_STREAM.write("Dataset file is not readable\n")
    fail = 1

  if not Verify.fs_objects_have_permissions(resultsDir, os.W_OK):
    MESSAGE_STREAM.write("Result file [%s] is not writeable\n" %
                         (resultsFilename))
    fail = 1

  #
  # Refuse to clobber an existing results file
  #

  if Verify.fs_objects_have_permissions(resultsFilename, os.F_OK):
    MESSAGE_STREAM.write("Result file [%s] already exists\n" %
                         (resultsFilename))
    fail = 1

  if not isinstance(wrapper, ML_Algorithm):
    MESSAGE_STREAM.write("Wrapper is not of type ML_Algorithm\n")
    fail = 1

  return not fail


def _isSwitchedOn(parameters, name):
  # True when `name`, spelled in upper or lower case, is set to 'on'.
  # Both spellings are checked because the switch may come from the
  # command line or from the parameter file.
  for key in (name.upper(), name.lower()):
    if parameters.has_key(key) and parameters[key] == 'on':
      return 1
  return 0


def main(argv, wrapper):
  """
  Usage: <Launcher may only be called from other python modules>
  """

  #
  # NOTE: the docstring above is kept verbatim because this module hands
  # `main` to Usage.showUsage when it is run as a script.
  #
  # argv    -- full argument vector; argv[0] is skipped.  After the '--'
  #            options are removed exactly three positional arguments are
  #            required: parameter file, dataset file, results file.
  # wrapper -- the algorithm wrapper to run; verify() requires it to be an
  #            ML_Algorithm instance.
  #
  # Calls sys.exit(1) when verify() fails.  Other problems (validation
  # failure, run error, no labeling) are only reported on MESSAGE_STREAM.
  #

  import compClust.mlx.wrapper

  #
  # Shave off the first arg and separate the '--' options from the
  # positional arguments
  #

  opts, args = parseCmdline(argv[1:])

  #
  # Has the user asked for help for this wrapper?  If so, show the help of
  # the module that defines the wrapper's class.
  #

  if opts.has_key('help'):
    Usage.showHelp(inspect.getmodule(wrapper.__init__.im_class), exit=1)

  #
  # Must specify a parameter file, dataset file, and output file
  # at least.  Or show the quick summary
  #

  if (len(args) != 3) or opts.has_key('h'):
    Usage.showUsage(inspect.getmodule(wrapper.__init__.im_class), exit=1)

  #
  # Set the output locations for the log files
  #

  Verify.set_error_stream(MESSAGE_STREAM)
  WrapperUtil.set_error_stream(MESSAGE_STREAM)

  #
  # Pick apart the positional arguments
  #

  parameterFilename = args[0]
  datasetFilename   = args[1]
  resultsFilename   = os.path.abspath(args[2])

  #
  # Get the directory and short name of the output file
  #

  resultsDir, resultsShortFilename = os.path.split(resultsFilename)

  #
  # Make sure that the input files are readable, the results file
  # does not exist and the wrapper we got is, in fact, an ML_Algorithm.
  # Fail if any of these conditions is not met
  #

  if not verify(parameterFilename, datasetFilename, resultsFilename, wrapper):
    MESSAGE_STREAM.write("bad permissions on input files\n")
    sys.exit(1)

  #
  # Load in the parameter and data file
  #

  parameters = WrapperUtil.load_parameter_file(parameterFilename)
  dataset    = Dataset(datasetFilename)

  #
  # NOTE(review): earlier code also computed the extension-less base name
  # of the results file but never used it; the short filename *including*
  # its extension is what gets stored below.  Confirm which one consumers
  # of 'save_intermediate_files_base' expect.
  #

  parameters["results_dir"]                  = resultsDir
  parameters["save_intermediate_files_base"] = resultsShortFilename

  #
  # merge the parameters from the command line into the parameters hash.
  # This will allow for the effective behavior of overriding parameters
  # on the command line.  THIS IS NOT SUPPORTED BEHAVIOR!!!  USE AT YOUR
  # OWN RISK!!!  Code which relies on this behavior may break at any
  # time.
  #

  parameters.update(opts)

  #
  # Check to see if hierarchical clustering or MCCV is turned on.  If
  # so, create an instance of the meta-wrapper here.  If both meta-
  # wrappers are on, the hierarchical wrapper wraps the MCCV wrapper.
  #

  if _isSwitchedOn(parameters, "mccv"):
    wrapper = compClust.mlx.wrapper.MCCV(dataset, parameters, wrapper)
    MESSAGE_STREAM.write("mccv on\n")

  if _isSwitchedOn(parameters, "hierarchical"):
    wrapper = compClust.mlx.wrapper.Hierarchical(dataset, parameters, wrapper)
    MESSAGE_STREAM.write("hierarchical clustering on\n")

  #
  # Catch a verbose flag and print out the parameters, right-justifying
  # each value so that "key: value" fills 30 columns
  #

  if parameters.has_key("verbose"):
    for k in parameters.keys():
      s = str(parameters[k]).rjust(30 - len(str(k) + ": "))
      sys.stdout.write(k + ": " + s + "\n")

  #
  # Fill in the wrapper fields
  #

  wrapper.setDataset(dataset)
  wrapper.setParameters(parameters)

  #
  # validate the parameters and run the wrapper if OK
  #

  if not wrapper.validate():
    MESSAGE_STREAM.write("wrapper parameters failed validation\n")
    return

  status = wrapper.run()
  if status == compClust.mlx.wrapper.WRAPPER_STATUS_ERROR:
    MESSAGE_STREAM.write("algorithms terminated with an error\n")

  #
  # A labeling is still requested after an error status
  #

  labels = wrapper.getLabeling()
  if labels is None:
    MESSAGE_STREAM.write("no labeling returned\n")
    return

  #
  # Convert the class labels to strings and then insert newlines.
  # Add an empty string to the end of the list to get a trailing
  # newline.
  #

  classes = [str(label) for label in labels.getLabelByRows()]
  text = "\n".join(classes + [''])

  #
  # Close the results file even if the write fails
  #

  outputStream = open(resultsFilename, "w")
  try:
    outputStream.write(text)
  finally:
    outputStream.close()

# The Launcher is a library entry point: a wrapper module is expected to call
# main(argv, wrapper) itself.  When this file is run directly, hand `main` to
# Usage.showUsage (called with exit=1) instead of launching anything.
if __name__ == "__main__":
  Usage.showUsage(main, exit=1)











