########################################
# The contents of this file are subject to the MLX PUBLIC LICENSE version
# 1.0 (the "License"); you may not use this file except in
# compliance with the License.
# 
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
# the License for the specific language governing rights and limitations
# under the License.
# 
# The Original Source Code is "compClust", released 2003 September 03.
# 
# The Original Source Code was developed by the California Institute of
# Technology (Caltech).  Portions created by Caltech are Copyright (C)
# 2002-2003 California Institute of Technology. All Rights Reserved.
########################################
#
# Filename     : Supervised.py
# Description  : Base class for Supervised ML Algorithm Wrappers
# Author(s)    : Ben Bornstein
# Organization : Machine Learning Systems, Jet Propulsion Laboratory
# Created      : February 2002
# Revision     : $Id: Supervised.py,v 1.5 2004/04/02 02:40:14 diane Exp $
# Source       : $Source: /proj/CVS/code/python/compClust/mlx/Supervised.py,v $
#

"""
Supervised

This is a base class providing default services and a generic interface for
Supervised Machine Learning algorithms.

"""
import tempfile
import ML_Algorithm


class Supervised (ML_Algorithm.ML_Algorithm):
  """Base class for Supervised Machine Learning algorithm wrappers.

  Stores the dataset, labeling, parameters and model handed to the
  constructor and records whether the object operates in 'learn' or
  'predict' mode.  The actual algorithm is supplied by subclasses.

  """

  def __init__(self, dataset=None, labeling=None, parameters=None, model=None):
    """Supervised(dataset, labeling, parameters) or Supervised(dataset, model)

    Sets up the state shared by every Supervised algorithm wrapper.  This
    class carries no algorithm of its own, so this constructor is meant to
    be invoked from the constructor of a subclass, e.g.:

      def __init__(self, dataset=None, labeling=None, parameters=None,
                   model=None):
        Supervised.__init__(self, dataset, labeling, parameters, model)

    Two calling forms are recognized:

      'learn'   -- Supervised(dataset, labeling, parameters)
                   No model is supplied.  The object is intended for
                   training: run() is expected to learn a mapping from each
                   datum in dataset to its label in labeling, configured by
                   the parameters dictionary.  The learned model (a
                   SupervisedModel subclass) is then available through
                   getModel().

      'predict' -- Supervised(dataset, model)
                   A model is supplied, either through the model keyword or
                   as a SupervisedModel instance in the labeling position.
                   The object is intended for prediction: run() is expected
                   to predict a label for each datum in dataset, with the
                   result retrieved through getLabeling() (not defined in
                   this class; presumably inherited from ML_Algorithm).

    The selected form is reported by getMode().  When parameters is
    omitted, a new empty dictionary is used.

    """
    # Captured up front; it does not depend on any of the arguments.
    tempdir = tempfile.gettempdir()

    # A SupervisedModel found in the labeling slot means the caller used
    # the two-argument form Supervised(dataset, model).
    if isinstance(labeling, SupervisedModel):
      labeling, model = None, labeling

    # Give each instance its own dictionary rather than sharing one.
    if parameters is None:
      parameters = {}

    self.dataset    = dataset
    self.labeling   = labeling
    self.parameters = parameters
    self.model      = model

    # The presence of a model is what selects prediction over training.
    if model is not None:
      self.mode = 'predict'
    else:
      self.mode = 'learn'

    self.default_tempdir = tempdir

  def getMode(self):
    """getMode() -> 'learn' | 'predict'

    Returns the operating mode chosen at construction time.

    'learn' means no model was supplied: the algorithm is set up to learn
    a model mapping dataset to labeling, retrievable with getModel().

    'predict' means a model was supplied: the algorithm is set up to use
    that model to produce a labeling for dataset.

    """
    return self.mode


  def getModel(self):
    """getModel() -> Model

    Returns the model held by this Supervised Machine Learning algorithm
    (None when no model was supplied to the constructor and none has been
    stored since).
    """
    return self.model




class SupervisedModel:
  """SupervisedModel

  Holds a labelMap (label -> identifier) together with its inverse, the
  reverseLabelMap (identifier -> label).

  SupervisedModel is a place holder until our Model classes and hierarchy
  can be redesigned to be less MoG centric.

  """

  def __init__(self, labeling=None, labelMap=None):
    """SupervisedModel(labeling=None, labelMap=None) -> SupervisedModel

    Creates a new SupervisedModel based on either the given labeling (from
    which a LabelMap and reverse LabelMap are immediately created) or the
    given labelMap.  If both are given, labeling takes precedence and the
    labelMap argument is ignored.  If neither is given, the model starts
    out with an empty labelMap and an empty reverseLabelMap.

    A labelMap is a dictionary which maps labels to some other identifier
    (usually consecutive integers indicating a classification for each
    unique label) suitable for the Supervised algorithm.  A reverseLabelMap
    is a dictionary which maps the identifiers in labelMap to their
    corresponding labels.

    labeling, when given, must provide a getLabels() method returning a
    sequence of labels usable as dictionary keys.

    """
    if labeling is not None:
      labelMap = self.__createLabelMap(labeling)
    elif labelMap is None:
      # Both arguments default to None, so the no-argument form must work:
      # fall back to an empty map instead of failing on None.items().
      labelMap = {}

    self.labelMap        = labelMap
    self.reverseLabelMap = self.__createReverseMap(labelMap)


  def getLabelMap(self):
    """getLabelMap() -> dictionary

    Returns the SupervisedModel's labelMap, a dictionary keyed on label.
    When the model was built from a labeling, each value is the label's
    position in labeling.getLabels() (consecutive integers starting at 0);
    when a labelMap was passed to the constructor, that same dictionary is
    returned.

    See also getReverseLabelMap().

    """
    return self.labelMap


  def getReverseLabelMap(self):
    """getReverseLabelMap() -> dictionary

    Returns the SupervisedModel's reverseLabelMap, a dictionary keyed on
    the values of labelMap, whose values in turn, are the keys of labelMap.

    See also getLabelMap().

    """
    return self.reverseLabelMap


  def __createLabelMap(self, labeling):
    """__createLabelMap(labeling) -> dictionary

    Creates a default LabelMap, a dictionary keyed on each label returned
    by labeling.getLabels() whose value is the label's position in that
    sequence (consecutive integers, starting at 0).  Should a label occur
    more than once, its last position wins.

    """
    labelMap = {}

    for index, label in enumerate(labeling.getLabels()):
      labelMap[label] = index

    return labelMap


  def __createReverseMap(self, dictionary):
    """__createReverseMap(dictionary) -> dictionary

    Creates and returns a dictionary with the given dictionary's keys and
    values swapped.  This method is used to create a reverse LabelMap, such
    that a label can be looked-up given an integer.  If several keys share
    a value, only one of them survives in the result.

    See also __createLabelMap().

    """
    reverseMap = {}

    for key, value in dictionary.items():
      reverseMap[value] = key

    return reverseMap
