########################################
# The contents of this file are subject to the MLX PUBLIC LICENSE version
# 1.0 (the "License"); you may not use this file except in
# compliance with the License.
# 
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
# the License for the specific language governing rights and limitations
# under the License.
# 
# The Original Source Code is "compClust", released 2003 September 03.
# 
# The Original Source Code was developed by the California Institute of
# Technology (Caltech).  Portions created by Caltech are Copyright (C)
# 2002-2003 California Institute of Technology. All Rights Reserved.
########################################
#
#       Authors: Lucas Scharenbroich
# Last Modified: 24-Oct-2001, 11:00
#

"""
Contains symbolic labels for datum in the Dataset class.

A Labeling is simply a collection of labels applied to a dataset.  There is
no restriction on the kinds of labels, or on how or to what they are applied.
The only current concrete implementation deals with labeling rows or columns
of a dataset.  Other conceivable Labeling classes could deal with labeling
regions of an image.
"""

from types import *

import Numeric
import sets
import string
import sys
import types
import operator

from compClust.util.unique import unique
from compClust.util.listOps import unravel

from compClust.mlx.interfaces import ILabeling
from compClust.mlx.interfaces import IDataset
from compClust.mlx.ML_Algorithm import ML_Algorithm
from compClust.mlx.views import SubsetView

class LabelingDatasetLengthError(ValueError):
    """Raised when the number of labels supplied does not match the size of
    the dataset axis (rows or columns) that is being labeled.
    """

class Labeling(ILabeling):
  """
  Implementation of the ILabeling interface
  
  The Labeling class is a container class for Labels.  When created, the
  Labeling only knows about the dataset object to which it is bound.  This
  dataset object may be a Dataset, View, PagedDataset or other class derived
  from IDataset.

  When labels are attached to the underlying dataset, a unique ID is requested
  from the dataset and that ID is used as a cookie. This allows each
  Labeling to uniquely look up and/or filter its own labels in the dataset.
  """
  
  def __init__(self, dataset, name=None, nolink=0): 
    """
    Binds a new, empty labeling to a dataset.

    dataset -- the IDataset instance the labels will be attached to
    name    -- optional name for the labeling (stored as given)
    nolink  -- when true, the labeling is NOT registered with the dataset
               through dataset.addLabeling()

    Raises TypeError if dataset is not an IDataset instance.
    """

    if not isinstance(dataset, IDataset):
      raise TypeError("dataset must be an instance of IDataset, got %r"
                      % (type(dataset),))

    self.dataset = dataset
    self.name    = name

    # Two-way mapping between label objects and the dataset-issued UIDs.
    self.lab2uid = {}
    self.uid2lab = {}

    # Track which kinds of keys have been labeled through this labeling.
    self.__has_column_labels = False
    self.__has_row_labels = False
    self.__has_cell_labels = False

    if not nolink:
      dataset.addLabeling(self)


  def _getBaseRef(self):
    """
    Returns the 'true' object self referres to, sort of our own style of
    weak-references
    """
    return self

  
  def _getLocalRef(self, target):
    """
    Returns a reference to self relative to the passed View.  For this class
    if target != self.dataset, return None
    """

    if target is not self.getDataset():
      return None
    else:
      return self


  def getName(self):
    """
    Returns the name of this labeling.

    Names are indentifierd bestowed upon the labeling when it is created or at
    a later point via the setName() method.  The name may be None.
    """

    return str(self.name)
    

  def setName(self, name):
    """
    Sets the name for the labeling.

    This method replaces the current LAbeling name with the one specified.
    """
    self.name = str(name)


  def sortDatasetByLabel(self):
    """
    Reorders the rows of the bound SortedView by their labels.

    Each row is paired with its first label (as returned by
    getLabelByRows()), the pairs are sorted, and the resulting row order is
    handed to the dataset's permuteRows().

    Raises TypeError when the bound dataset is not a SortedView (or a class
    derived from it).
    """

    from compClust.mlx.views import SortedView

    dataset = self.getDataset()
    if not isinstance(dataset, SortedView):
      raise TypeError("the labeling dataset must be derived from a SortedView")

    numRows = dataset.getNumRows()

    # Sorting (label, position) pairs orders by label first and uses the
    # original position to break ties.
    pairs = zip(self.getLabelByRows(), range(numRows))
    pairs.sort()

    dataset.permuteRows([position for (rowLabel, position) in pairs])

  def getDataset(self):
    """
    Returns the dataset instance to which this labeling is bound.
    """
    
    return self.dataset

  
  def writeLabels(self, stream=sys.stdout, delimiter="\t"):
    """
    Prints out all row labels to the specified stream.

    The labels will be written out in row order from smallest to largest.
    If there is more than one label on a line, the labels are separated by
    the specified delimiter.
    """

    for labels in self.getAllKeyLabels():
      stream.write(string.join(map(str, labels), delimiter))
      stream.write("\n")


  def labelRows(self, obj):
    """
    Labels all the rows with the contents of the object. 

    labelRows() casts the object to a list of hashable python objects and then
    labels the rows of the dataset with the resulting objects.  See the
    _castLabels() method for details on how different objects are cast.
    """
    
    labels = self._castLabels(obj)
    if len(labels)!= self.dataset.getNumRows():
        error_msg = "The number of row labels [%d] does not match the number of rows [%d]" 
        error_msg %= (len(labels), self.dataset.getNumRows())
        raise LabelingDatasetLengthError(error_msg)
    map(self.addLabelsToRow, labels, range(len(labels)))


  def labelCols(self, obj):
    """
    Labels all columns with the contents of the the object.

    Performs identically to labelRows(), except on columns.
    """
    
    labels = self._castLabels(obj)
    if len(labels)!= self.dataset.getNumCols():
        error_msg = "The number of column labels [%d] does not match the number of columns [%d]" 
        error_msg %= (len(labels), self.dataset.getNumCols())
        raise LabelingDatasetLengthError(error_msg)
    map(self.addLabelsToCol, labels, range(len(labels)))

        
  def _castLabels(self, obj):
    """
    Converts obj into a list of label lists usable by the labeling system.

    The conversion is chosen from the exact type of obj (subclasses are not
    matched by the type tests):

    If obj is a                      the result is
    -----------------------------    ------------------------------
    int / long / float / complex     [[obj]]
    list                             [[x] for x in obj]
    tuple                            [[obj]]  (the tuple is ONE label)
    dict                             [[v] for v in obj.values()]
    string (str or unicode)          FileIO.readLabelFile(obj)
    object with a 'readline' attr    FileIO.readLabelStream(obj)
    Numeric array                    [[x] for x in obj.tolist()]

    Anything else yields [[]] (a single, empty label list).
    """

    from compClust.util.FileIO import readLabelFile, readLabelStream

    objType = type(obj)

    # Scalars and tuples are a single label for a single datum.
    if objType in (IntType, LongType, FloatType, ComplexType):
      return [[obj]]
    if objType == ListType:
      return [[item] for item in obj]
    if objType == TupleType:
      return [[obj]]
    if objType == DictType:
      return [[value] for value in obj.values()]

    # Strings are treated as the name of a label file ...
    if objType in types.StringTypes:
      return readLabelFile(obj)
    # ... and anything file-like as an already opened label stream.
    if hasattr(obj, 'readline'):
      return readLabelStream(obj)

    if objType == Numeric.ArrayType:
      return [[item] for item in obj.tolist()]

    return [[]]


  def _getUID(self, label):
    try:
      uid = self.lab2uid[label]
    except:
      uid = self.getDataset()._getUID()
      self.lab2uid[label] = uid
      self.uid2lab[uid]   = label
    return uid

  ###########################################################################
  #
  # Marking the dataset
  #
  ###########################################################################

  def addLabelToRow(self, label, row):
    """
    Attatches a label to a particular row of the associated dataset.
    """

    key = self.getDataset().getRowKey(row)
    self.__addLabelToKey(label, key)
    self.__has_row_labels = True

  def addLabelToCol(self, label, col):
    """
    Attatches a label to a particular column of the associated dataset.
    """

    key = self.getDataset().getColKey(col)
    self.__addLabelToKey(label, key)
    self.__has_column_labels = True


  def __addLabelToKey(self, label, key):
    """
    Attatches a label to a particular key of the associated dataset without changing its row/col/cell state
    """

    uid = self._getUID(label)
    self.getDataset()._addUID(uid, key)

  def addLabelToKey(self, label, key):
    """
    Attatches a label to a particular key of the associated dataset.
    """

    self.__addLabelToKey(label, key)
    dataset = self.getDataset()
    if dataset.isRowKey(key):
      self.__has_row_labels = True
    elif dataset.isColKey(key):
      self.__has_column_labels = True
    else:
      self.__has_cell_labels = True

  def addLabelToRows(self, label, rowList):
    """
    Attatches the label to all of the rows in rowList.  Uses addLabelToRow()
    underneath.
    """

    map(self.addLabelToRow, [label] * len(rowList), rowList)


  def addLabelToCols(self, label, colList):
    """
    Attatches the label to all of the columns in colList.  Uses addLabelToCol()
    underneath.
    """
    
    map(self.addLabelToCol, [label] * len(colList), colList)


  def addLabelToKeys(self, label, keyList):
    """
    Attatches the label to all of the keys in keyList.  Uses addLabelToCol()
    underneath.
    """

    map(self.addLabelToKey, [label] * len(keyList), keyList)


  def addLabelsToRow(self, labels, row):
    """
    Attatches a set of labels to a particular row.
    """
    if type(labels) not in  (types.ListType, types.TupleType):
      raise ValueError("addLabelsToRow requires a list or tuple")
    
    map(self.addLabelToRow, labels, [row] * len(labels))

  def addLabelsToCol(self, labels, col):
    """
    Attatches a set of labels to a particular column.
    """
    if type(labels) not in  (types.ListType, types.TupleType):
      raise ValueError("addLabelsToCol requires a list or tuple")
    
    map(self.addLabelToCol, labels, [col] * len(labels))


  def addLabelsToKey(self, labels, key):
    """
    Attatches a set of labels to a particular key.
    """
    if type(labels) not in  (types.ListType, types.TupleType):
      raise ValueError("addLabelsToKey requires a list or tuple")
    
    map(self.addLabelToKey, labels, [key] * len(labels))


  def addLabelsToRows(self, labels, rows):
    """
    Attatches a list of label sets to a set of rows.
    """

    map(self.addLabelsToRow, labels, rows)

  def addLabelsToCols(self, labels, cols):
    """
    Attatches a list of label sets to a set of columns.
    """
    
    map(self.addLabelsToCol, labels, cols)


  def addLabelsToKeys(self, labels, keys):
    """
    Attatches a list of label sets to a set of keys.
    """
    
    map(self.addLabelsToKey, labels, keys) 


  ###########################################################################
  #
  # information retrieval
  #
  ###########################################################################
  
  def getLabels(self):
    """
    Returns a list of all the labels this labeling has knowledge about.
    """

    return self.lab2uid.keys()


  def getAllRowLabels(self):
    """
    Returns a list of lists of the labels for all the rows.
    """

    return map(self.getLabelsByKey, self.getDataset().getRowKeys())


  def getAllColLabels(self):
    """
    Returns a list of lists of the labels for all the columns.
    """

    return map(self.getLabelsByKey, self.getDataset().getColKeys())

  
  def getAllKeyLabels(self):
    """
    Returns a list of lists of the labels for all the keys.
    """
    
    keys = self.getDataset().getRowKeys() + self.getDataset().getColKeys()
    return map(self.getLabelsByKey, keys)


  def getLabelsByRow(self, row):
    """
    Returns a list of all labels associated with a particular row.
    """
    
    key = self.getDataset().getRowKey(row)
    return self.getLabelsByKey(key)

  def getLabelsByCol(self, col):
    """
    Returns a list of all labels associated with a particular column
    """
    
    key = self.getDataset().getColKey(col)
    return self.getLabelsByKey(key)
  
  def getLabelsByKey(self, key):
    """
    Returns a list of labels for a specific key.  The labels returned are
    only those created by this Labeling.
    """
    
    uids = self.getDataset()._getUIDsByKey(key)

    #
    # This only works if the unique IDs for the labels are never 0
    #

    uid2lab = self.uid2lab
    return map(uid2lab.get, filter(uid2lab.has_key, uids))


  def getLabelsByRows(self, rows):
    """
    Returns a list of lists of all labels from a set of rows.
    """  
    return map(self.getLabelsByRow, rows)


  def getLabelsByCols(self, cols):
    """
    Returns a list of lists of all labels from a set of columns.
    """
    return map(self.getLabelsByCols, cols)


  def getLabelsByKeys(self, keys):
    """
    Returns a list of lists of all labels from a set of keys.
    """
    return map(self.getLabelsByKey, keys)


  def getLabelByRow(self, row, n=0):
    """
    Gets the Nth label from a given row.  Returns None if a label does
    not exist.
    """

    try:
      label = self.getLabelsByRow(row)[n]
    except IndexError:
      label = None
    return label


  def getLabelByCol(self, col, n=0):
    """
    Gets the Nth label from a given column.  Returns None if a label does
    not exist.  By default this will return the first label in the list.
    """

    try:
      label = self.getLabelsByCol(col)[n]
    except IndexError:
      label = None
    return label


  def getLabelByKey(self, key, n=0):
    """
    Gets the Nth label from a given key.  Returns None if a label does
    not exist.  By default this will return the first label in the list.
    """

    try:
      label = self.getLabelsByKey(key)[n]
    except IndexError:
      label = None
    return label


  def getLabelByRows(self, rows=None, n=0):
    if rows is None:
      rows = range(self.getDataset().getNumRows())
      
    return map(self.getLabelByRow, rows, [n]*len(rows))


  def getLabelByCols(self, cols=None, n=0):
    if cols is None:
      cols = range(self.getDataset().getNumCols())

    return map(self.getLabelByCol, cols, [n]*len(cols))


  def getLabelByKeys(self, keys=None, n=0):
    if keys is None:
      keys = self.getDataset().getRowKeys() + self.getDataset().getColKeys()

    return map(self.getLabelByKey, keys, [n]*len(keys))

  
  def getRowsByLabel(self, label):
    """
    Returns all the rows which are marked by the label.
    """
    
    #
    # Get the full set of row keys and filter on the intersection of those
    # and the set of keys associated with the label
    #
    
    rowKeys = self.getDataset().getRowKeys()
    keys    = self.getKeysByLabel(label)

    #
    # Do the intersection quickly...the order of the arguements is
    # very important (keys <intersect> rowKeys, not rowKeys <intersect> keys)
    #

    return self._intersect(keys, rowKeys)
    
  def getColsByLabel(self, label):
    """
    Returns all the columns which are marked by the label given.
    """
    #
    # Get the full set of column keys and all the keys associated with the
    # label
    #
    
    colKeys = self.getDataset().getColKeys()
    keys    = self.getKeysByLabel(label)

    #
    # Filter out only the intersection...the order of the arguements is
    # very important!!
    #

    vKeys = self._intersect(colKeys, keys)

    #
    # ...and return their position within the key list
    #

    return  map(colKeys.index, vKeys)
        
  def getKeysByLabel(self, label):
    """
    Returns a list of keys marked by the specified label.
    """

    #
    # get the UID for this label, if the label does not exist, get 0.  Since
    # 0 is an invalid UID, there will be no keys
    #

    uid  = self.lab2uid.get(label, 0)
    return self.getDataset()._getKeysByUID(uid)

  def __isListUnique(self, labeling_contents):
    """
    """
    label_list = []
    for slice in labeling_contents:
      if len(slice) != 1:
        return False
      else:
        label_list.append(slice[0])
      if len(label_list) != len(sets.ImmutableSet(label_list)):
        return False
    return True
  
  def isRowUnique(self):
    """Is this labeling unique with a value for each and every row?
    """
    # is it unique?
    return self.__isListUnique(self.getAllRowLabels())
  
  def isRowLabeling(self):
    """Does this labeling have a label for each row?
    """
    return self.__has_row_labels and not (self.__has_column_labels or self.__has_cell_labels)    
    
    
  def isColUnique(self):
    """Is this labeling unique with a value for each and every column?
    """
    return self.__isListUnique(self.getAllColLabels())

  def isColLabeling(self):
    """Does this labeling have a label for each column?
    """
    return self.__has_column_labels and not (self.__has_row_labels or self.__has_cell_labels)    

  def isNumeric(self):
#    for l in self.getAllKeyLabels():
# JCR: changed to getLabels; only want to test all non-empty (unique) labels
    for l in self.getLabels():
      if type(l) not in (types.IntType, types.FloatType):
        return False
    return True
    
  ###########################################################################
  #
  # Label removal
  #
  ###########################################################################

  def detatch(self):
    """
    Remove the labeling from the dataset.

    This will remove all the data relevant to this labeling from the dataset
    and all references to the dataset.  This method is not generally useful
    to a client program.  Use IDataset.removeLabeling() to remove a labeling
    from a dataset object.
    """

    self.removeAll()
    
    self.dataset = None
    
    self.lab2uid.clear()
    self.uid2lab.clear()

  def removeAll(self):
    """
    Removes all the labels in the labeling and unmarks every key in the dataset
    which is marked by this Labeling.  Labels attatched to keys by other
    labelings are not affected.
    """

    for label in self.lab2uid.keys():
      self.removeLabel(label)
      
  def removeLabel(self, label):
    """
    Removes a label and unmarks all the keys in the dataset associated with
    the label.
    """

    try:
      uid  = self.lab2uid[label] 
      keys = self.getKeysByLabel(label)
      for key in keys:
        self.getDataset()._removeUID(uid, key)
      del self.lab2uid[label]
      del self.uid2lab[uid]
    except:
      pass

  def removeLabelsFromRow(self, row):
    """
    Removes all the labels in this Labeling from a specified row of the
    dataset.
    """
    
    self.removeLabelsFromKey(self.getDataset().getRowKey(row))

  def removeLabelsFromCol(self, col):
    """
    removeLabelsFromCol(row)

    Removes all the labels in this Labeling from a specified column of the
    dataset.
    """

    self.removeLabelsFromKey(self.getDataset().getColKey(col))

  def removeLabelsFromKey(self, key):
    """
    Removes all the labels in this Labeling from a specified key of the
    dataset.
    """
    
    labels = self.getLabelsByKey(key)
    for label in labels:
      self.getDataset()._removeUID(self.lab2uid[label], key)

  def removeLabelFromRow(self, label, row):
    """
    Remove a single label from a given row.
    """

    self.removeLabelFromKey(label, self.getDataset().getRowKey(row))
  
  def removeLabelFromCol(self, label, col):
    """
    Remove a single label from a given column.
    """

    self.removeLabelFromKey(label, self.getDataset().getColKey(col))
    
  def removeLabelFromKey(self, label, key):
    """
    Remove a single label from a single key.
    """

    labels = self.getLabelsByKey(key)
    if label in labels:
      self.getDataset()._removeUID(self.lab2uid[label], key)

  def sizeof(self):
    return self._sizeoflabels(self.uid2lab) * 2

  def _sizeoflabels(self, d):
    """ compute sizes of objects in dictionary """
    size = 0
    for item in d.items():
      for x in item:
        self._sizeofitem(x)
    return size * 2

  def _sizeofitem(self, x):
    if type(x) == types.StringType:
      return len(x)
    elif type(x) == types.UnicodeType:
      return len(x) * 2
    elif type(x) == types.IntType:
      return 4
    elif type(x) == types.LongType:
      return 8
    elif type(x) == types.FloatType:
      return 8
    elif type(x) == types.InstanceType:
      #if hasattr(x, 'sizeof') and not isinstance(x, Labeling):
      #  size += x.sizeof()
      #else:
      return 4
    elif type(x) == types.TupleType:
      return reduce(operator.add, [4] + [self._sizeofitem(y) for y in x])
    else:
      print "unrecognized type <%s> for sizeof" % (str(type(x)))
      return 4
  
  def __str__(self):

    return('%s: %s,  %i unique labels' %
           (self.__init__.im_class.__name__, self.getName(), len(self.getLabels())))
  
  def __repr__(self):
    
    return(self.__str__())

  def _intersect(self, set1, set2):
    inter  = {}
    length = len(set1)
    map(operator.setitem, [inter]*length, set1, [1]*length)
    return filter(inter.has_key, set2)
  
  def _findCommonKeys(self, ds, label):
    """
    Helper that finds the keys in ds which correspond to the data marked by
    label in the dataset this labeling is attached to.

    Both datasets report their lineage (a collection of paths, each ending
    in a root dataset).  For every pair of paths that end in the same root,
    the label's UID is mapped along this labeling's path to the root, the
    root's keys for that UID are fetched, and those keys are mapped back
    along the other path to ds.  All keys found are concatenated and
    returned as a list.

    Note that the UID is obtained through _getUID(), which registers the
    label with this labeling if it was not known before.
    """

    ownLineage   = self.getDataset().getLineage()
    otherLineage = ds.getLineage()

    commonKeys = []
    for rootPath in ownLineage:

      # Paths of the other dataset that end in the same root as rootPath.
      sharedPaths = [path for path in otherLineage
                     if path[-1] == rootPath[-1]]

      for path in sharedPaths:

        # Carry the UID from our dataset towards the shared root, one
        # parent at a time.
        uid = self._getUID(label)
        for depth in range(len(rootPath) - 1):
          uid = rootPath[depth]._mapUIDToParent(uid, rootPath[depth + 1])

        # Keys of the root dataset that carry the root-level UID.
        keyset = rootPath[-1]._getKeysByUID(uid)

        # Carry the keys from the root back towards ds, walking the other
        # path from its root end to its start.
        for depth in range(len(path) - 2, -1, -1):
          keyset = path[depth]._mapKeysFromParent(keyset, path[depth + 1])

        commonKeys += keyset

    return commonKeys

  def labelFrom(self, labeling, name=None):

    """
    Creates and returns a labeling for the dataset based on the labeling
    of a different view or dataset.
    
    If the given labeling is already attatched to the dataset, no action is
    performed and None is returned. In the case where the Labeling is
    attatched to a different dataset, the common datapoints will be labeled
    with the same labels that Labeling contains.
    
    Example usage:

     view1 = dataset.subsetRows([0,1,2,3,4])
     view2 = dataset.subsetRows([2,3,4,5,6])
     
     labeling1 = Labeling(view1)
     labeling1.addLabelToRows("top5", range(view1.getNumRows()))
     
     //
     // Now use labeling1 to label view2
     //

     labeling2 = Labeling(view2)
     labeling2.labelFrom(labeling1)
     
     //
     // The common datapoints (2,3,4) are now labeled with "top5" and may
     // be retieved
     //
     
     num = len(labeling2.getRowsByLabel("top5"))
     print str(num) + " of my top 5 genes are in this dataset"
    """

    #
    # For each label in the source labeling, resolve the keys in the 
    # dataset, then use that list of (label, keys) tuples to create and
    # return the new labeling
    #
    
    labels = labeling.getLabels()
    
    #
    # ...and fill it in with the new labels
    #

    for label in labels:
      keys = labeling._findCommonKeys(self.getDataset(), label)
      self.addLabelToKeys(label, keys)

class ClusteredLabeling(Labeling):
  """A labeling that keeps information about the clustering run that
  generated it: the parameters used and, optionally, the resulting model.
  """
  def __init__(self, dataset, algorithm, parameters, model=None, name=None, nolink=0):
    """
    Creates the labeling and records the clustering parameters and model.

    dataset, name and nolink are handed on to Labeling.__init__().  The
    algorithm argument is accepted but currently not stored (see below).
    """
    ## FIXME: pickling a dataset with a clustered labeling attached
    ## FIXME: sometimes fails with an error about the clustering algorithm
    ## FIXME: not being the same class (e.g. DiagEM.DiagEM is not the same
    ## FIXME: DiagEM.DiagEM).  To get things working, saving the algorithm
    ## FIXME: class (and the check that it subclasses ML_Algorithm) was
    ## FIXME: disabled; perhaps saving just its name would work.
    self.model = model
    self.parameters = parameters
    Labeling.__init__(self, dataset, name=name, nolink=nolink)

def subsetByLabeling(ds, labeling, obj, name=None):
    """
    Creates and returns a SubsetView of ds holding the data marked with the
    given label(s) in a labeling.

    ds       -- the dataset (or view) to take the subset from
    labeling -- the labeling that supplies the labels
    obj      -- the label(s) to select.  A string (str or unicode) is taken
                as ONE label, not as a file name; anything else is
                converted by labeling._castLabels() and flattened
    name     -- optional name handed to the SubsetView

    If the labeling is not directly tied to ds, the subset returned is the
    part of ds which corresponds to the datapoints labeled in the dataset
    to which the labeling is attached.

    When the selected keys cover only one axis (rows only or columns only),
    every key of the other axis is included as well.

    The application of this behavior can be expressed in the following
    situation:

        Given a Dataset, perform two clusterings which result in two
        labelings.  Using these labelings, create two views, one per
        clustering of a particular cluster. Then create a labeling for the
        view which labels all the data.  Now apply this labeling to the
        second view to create a subset of common datapoints. The code to do
        this may be like the following.

        view1 = dataset.subset(labeling1, "cluster 2")
        view2 = dataset.subset(labeling2, "cluster 6")

        newlabeling1 = view1.createLabeling()
        newlabeling1.addLabelToRows("foo", range(view1.getNumRows()))

        commonGenes = view2.subset(newlabeling1, "foo")

    At the conclusion of this code, the commonGenes view will hold the
    data which is in view2 and also labeled "foo" in newlabeling1
    """

    #
    # Override the string->filename conversion of _castLabels(): a bare
    # string (of either string type) names a label here.
    #

    if type(obj) in types.StringTypes:
      obj = [obj]

    # Cast, then flatten the list of label lists into a flat label list.
    labels = unravel(labeling._castLabels(obj))

    #
    # Two cases: the labeling is attached directly to ds (a local reference
    # exists), or it is attached to another dataset/view.
    #

    keylist = []
    if labeling._getLocalRef(ds) is not None:

      # Standard subset operation on the labeling's own dataset.
      for label in labels:
        keylist += labeling.getKeysByLabel(label)

    else:

      #
      # Trickier case: trace the UIDs to the root dataset(s) shared by ds
      # and the labeling's dataset and derive the keys from there.  This is
      # effectively a join on UID.
      #

      for label in labels:
        keylist += labeling._findCommonKeys(ds, label)

    # Remove duplicate keys.
    keys = unique(keylist)

    # If only one axis of the dataset was selected, default to including
    # all of the other axis.  An empty selection stays empty, and a
    # selection on both axes is used as it is.
    rowKeys, colKeys = ds.splitRowColKeylist(keys)
    if len(rowKeys) == 0 and len(colKeys) != 0:
      keys += ds.getRowKeys()
    elif len(colKeys) == 0 and len(rowKeys) != 0:
      keys += ds.getColKeys()

    keys.sort()

    return SubsetView(ds, keys, name=name)
