import logging
import md5
import operator
import os
import shelve
import string
import tempfile
import types

from compClust.gui.PlotCache import PlotCache
from compClust.gui.DataSource import DataSource

class DataManager(object):
  """Collect loaded datasources to simplify browsing.

  Loaded datasources live in the in-memory ``cache`` dictionary keyed by
  datasource id.  Two shelve files located (by default) inside
  ``cache_dir`` hold persisted datasources and an id -> (id, name) index.

  FIXME: the shelves persisting the datasets can and will be clobbered
  FIXME: if there are multiple users.
  """

  # FIXME: persistence breaks clustering in compclustweb, so persist() is
  # FIXME: a no-op unless this flag is explicitly switched on.
  _persistence_enabled = False

  # number of bytes/characters copied per read in save_temp_file
  _copy_chunk_size = 64 * 1024

  def __init__(self, cache_dir = None, shelf_name="compclust.shelf", shelf_id_name="compclust_id.shelf"):
    """Create an empty manager.

    cache_dir     -- directory for persisted objects; when None a
                     ".compclust" directory under the user's home directory
                     (or the system temp directory if there is no usable
                     home directory) is used.  Created if missing.
    shelf_name    -- file name of the datasource shelf; relative names are
                     resolved against cache_dir.
    shelf_id_name -- file name of the id index shelf; relative names are
                     resolved against cache_dir.
    """
    self.cache = {}
    if cache_dir is None:
      # try for a user directory
      cache_dir = os.path.expanduser("~")
      if not os.path.isdir(cache_dir):
        cache_dir = tempfile.gettempdir()
      cache_dir = os.path.join(cache_dir, ".compclust")
    # goes through the cache_dir property, which creates the directory
    self.cache_dir = cache_dir
    self.__shelf_name = shelf_name
    self.__shelf_id_name = shelf_id_name

  def __get_id__(self, datasource_id):
    """Normalize a DataSource instance or an id into an id.

    Raises ValueError when datasource_id is None.
    """
    if datasource_id is None:
      raise ValueError("Datasource id must be a valid id")
    elif isinstance(datasource_id, DataSource):
      datasource_id = datasource_id.id
    return datasource_id

  def __getitem__(self, datasource_id):
    """Look up a datasource by id, DataSource instance or integer position.

    The in-memory cache is consulted first.  An integer that is not itself
    a cache key is treated as a position in the list of cached values.
    Any other id is looked up in the datasource shelf and, when found,
    added to the cache.

    Raises KeyError when the id is in neither the cache nor the shelf and
    IndexError when an integer position is out of range.
    """
    datasource_id = self.__get_id__(datasource_id)
    datasource = self.cache.get(datasource_id, None)
    if datasource is not None:
      return datasource

    # if the first ID didn't work try looking it up as an integer in
    # the cache index. (This is important because the shelf
    # code doesn't like integers as keys.)
    if isinstance(datasource_id, int):
      # IndexError is deliberately left to propagate: the legacy sequence
      # iteration protocol (for x in manager) stops on IndexError.
      return list(self.cache.values())[datasource_id]

    # everything failed, so try to pull it out of the shelf.
    try:
      data_shelf = shelve.open(self.shelf_name)
      try:
        datasource = data_shelf.get(datasource_id, None)
      finally:
        data_shelf.close()
    except Exception:
      # a broken or unreadable shelf is reported, then treated as a miss
      logging.getLogger(__name__).warning(
        "unable to read datasource %r from shelf %s",
        datasource_id, self.shelf_name, exc_info=True)
    if datasource is None:
      raise KeyError(datasource_id)
    self.cache[datasource_id] = datasource
    return datasource

  # define properties
  def __get_cache_dir(self):
    return self.__cache_dir
  def __set_cache_dir(self, path):
    # makedirs so that a missing parent directory is not an error
    if not os.path.isdir(path):
      os.makedirs(path)
    self.__cache_dir = path
  cache_dir = property(__get_cache_dir, __set_cache_dir, doc="base dir to store objects we can't easily reload.")

  def __get_shelf_name(self):
    if os.path.isabs(self.__shelf_name):
      return self.__shelf_name
    else:
      return os.path.join(self.__cache_dir, self.__shelf_name)
  def __set_shelf_name(self, name):
    self.__shelf_name = name
  shelf_name = property(__get_shelf_name, __set_shelf_name, doc="filename to store datasource meta information in, if not absolute it will be relative to self.cache_dir")
  def __get_shelf(self):
    # every access opens a new shelf; the caller must close it
    return shelve.open(self.shelf_name)
  shelf = property(__get_shelf, doc="return the datasource meta information shelf")

  def __get_shelf_id_name(self):
    if os.path.isabs(self.__shelf_id_name):
      return self.__shelf_id_name
    else:
      return os.path.join(self.__cache_dir, self.__shelf_id_name)
  def __set_shelf_id_name(self, name):
    self.__shelf_id_name = name
  shelf_id_name = property(__get_shelf_id_name, __set_shelf_id_name, doc="filename to store the id to datasource mapping in, if not absolute it will be relative to self.cache_dir")
  def __get_shelf_id(self):
    # every access opens a new shelf; the caller must close it
    return shelve.open(self.shelf_id_name)
  shelf_id = property(__get_shelf_id, doc="return the id to datasource mapping shelf")

  def append(self, datasource):
    """Add a datasource to the in-memory cache.

    Raises ValueError when datasource is not a DataSource instance.
    """
    if not isinstance(datasource, DataSource):
      raise ValueError("%s cannot be a member of a DataManager cache" % \
                       (str(type(datasource))))
    self.cache[datasource.id] = datasource
    # set location to persist to if we don't have a source
    datasource.cache_dir = self.cache_dir

  def browse_list(self):
    """Return list of loaded and persisted datasources.

    Each entry is a tuple of
    (datasource_id, datasource_name, list of attached labelings,
    isloaded_flag).  Labelings are (id(labeling), labeling name) pairs and
    are only listed for loaded datasources.
    """
    browse = []
    # browse loaded objects
    for datasource_id, datasource in self.cache.items():
      labels = [ (id(l), l.getName())
                 for l in datasource.dataset.getLabelings()]
      browse.append((datasource_id, datasource.getShortenedName(), labels, True))
    # browse persisted objects; copy the entries so the shelf can be closed
    id_shelf = self.shelf_id
    try:
      persisted = list(id_shelf.values())
    finally:
      id_shelf.close()
    for datasource_id, name in persisted:
      # only add the unloaded ones
      if datasource_id not in self.cache:
        browse.append((datasource_id, name, [], False))
    return browse

  def delete(self, datasource_id):
    """Remove a datasource from both shelves.

    When the stored datasource reports is_cached_file, the "dataset" file
    in its directory under cache_dir is removed as well.  The in-memory
    cache is not touched.

    Raises KeyError when the id is not present in the shelves.
    """
    datasource_id = self.__get_id__(datasource_id)
    data_shelf = self.shelf
    try:
      id_shelf = self.shelf_id
      try:
        datasource = data_shelf[datasource_id]
        del data_shelf[datasource_id]
        del id_shelf[datasource_id]
      finally:
        id_shelf.close()
    finally:
      data_shelf.close()
    persist_dir = os.path.join(self.cache_dir, datasource_id)
    if datasource.is_cached_file and os.path.isdir(persist_dir):
      persisted_file = os.path.join(persist_dir, "dataset")
      if os.path.isfile(persisted_file):
        os.remove(persisted_file)

  def items(self):
    """Return the (id, datasource) pairs of the in-memory cache."""
    return self.cache.items()

  def __len__(self):
    """Return the number of datasources in the in-memory cache."""
    return len(self.cache)

  def keys(self):
    """Return the ids of the datasources in the in-memory cache."""
    return self.cache.keys()

  def persist(self, datasource_id):
    """Save a cached datasource into the shelves.

    Does nothing while _persistence_enabled is false (the default).
    """
    # FIXME: disable persistence
    # FIXME: persistence breaks clustering in compclustweb
    if not self._persistence_enabled:
      return
    datasource_id = self.__get_id__(datasource_id)
    datasource = self.cache[datasource_id]
    data_shelf = self.shelf
    try:
      id_shelf = self.shelf_id
      try:
        data_shelf[datasource_id] = datasource
        id_shelf[datasource_id]=(datasource_id, datasource.getShortenedName())
      finally:
        id_shelf.close()
    finally:
      data_shelf.close()

  def persist_all(self):
    """Call persist() for every datasource in the in-memory cache."""
    # FIXME: Not tested
    for datasource_id in list(self.cache.keys()):
      self.persist(datasource_id)

  def save_temp_file(self, tempfile, datasource ):
    """Store a temporary file someplace a little more permanent.

    tempfile   -- readable file-like object with an orig_filename attribute
    datasource -- object whose id names the directory (under cache_dir)
                  the copy is written to; the directory is created if
                  needed.

    Returns the path of the saved copy.
    """
    path = os.path.join(self.cache_dir, datasource.id)
    if not os.path.isdir(path):
      os.mkdir(path)
    savefilename = os.path.join(path, tempfile.orig_filename)
    savefile = open(savefilename, 'w')
    try:
      # copy the whole stream chunk by chunk until read() returns nothing
      while True:
        chunk = tempfile.read(self._copy_chunk_size)
        if not chunk:
          break
        savefile.write(chunk)
    finally:
      savefile.close()
    return savefilename

  def sizeof(self):
    """Estimate memory usage.

    Returns the sum of sizeof() over all cached datasources, 0 when the
    cache is empty.
    """
    return sum([x.sizeof() for x in self.cache.values()])

  def unload(self, datasource_id):
    """Remove datasource from memory cache

    Catch the returned datasource if you want to ask the user about
    saving it.

    Raises KeyError when the datasource is not in the cache.
    """
    datasource_id = self.__get_id__(datasource_id)
    return self.cache.pop(datasource_id)

  def values(self):
    """Return the datasources held in the in-memory cache."""
    return self.cache.values()
