######################################################################## # # License: BSD # Created: Aug 13, 2006 # Author: Francesc Alted - faltet@pytables.com # # $Id: $ # ######################################################################## """Cython interface for several LRU cache systems. Classes (type extensions): NodeCache ObjectCache NumCache Functions: Misc variables: """ cdef extern from "Python.h": int PyUnicode_Compare(object, object) import sys import numpy from libc.string cimport memcpy, strcmp from cpython.unicode cimport PyUnicode_Check from numpy cimport import_array, ndarray, PyArray_DATA from .parameters import (DISABLE_EVERY_CYCLES, ENABLE_EVERY_CYCLES, LOWEST_HIT_RATIO) #---------------------------------------------------------------------------- # Initialization code. # The numpy API requires this function to be called before # using any numpy facilities in an extension module. import_array() #---------------------------------------------------------------------------- # ------- Minimalist NodeCache for nodes in PyTables --------- # The next NodeCache code relies on the fact that a node that is # fetched from the cache will be removed from it. Said in other words: # "A node cannot be alive and dead at the same time." # Thanks to the above behaviour, the next code has been stripped down # to a bare minimum (the info in cache is kept in just 2 lists). #*********************** Important note! ***************************** # The code behind has been carefully tuned to serve the needs of # PyTables cache for nodes. As a consequence, it is no longer # appropriate as a general LRU cache implementation. You have been # warned!. F. Alted 2006-08-08 #********************************************************************* cdef class NodeCache: """Least-Recently-Used (LRU) cache for PyTables nodes.""" def __init__(self, nslots): """Maximum nslots of the cache. If more than 'nslots' elements are added to the cache, the least-recently-used ones will be discarded. """ if nslots < 0: raise ValueError("Negative number (%s) of slots!" % nslots) self.nslots = nslots self.nextslot = 0 self.nodes = [] self.paths = [] def __len__(self): return len(self.nodes) def __setitem__(self, path, node): self.setitem(path, node) cdef setitem(self, object path, object node): """Puts a new node in the node list.""" if self.nslots == 0: # Oops, the cache is set to empty return # Check if we are growing out of space if self.nextslot == self.nslots: # It is critical to reduce nextslot *before* the preemption of # the LRU node. If not, this can lead with problems in situations # with very small caches (length 1 or so). # F. Alted 2008-10-22 self.nextslot = self.nextslot - 1 # Remove the LRU node and path (the start of the lists) del self.nodes[0] del self.paths[0] # The equality protection has been put for situations in which a # node is being preempted and added simultaneously (with very small # caches). if len(self.nodes) == len(self.paths): # Add the node and path to the end of its lists self.nodes.append(node) self.paths.append(path) self.nextslot = self.nextslot + 1 def __contains__(self, path): if self.getslot(path) == -1: return 0 else: return 1 cdef long getslot(self, object path): """Checks whether path is in this cache or not.""" cdef long i, nslot, compare nslot = -1 # -1 means not found if PyUnicode_Check(path): # Start looking from the trailing values (most recently used) for i from self.nextslot > i >= 0: #if strcmp(encoded_path, self.paths[i]) == 0: if PyUnicode_Compare(path, self.paths[i]) == 0: nslot = i break else: # Start looking from the trailing values (most recently used) for i from self.nextslot > i >= 0: #if strcmp(path, self.paths[i]) == 0: if PyUnicode_Check(self.paths[i]): compare = PyUnicode_Compare(path, self.paths[i]) else: compare = strcmp(path, self.paths[i]) if compare == 0: nslot = i break return nslot __marker = object() def pop(self, path, d=__marker): try: node = self.cpop(path) except KeyError: if d is not self.__marker: return d else: raise else: return node cdef object cpop(self, object path): cdef long nslot nslot = self.getslot(path) if nslot == -1: raise KeyError(path) else: node = self.nodes[nslot] del self.nodes[nslot] del self.paths[nslot] self.nextslot = self.nextslot - 1 return node def __iter__(self): # Do a copy of the paths list because it can be modified in the middle of # the iterator! copy = self.paths[:] return iter(copy) def __repr__(self): return "<%s (%d elements)>" % (str(self.__class__), len(self.paths)) ######################################################################## # Common code for other LRU cache classes ######################################################################## cdef class BaseCache: """Base class that implements automatic probing/disabling of the cache.""" def __init__(self, long nslots, object name): if nslots < 0: raise ValueError("Negative number (%s) of slots!" % nslots) self.setcount = 0; self.getcount = 0; self.containscount = 0 self.enablecyclecount = 0; self.disablecyclecount = 0 self.iscachedisabled = False # Cache is enabled by default self.disableeverycycles = DISABLE_EVERY_CYCLES self.enableeverycycles = ENABLE_EVERY_CYCLES self.lowesthr = LOWEST_HIT_RATIO self.nprobes = 0.0; self.hitratio = 0.0 self.nslots = nslots self.seqn_ = 0; self.nextslot = 0 self.name = name self.incsetcount = False # The array for keeping the access times (using long ints here) self.atimes = numpy.zeros(shape=nslots, dtype=numpy.int_) self.ratimes = PyArray_DATA(self.atimes) def __len__(self): return self.nslots # Machinery for determining whether the hit ratio is being effective # or not. If not, the cache will be disabled. The efficency will be # checked every cycle (the time that the cache would be refilled # completely). In situations where the cache is not being re-filled # (i.e. it is not enabled) for a long time, it is forced to be # re-enabled when a certain number of cycles has passed so as to # check whether a new scenario where the cache can be useful again # has come. # F. Alted 2006-08-09 cdef int checkhitratio(self): cdef double hitratio cdef long nslot if self.setcount > self.nslots: self.disablecyclecount = self.disablecyclecount + 1 self.enablecyclecount = self.enablecyclecount + 1 self.nprobes = self.nprobes + 1 hitratio = self.getcount / self.containscount self.hitratio = self.hitratio + hitratio # Reset the hit counters self.setcount = 0; self.getcount = 0; self.containscount = 0 if (not self.iscachedisabled and self.disablecyclecount >= self.disableeverycycles): # Check whether the cache is being effective or not if hitratio < self.lowesthr: # Hit ratio is low. Disable the cache. self.iscachedisabled = True else: # Hit ratio is acceptable. (Re-)Enable the cache. self.iscachedisabled = False self.disablecyclecount = 0 if self.enablecyclecount >= self.enableeverycycles: # We have reached the time for forcing the cache to act again self.iscachedisabled = False self.enablecyclecount = 0 return not self.iscachedisabled def couldenablecache(self): return self.couldenablecache_() # Check whether the cache is enabled or *could* be enabled in the next # setitem operation. This method can be used in order to probe whether # an (expensive) operation to be done before a .setitem() is worth the # effort or not. cdef int couldenablecache_(self): if self.nslots == 0: return False # Increment setitem because it can be that .setitem() doesn't # get called after calling this. self.setcount = self.setcount + 1; self.incsetcount = True if self.iscachedisabled: if self.setcount == self.nslots: # The cache *could* be enabled in the next setitem operation return True else: return False else: return True # Increase the access time (implemented as a C long sequence) cdef long incseqn(self): self.seqn_ = self.seqn_ + 1 if self.seqn_ < 0: # Ooops, the counter has run out of range! Reset all the access times. self.atimes[:] = sys.maxint # Set the counter to 1 (to indicate that it is newer than existing ones) self.seqn_ = 1 return self.seqn_ def __repr__(self): return "<%s(%s) (%d elements)>" % (self.name, str(self.__class__), self.nslots) ######################################################################## # Helper class for ObjectCache ######################################################################## cdef class ObjectNode: """Record of a cached value. Not for public consumption.""" def __init__(self, object key, object obj, long nslot): object.__init__(self) self.key = key self.obj = obj self.nslot = nslot def __repr__(self): return "<%s %s (slot #%s) => %s>" % (self.__class__, self.key, self.nslot, self.object) ######################################################################## # Minimalistic LRU cache implementation for general python objects # This is a *true* general lru cache for python objects ######################################################################## cdef class ObjectCache(BaseCache): """Least-Recently-Used (LRU) cache specific for python objects.""" def __init__(self, long nslots, long maxcachesize, object name): """Maximum size of the cache. If more than 'nslots' elements are added to the cache, the least-recently-used ones will be discarded. Parameters: nslots - The number of slots in cache name - A descriptive name for this cache """ super().__init__(nslots, name) self.cachesize = 0 self.maxcachesize = maxcachesize # maxobjsize will be the same as the maximum cache size self.maxobjsize = maxcachesize self.__list = [None]*nslots self.__dict = {} self.mrunode = None # Most Recent Used node # The array for keeping the object size (using long ints here) self.sizes = numpy.zeros(shape=nslots, dtype=numpy.int_) self.rsizes = PyArray_DATA(self.sizes) # Clear cache cdef clearcache_(self): self.__list = [None]*self.nslots self.__dict = {} self.mrunode = None self.cachesize = 0 self.nextslot = 0 self.seqn_ = 0 # Remove a slot (if it exists in cache) cdef removeslot_(self, long nslot): cdef ObjectNode node assert nslot < self.nslots, "Attempting to remove beyond cache capacity." node = self.__list[nslot] if node is not None: self.__list[nslot] = None del self.__dict[node.key] self.cachesize = self.cachesize - self.rsizes[nslot] self.rsizes[nslot] = 0 if self.mrunode and self.mrunode.nslot == nslot: self.mrunode = None # The next slot to be updated will be this one self.nextslot = nslot # Update a slot cdef updateslot_(self, long nslot, long size, object key, object value): cdef ObjectNode node, oldnode cdef long nslot1, nslot2 cdef object lruidx assert nslot < self.nslots, "Number of nodes exceeding cache capacity." # Remove the previous nslot self.removeslot_(nslot) # Protection against too large data cache size while size + self.cachesize > self.maxcachesize: # Remove the LRU node among the 10 largest ones largidx = self.sizes.argsort()[-10:] nslot1 = self.atimes[largidx].argmin() nslot2 = largidx[nslot1] self.removeslot_(nslot2) # Insert the new one node = ObjectNode(key, value, nslot) self.ratimes[nslot] = self.incseqn() self.rsizes[nslot] = size self.__list[nslot] = node self.__dict[key] = node self.mrunode = node self.cachesize = self.cachesize + size # The next slot to update will be the LRU self.nextslot = self.atimes.argmin() # Put the object to the data in cache (for Python calls) def setitem(self, object key, object value, object size): return self.setitem_(key, value, size) # Put the object in cache (for cython calls) # size can be the exact size of the value object or an estimation. cdef long setitem_(self, object key, object value, long size): cdef long nslot if self.nslots == 0: # The cache has been set to empty return -1 nslot = -1 # Perhaps setcount has been already incremented in couldenablecache() if not self.incsetcount: self.setcount = self.setcount + 1 else: self.incsetcount = False if size > self.maxobjsize: # Check if the object is too large return -1 if self.checkhitratio(): nslot = self.nextslot self.updateslot_(nslot, size, key, value) else: # Empty the cache because it is not effective and it is taking space self.clearcache_() return nslot # Tells whether the key is in cache or not def __contains__(self, object key): return self.__dict.has_key(key) # Tells in which slot the key is. If not found, -1 is returned. def getslot(self, object key): return self.getslot_(key) # Tells in which slot the key is. If not found, -1 is returned. cdef long getslot_(self, object key): cdef ObjectNode node if self.nslots == 0: # The cache has been set to empty return -1 self.containscount = self.containscount + 1 # Give a chance to the MRU node node = self.mrunode if node and node.key == key: return node.nslot # No luck. Look in the dictionary. node = self.__dict.get(key) if node is None: return -1 return node.nslot # Return the object to the data in cache (for Python calls) def getitem(self, object nslot): return self.getitem_(nslot) # Return the object to the data in cache (for cython calls) cdef object getitem_(self, long nslot): cdef ObjectNode node self.getcount = self.getcount + 1 node = self.__list[nslot] self.ratimes[nslot] = self.incseqn() self.mrunode = node return node.obj def __repr__(self): if self.nprobes > 0: hitratio = self.hitratio / self.nprobes else: hitratio = self.getcount / self.containscount return """<%s(%s) (%d maxslots, %d slots used, %.3f KB cachesize, hit ratio: %.3f, disabled? %s)> """ % (self.name, str(self.__class__), self.nslots, self.nextslot, self.cachesize / 1024., hitratio, self.iscachedisabled) ################################################################### # Minimalistic LRU cache implementation for numerical data ################################################################### # The next code is more efficient in situations where efficiency is low. ################################################################### #*********************** Important note! **************************** # The code behind has been carefully tuned to serve the needs of # caching numerical data. As a consequence, it is no longer appropriate # as a general LRU cache implementation. You have been warned!. # F. Alted 2006-08-09 #******************************************************************** cdef class NumCache(BaseCache): """Least-Recently-Used (LRU) cache specific for Numerical data.""" def __init__(self, object shape, object dtype, object name): """Maximum size of the cache. If more than 'nslots' elements are added to the cache, the least-recently-used ones will be discarded. Parameters: shape - The rectangular shape of the cache (nslots, nelemsperslot) itemsize - The size of the element base in cache name - A descriptive name for this cache """ cdef long nslots nslots = shape[0]; self.slotsize = shape[1] if nslots >= 1<<16: # nslots can't be higher than 2**16. Will silently trunk the number. nslots = ((1<<16)-1) # Cast makes cython happy here super().__init__(nslots, name) self.itemsize = dtype.itemsize self.__dict = {} # The cache object where all data will go # The last slot is to allow the setitem1_ method to still return # a valid scratch area for writing purposes self.cacheobj = numpy.empty(shape=(nslots+1, self.slotsize), dtype=dtype) self.rcache = PyArray_DATA(self.cacheobj) # The array for keeping the keys of slots self.keys = (-numpy.ones(shape=nslots, dtype=numpy.int64)) self.rkeys = PyArray_DATA(self.keys) # Returns the address of nslot cdef void *getaddrslot_(self, long nslot): if nslot >= 0: return self.rcache + nslot * self.slotsize * self.itemsize else: return self.rcache + self.nslots * self.slotsize * self.itemsize def setitem(self, long long key, ndarray nparr, long start): return self.setitem_(key, PyArray_DATA(nparr), start) # Copy the new data into a cache slot cdef long setitem_(self, long long key, void *data, long start): cdef long nslot nslot = self.setitem1_(key) if nslot >= 0: # Copy the data to cache memcpy(self.rcache + nslot * self.slotsize * self.itemsize, data + start * self.itemsize, self.slotsize * self.itemsize) return nslot # Return a cache data pointer appropriate to save data. # Even if the cache is disabled, this will return a -1, which is # the last element in the cache. # This version avoids a memcpy of data, but the user should be # aware that data in nslot cannot be overwritten! cdef long setitem1_(self, long long key): cdef long nslot cdef object key2 if self.nslots == 0: # Oops, the cache is set to empty return -1 # Perhaps setcount has been already incremented in couldenablecache() if not self.incsetcount: self.setcount = self.setcount + 1 else: self.incsetcount = False nslot = -1 if self.checkhitratio(): # Check if we are growing out of space if self.nextslot == self.nslots: # Get the least recently used slot nslot = self.atimes.argmin() # Remove the slot from the dict key2 = self.keys[nslot] del self.__dict[key2] self.nextslot = self.nextslot - 1 else: # Get the next slot available nslot = self.nextslot # Insert the slot in the dictionary self.__dict[key] = nslot self.keys[nslot] = key self.ratimes[nslot] = self.incseqn() self.nextslot = self.nextslot + 1 # The next reduces the performance of the cache in scenarios where # the efficicency is near to zero. I don't understand exactly why. # F. Alted 24-03-2008 elif self.nextslot > 0: # Empty the cache if needed self.__dict.clear() self.nextslot = 0 return nslot def getslot(self, long long key): return self.getslot_(key) # Tells in which slot key is. If not found, -1 is returned. cdef long getslot_(self, long long key): cdef object nslot self.containscount = self.containscount + 1 if self.nextslot == 0: # No chances for finding a slot return -1 try: nslot = self.__dict[key] except KeyError: return -1 return nslot def getitem(self, long nslot, ndarray nparr, long start): self.getitem_(nslot, PyArray_DATA(nparr), start) # This version copies data in cache to data+start. # The user should be responsible to provide a large enough data buffer # to keep all the data. cdef getitem_(self, long nslot, void *data, long start): cdef void *cachedata cachedata = self.getitem1_(nslot) # Copy the data in cache to destination memcpy(data + start * self.itemsize, cachedata, self.slotsize * self.itemsize) # Return the pointer to the data in cache # This version avoids a memcpy of data, but the user should be # aware that data in nslot cannot be overwritten! cdef void *getitem1_(self, long nslot): self.getcount = self.getcount + 1 self.ratimes[nslot] = self.incseqn() return self.rcache + nslot * self.slotsize * self.itemsize def __repr__(self): cachesize = (self.nslots * self.slotsize * self.itemsize) / 1024. if self.nprobes > 0: hitratio = self.hitratio / self.nprobes elif self.containscount > 0: hitratio = self.getcount / self.containscount else: hitratio = numpy.nan return """<%s(%s) (%d maxslots, %d slots used, %.3f KB cachesize, hit ratio: %.3f, disabled? %s)> """ % (self.name, str(self.__class__), self.nslots, self.nextslot, cachesize, hitratio, self.iscachedisabled) ## Local Variables: ## mode: python ## py-indent-offset: 2 ## tab-width: 2 ## fill-column: 78 ## End: