Skip to content

Instantly share code, notes, and snippets.

@Animenosekai
Created May 28, 2022 14:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Animenosekai/4e5a3a980e7ed2e542003a58a54ede96 to your computer and use it in GitHub Desktop.
Save Animenosekai/4e5a3a980e7ed2e542003a58a54ede96 to your computer and use it in GitHub Desktop.
a size limited LRU cache implementation
"""
lru.py
LRU cache implementation for the translatepy project.
"""
import gc
import sys
from collections import OrderedDict
class SizeLimitedLRUCache(OrderedDict):
"""
Special implementation of a size limited LRU cache.
"""
def __init__(self, *args, max_size: int = 1e+7, **kwds):
"""
Initialize a cache object with a maximum cache size.
Parameters
----------
max_size : int
Maximum size of the cache in bytes.
"""
self.max_size = int(max_size)
super().__init__(*args, **kwds)
def __getitem__(self, key):
"""
Get an item from the cache.
Parameters
----------
key : Any
The key to get the item for.
Returns
-------
Any
The item for the given key.
Notes
-----
- Calling self.__getitem__(key) is equivalent to calling self[key].
- This is considered as an operation made to the cache and the object will be placed at the end.
"""
value = super().__getitem__(key)
self.move_to_end(key)
return value
def __setitem__(self, key, value):
"""
Set an item in the cache.
Parameters
----------
key : Any
The key to set the item for.
value : Any
The item to set.
Notes
-----
- Calling self.__setitem__(key, value) is equivalent to calling self[key] = value.
- This is considered as an operation made to the cache and the object will be placed at the end.
"""
new_obj_size = self.get_size(value)
while self.get_size(self) + new_obj_size > self.max_size:
oldest = next(iter(self))
del self[oldest]
super().__setitem__(key, value)
self.move_to_end(key)
def get_size(self, obj, builtin: bool = False):
"""
Get the size of an object.
Parameters
----------
obj : Any
The object to get the size of.
builtin : bool
Whether to use the builtin sys.getsizeof function or not.
This function is faster than the custom implementation but might yield inaccurate results for complex objects.
Returns
-------
int
The size of the object in bytes.
Notes
-----
The second implementation of this function is based on the following article:
https://towardsdatascience.com/the-strange-size-of-python-objects-in-memory-ce87bdfbb97f
"""
if builtin:
return sys.getsizeof(obj)
memory_size = 0
ids = set()
objects = [obj]
while objects:
new = []
for obj in objects:
if id(obj) not in ids:
ids.add(id(obj))
memory_size += sys.getsizeof(obj)
new.append(obj)
objects = gc.get_referents(*new)
return memory_size
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment