Created
May 28, 2022 14:28
-
-
Save Animenosekai/4e5a3a980e7ed2e542003a58a54ede96 to your computer and use it in GitHub Desktop.
a size limited LRU cache implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
lru.py | |
LRU cache implementation for the translatepy project. | |
""" | |
import gc | |
import sys | |
from collections import OrderedDict | |
class SizeLimitedLRUCache(OrderedDict): | |
""" | |
Special implementation of a size limited LRU cache. | |
""" | |
def __init__(self, *args, max_size: int = 1e+7, **kwds): | |
""" | |
Initialize a cache object with a maximum cache size. | |
Parameters | |
---------- | |
max_size : int | |
Maximum size of the cache in bytes. | |
""" | |
self.max_size = int(max_size) | |
super().__init__(*args, **kwds) | |
def __getitem__(self, key): | |
""" | |
Get an item from the cache. | |
Parameters | |
---------- | |
key : Any | |
The key to get the item for. | |
Returns | |
------- | |
Any | |
The item for the given key. | |
Notes | |
----- | |
- Calling self.__getitem__(key) is equivalent to calling self[key]. | |
- This is considered as an operation made to the cache and the object will be placed at the end. | |
""" | |
value = super().__getitem__(key) | |
self.move_to_end(key) | |
return value | |
def __setitem__(self, key, value): | |
""" | |
Set an item in the cache. | |
Parameters | |
---------- | |
key : Any | |
The key to set the item for. | |
value : Any | |
The item to set. | |
Notes | |
----- | |
- Calling self.__setitem__(key, value) is equivalent to calling self[key] = value. | |
- This is considered as an operation made to the cache and the object will be placed at the end. | |
""" | |
new_obj_size = self.get_size(value) | |
while self.get_size(self) + new_obj_size > self.max_size: | |
oldest = next(iter(self)) | |
del self[oldest] | |
super().__setitem__(key, value) | |
self.move_to_end(key) | |
def get_size(self, obj, builtin: bool = False): | |
""" | |
Get the size of an object. | |
Parameters | |
---------- | |
obj : Any | |
The object to get the size of. | |
builtin : bool | |
Whether to use the builtin sys.getsizeof function or not. | |
This function is faster than the custom implementation but might yield inaccurate results for complex objects. | |
Returns | |
------- | |
int | |
The size of the object in bytes. | |
Notes | |
----- | |
The second implementation of this function is based on the following article: | |
https://towardsdatascience.com/the-strange-size-of-python-objects-in-memory-ce87bdfbb97f | |
""" | |
if builtin: | |
return sys.getsizeof(obj) | |
memory_size = 0 | |
ids = set() | |
objects = [obj] | |
while objects: | |
new = [] | |
for obj in objects: | |
if id(obj) not in ids: | |
ids.add(id(obj)) | |
memory_size += sys.getsizeof(obj) | |
new.append(obj) | |
objects = gc.get_referents(*new) | |
return memory_size |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment