Created
June 9, 2023 17:22
-
-
Save spbnick/80fb3f28ba9fe0fe4e2a3b9a383215fb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Cache:
    """
    Client for a cache of URL contents kept in a storage bucket.

    This is still a skeleton: the storage client is not created yet, so
    store() is a no-op, nothing is ever reported as cached, and fetch()
    always returns None. The name/URL formatting helpers are functional.
    """

    def __init__(self, bucket, max_store_size, bucket_public_url=None):
        """
        Initialize a cache client.

        Args:
            bucket: The name of the S3 bucket containing the cache.
            max_store_size: Maximum size the file can have to be stored.
            bucket_public_url: The base public URL of the bucket. A
                trailing slash is ignored. If None, the S3
                virtual-hosted-style URL of the bucket is used.
        """
        self.bucket = bucket
        self.max_store_size = max_store_size
        if bucket_public_url is None:
            # NOTE(review): assumes the bucket is served from the S3
            # virtual-hosted-style endpoint. The original sketch mentions
            # both S3 and GCS - confirm, or pass bucket_public_url.
            bucket_public_url = f"https://{bucket}.s3.amazonaws.com"
        # Drop a trailing slash, since _format_public_url() adds its own
        self.bucket_public_url = bucket_public_url.rstrip("/")
        # TODO: create the storage client here
        self.client = None

    def store(self, url):
        """
        Attempt to store a URL in the cache. The URL contents are not
        downloaded if they're already in the cache, or if they don't match
        the requirements (max_store_size).

        Args:
            url: The URL to try to cache.
        """
        # TODO: use self.client here. Until then nothing is stored.

    def _object_exists(self, object_name):
        """
        Check if an object with the given name exists in the bucket.

        Args:
            object_name: The name of the object to look for.

        Returns:
            True if the object exists, False if not.
        """
        # TODO: query self.client here. While store() is a no-op nothing
        # can be in the cache, so every object is reported as missing.
        return False

    def _format_object_name(self, url):
        """
        Format a cache object name for a given (potentially) cached URL.
        Does not access the S3 storage.

        Args:
            url: The (potentially) cached URL to format the object name
                for.

        Returns:
            The object name of the (potentially) cached URL: the
            hexadecimal SHA-256 digest of the UTF-8 encoded URL.
        """
        # Imported locally, as the file has no import section
        import hashlib
        return hashlib.sha256(url.encode("utf-8")).hexdigest()

    def _format_public_url(self, url):
        """
        Format a public URL for a given (potentially) cached URL.
        Does not access the S3 storage.

        Args:
            url: The (potentially) cached URL to format the public URL for.

        Returns:
            The public URL of the (potentially) cached URL.
        """
        return self.bucket_public_url + "/" + self._format_object_name(url)

    def map(self, url):
        """
        Map a URL to the public URL of its cached contents, if it is cached.

        Args:
            url: The potentially-cached URL to map.

        Returns:
            The public URL of the cached content, if the URL is cached.
            None if the URL is not cached.
        """
        if self._object_exists(self._format_object_name(url)):
            return self._format_public_url(url)
        return None

    def is_stored(self, url):
        """
        Check if a URL is stored in the cache, or not.

        Args:
            url: The URL to check.

        Returns:
            True if the URL is cached, False if not.
        """
        return self.map(url) is not None

    def fetch(self, url):
        """
        Retrieve the contents of a URL, if cached.

        Args:
            url: The URL to retrieve the cached content of.

        Returns:
            The binary contents of the cached URL, or None, if not cached.
        """
        # TODO: download the object via self.client here. Until then
        # nothing is cached, so there is never any content to return.
        return None
# Example usage of the Cache class.
cache = Cache("bucket_name", 5 * 1024 * 1024)

# Cache these URLs
url_list = ["url1", "url2", "url3"]
for url in url_list:
    cache.store(url)

# When handling a request for a URL: redirect to the cached copy, if
# there is one, and to the original URL otherwise
url_to_fetch = "foobar"
redirect_to = cache.map(url_to_fetch) or url_to_fetch


def triage(url):
    """
    Triage the cached contents of a file at a URL.

    Args:
        url: The URL of the file to triage.

    Returns:
        "Found the issue!" if the URL is cached and its contents, decoded
        as UTF-8, contain "Could not find a block device". None otherwise.
    """
    # Imported locally, as the file has no import section
    import re

    content = cache.fetch(url)
    if content is None:
        # Not cached, nothing to look at
        return None
    text = content.decode("utf-8")
    if re.search("Could not find a block device", text):
        return "Found the issue!"
    return None


# When doing the triaging of a file at a url
url = "baz"
triage_result = triage(url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment