Skip to content

Instantly share code, notes, and snippets.

@spbnick
Created June 9, 2023 17:22
Show Gist options
  • Save spbnick/80fb3f28ba9fe0fe4e2a3b9a383215fb to your computer and use it in GitHub Desktop.
Save spbnick/80fb3f28ba9fe0fe4e2a3b9a383215fb to your computer and use it in GitHub Desktop.
class Cache:
    """
    Client for a cache of URL contents kept in an S3 bucket.

    This is a skeleton: the storage client is not wired in yet, so
    store() is a no-op, nothing is ever reported as cached, and fetch()
    always returns None. Name/URL formatting is fully functional.

    NOTE(review): the original comments mentioned a "GCS client" while the
    docstrings and code referred to S3; S3 is assumed here - confirm.
    """

    def __init__(self, bucket, max_store_size, bucket_public_url=None):
        """
        Initialize a cache client.

        Args:
            bucket: The name of the S3 bucket containing the cache.
            max_store_size: Maximum size the file can have to be stored.
            bucket_public_url: Base public URL under which the bucket's
                objects are reachable. If None, the S3 virtual-hosted-style
                URL for the bucket is used.
        """
        self.bucket = bucket
        self.max_store_size = max_store_size
        if bucket_public_url is None:
            # NOTE(review): assumes the bucket is publicly readable through
            # the virtual-hosted-style S3 endpoint - confirm.
            bucket_public_url = f"https://{bucket}.s3.amazonaws.com"
        # Strip trailing slashes so that _format_public_url() never
        # produces a double slash
        self.bucket_public_url = bucket_public_url.rstrip("/")
        # TODO: Create the S3 client here
        self.client = None

    def store(self, url):
        """
        Attempt to store a URL in the cache. The URL contents is not
        downloaded if it's already in the cache or, if it doesn't match the
        requirements (max_store_size).

        Currently a no-op placeholder.

        Args:
            url: The URL to try to cache.
        """
        # TODO: Use the created S3 client (self.client) here

    def _format_object_name(self, url):
        """
        Format a cache object name for a given (potentially) cached URL.
        Does not access the S3 storage.

        Args:
            url: The (potentially) cached URL to format the object name
                for.

        Returns:
            The object name of the (potentially) cached URL: the
            hexadecimal SHA-256 digest of the UTF-8 encoded URL.
        """
        # Local import keeps the class self-contained without touching the
        # file's import section
        import hashlib

        # NOTE(review): the sketch only said "some hash"; SHA-256 is an
        # assumption - confirm before objects get stored under these names.
        return hashlib.sha256(url.encode("utf-8")).hexdigest()

    def _format_public_url(self, url):
        """
        Format a public URL for a given (potentially) cached URL.
        Does not access the S3 storage.

        Args:
            url: The (potentially) cached URL to format the public URL for.

        Returns:
            The public URL of the (potentially) cached URL.
        """
        return self.bucket_public_url + "/" + self._format_object_name(url)

    def _object_exists(self, object_name):
        """
        Check if an object with the given name exists in the cache.

        Placeholder: always returns False, since store() doesn't store
        anything yet.

        Args:
            object_name: The name of the object to check for.

        Returns:
            True if the object exists, False if not.
        """
        # TODO: Query the storage via self.client here
        return False

    def map(self, url):
        """
        Map a URL to the public URL of its cached contents, if it is cached.

        Args:
            url: The potentially-cached URL to map.

        Returns:
            The public URL of the cached content, if the URL is cached.
            None if the URL is not cached.
        """
        object_name = self._format_object_name(url)
        if self._object_exists(object_name):
            return self._format_public_url(url)
        return None

    def is_stored(self, url):
        """
        Check if a URL is stored in the cache, or not.

        Args:
            url: The URL to check.

        Returns:
            True if the URL is cached, False if not.
        """
        return self.map(url) is not None

    def fetch(self, url):
        """
        Retrieve the contents of a URL, if cached.

        Currently a placeholder, which always reports the URL as not
        cached.

        Args:
            url: The URL to retrieve the cached content of.

        Returns:
            The binary contents of the cached URL, or None, if not cached.
        """
        # TODO: Download the object contents via self.client here
        return None
# Usage examples for the Cache class

cache = Cache("bucket_name", 5 * 1024 * 1024)

# Cache these URLs
url_list = ["url1", "url2", "url3"]
for url in url_list:
    cache.store(url)

# When handling a request for a URL: redirect to the cached copy, if there
# is one, and fall back to the original URL otherwise
url_to_fetch = "foobar"
redirect_to = cache.map(url_to_fetch) or url_to_fetch


# When doing the triaging of a file at a url
def triage(url):
    """
    Look for a known issue in the cached contents of a URL.

    Args:
        url: The URL of the file to triage.

    Returns:
        "Found the issue!", if the cached contents match the known error
        message. None if the URL is not cached, or there is no match.
    """
    # Local import keeps the example self-contained
    import re

    content = cache.fetch(url)
    if content is not None:
        text = content.decode("utf-8")
        if re.search("Could not find a block device", text):
            return "Found the issue!"
    return None


url = "baz"
triage_result = triage(url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment