Skip to content

Instantly share code, notes, and snippets.

@cournape
Created April 10, 2014 23:43
Show Gist options
  • Save cournape/10432466 to your computer and use it in GitHub Desktop.
Save cournape/10432466 to your computer and use it in GitHub Desktop.
persistent caching with requests
"""
Hack to use requests + cachecontrol packages to make cached requests on cloud front for secured URLS.
If one redirects urls to protected CF urls, those urls will have expiry parameters that prevents
'normal' caching. This obviously only works if one can guarantee that the content is not affected
by the parameters.
"""
import base64
try:
    import urlparse
except ImportError:  # Python 3.x
    import urllib.parse as urlparse
try:
    from cPickle import loads, dumps, HIGHEST_PROTOCOL
except ImportError:  # Python 3.x
    from pickle import loads, dumps, HIGHEST_PROTOCOL

import requests

# from github.com/cournape/sqlite_cache
from sqlite_cache import SQLiteCache
# from github.com/ionrock/cachecontrol
from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import BaseCache
from cachecontrol.controller import CacheController
class QueryPathOnlyCacheController(CacheController):
    """
    A cache controller that caches entries based solely on scheme, host
    (including an explicit port, if any) and path.

    The query string, path parameters, fragment and any ``user:password@``
    prefix are ignored, so URLs that differ only in those parts share a
    single cache entry.
    """

    def cache_url(self, uri):
        """
        Return the cache key for ``uri``.

        The URI is first passed through the parent controller's
        ``cache_url`` and the result is then reduced to
        ``scheme://host[:port]/path``.
        """
        url = super(QueryPathOnlyCacheController, self).cache_url(uri)
        parts = urlparse.urlparse(url)
        # Build the host from ``netloc`` rather than ``hostname``: ``hostname``
        # drops the port (making services on different ports of the same
        # machine collide in the cache) and strips the brackets of IPv6
        # literals (yielding a malformed URL). Credentials are still removed
        # and the result is lowercased, exactly as ``hostname`` would do.
        host = parts.netloc.rpartition("@")[2].lower()
        return urlparse.urlunparse((parts.scheme, host, parts.path, "", "", ""))
class DBCache(BaseCache):
    """
    A Sqlite-backed cache.

    Using sqlite guarantees data consistency without much overhead and without the need
    of usually brittle file locks, or external services (impractical in many cases).

    Keys are base64-encoded and values are pickled before being handed to the
    underlying ``SQLiteCache``.

    NOTE(security): values are restored with ``pickle.loads``, which can
    execute arbitrary code. Only point this cache at a database file that
    is not writable by untrusted parties.
    """

    # Wrapper used to pass pickled bytes to the storage layer (presumably so
    # they are stored as a BLOB -- confirm against SQLiteCache). ``buffer``
    # only exists on Python 2; ``memoryview`` is the Python 3 counterpart.
    try:
        _blob_type = buffer  # Python 2
    except NameError:  # Python 3.x
        _blob_type = memoryview

    def __init__(self, uri=":memory:", capacity=10):
        """
        Create the cache.

        ``uri`` and ``capacity`` are forwarded unchanged to ``SQLiteCache``.
        """
        self._cache = SQLiteCache(uri, capacity)

    def _encode_key(self, key):
        """Return ``key`` base64-encoded; text keys are UTF-8 encoded first."""
        # b64encode only accepts bytes on Python 3, whereas keys may be text.
        if not isinstance(key, bytes):
            key = key.encode("utf-8")
        return base64.b64encode(key)

    def _decode_key(self, encoded_key):
        """Return the raw bytes of a key produced by ``_encode_key``."""
        return base64.b64decode(encoded_key)

    def _encode_value(self, value):
        """Pickle ``value`` and wrap the result for binary storage."""
        data = dumps(value, protocol=HIGHEST_PROTOCOL)
        return self._blob_type(data)

    def _decode_value(self, encoded_value):
        """Unpickle a value previously produced by ``_encode_value``."""
        # ``bytes`` is ``str`` on Python 2, so this matches the historical
        # ``str(buffer)`` behaviour there; on Python 3 it also copies a
        # memoryview/bytes object into plain bytes for ``loads``.
        return loads(bytes(encoded_value))

    def get(self, key):
        """Return the cached value for ``key``, or ``None`` if the backend returns ``None``."""
        encoded_value = self._cache.get(self._encode_key(key))
        if encoded_value is None:
            return None
        return self._decode_value(encoded_value)

    def set(self, key, value):
        """Store ``value`` under ``key``."""
        self._cache.set(self._encode_key(key), self._encode_value(value))

    def delete(self, key):
        """Remove the entry stored under ``key``."""
        self._cache.delete(self._encode_key(key))
if __name__ == "__main__":
    # Demo: two signed URLs for the same resource that differ only in their
    # query string.
    first_url = "http://cloufront-storage.acme.com/some/data?Expires=1397176687&Signature=abcde...."
    second_url = "http://cloufront-storage.acme.com/some/data?Expires=1397176693&Signature=abcde...."

    store = DBCache("foo.db")
    caching_adapter = CacheControlAdapter(
        store, controller_class=QueryPathOnlyCacheController
    )
    session = requests.Session()
    session.mount("http://", caching_adapter)

    # First request.
    session.get(first_url)
    # This second GET is cached (assuming the underlying content has not
    # changed and the etag value stayed the same).
    session.get(second_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment