Skip to content

Instantly share code, notes, and snippets.

@iainelder
Created October 6, 2013 19:00
Show Gist options
  • Save iainelder/6857708 to your computer and use it in GitHub Desktop.
Save iainelder/6857708 to your computer and use it in GitHub Desktop.
requests_cache is a library that caches responses returned by the requests library. This gist shows you how to dump the cache content to the shell. You can take this as a base for saving responses to file for archiving. Using a cache allows you to implement the archiving logic separately from the fetching logic.
import requests
import requests_cache
from urlparse import urlparse
from os.path import basename
from pprint import pprint
# Baseline, before any cache is installed: by default requests uses
# urllib3, so the raw attribute is a urllib3 response, e.g.
# <class 'requests.packages.urllib3.response.HTTPResponse'>
resp = requests.get('http://httpbin.org/user-agent')
# Parenthesized so the line parses both as a Python 2 print statement and
# as a Python 3 print() call.
print(type(resp.raw))
# requests_cache monkey-patches requests with a global cache.
# The default backend is sqlite; memory, mongodb, and redis are also
# supported.
requests_cache.install_cache('demo_cache')

# From here on requests is backed by a '_Store' object, so the same call
# as before reports a different raw type:
# <class 'requests_cache.backends.base._Store'>
resp = requests.get('http://httpbin.org/user-agent')
# Parenthesized so the line parses both as a Python 2 print statement and
# as a Python 3 print() call.
print(type(resp.raw))
# /delay/1 takes 1 second to respond.
# Without a cache this would take 10 seconds.
# With a cache it takes 1 (warm) or 2 (cold).
# Existing code can use the cache without modification.
#
# A plain loop is used because the responses are discarded (no throwaway
# list is built), and range() exists on both Python 2 and Python 3.
for _ in range(10):
    requests.get('http://httpbin.org/delay/1')
# Request a couple more URLs so the cache has several entries to dump.
for extra_url in ('http://httpbin.org/user-agent',
                  'http://httpbin.org/cookies'):
    requests.get(extra_url)
# Work with the stored responses through the cache API, separately from
# the code above that fetched them.
cache = requests_cache.core.get_cache()

# Dump the cache entry for one known URL (a GET request).
# NOTE: this relies on _url_to_key, an undocumented (private) function.
user_agent_key = cache._url_to_key('http://httpbin.org/user-agent')
response, timestamp = cache.get_response_and_time(user_agent_key)

# get_response_and_time returns a normal requests Response object, so its
# usual attributes (url, content) can be read directly.
single_dump = {
    'timestamp': timestamp,
    'url': response.url,
    'filename': basename(urlparse(response.url).path),
    'content': response.content,
    'type': type(response),
}
pprint(single_dump)
# Dump the complete cache (every stored request, not just GETs).
# cache.responses.values() yields (stored entry, timestamp) pairs where the
# stored entry is a _Store; restore_response converts it back into a
# requests Response so that its content can be read.
full_dump = [
    {
        'timestamp': saved_at,
        'url': stored.url,
        'filename': basename(urlparse(stored.url).path),
        'content': cache.restore_response(stored).content,
        'type': type(stored),
    }
    for stored, saved_at in cache.responses.values()
]
pprint(full_dump)
# Another way to dump the cache: walk the keys and look each one up with
# get_response_and_time, which returns a (Response, timestamp) pair.
keyed_dump = []
for key in cache.responses.keys():
    response, timestamp = cache.get_response_and_time(key)
    keyed_dump.append({
        'timestamp': timestamp,
        'url': response.url,
        'filename': basename(urlparse(response.url).path),
        'content': response.content,
        'type': type(response),
    })
pprint(keyed_dump)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment