Skip to content

Instantly share code, notes, and snippets.

@ericbusboom
Last active August 29, 2015 14:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ericbusboom/961ec7b4cb623f86448b to your computer and use it in GitHub Desktop.
Save ericbusboom/961ec7b4cb623f86448b to your computer and use it in GitHub Desktop.
cache_url function
import requests
def cache_url(url):
    """Download a URL into the current directory, reusing an existing copy.

    The local file name is the last path element of the URL. If a file with
    that name already exists in the current directory, nothing is downloaded
    and its absolute path is returned immediately.

    The download is streamed into a temporary ``<name>.part`` file and only
    renamed to its final name once every chunk has been written, so an
    interrupted or failed transfer is never mistaken for a cached file on a
    later call. Progress is printed after each chunk.

    Args:
        url: An ``http``, ``https`` or ``ftp`` URL whose path ends in a file
            name.

    Returns:
        The absolute path of the local file.

    Raises:
        ValueError: If the URL scheme is not http/https/ftp, or if the URL
            path has no final file name component (e.g. it ends in ``/``).
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    import os
    import sys

    try:
        from urlparse import urlparse  # Python 2
    except ImportError:
        from urllib.parse import urlparse  # Python 3

    parts = urlparse(url)

    # NOTE(review): 'ftp' passes this check, but stock ``requests`` has no
    # FTP transport adapter, so such URLs will presumably fail at download
    # time unless one has been mounted elsewhere -- confirm.
    if parts.scheme not in ('http', 'https', 'ftp'):
        raise ValueError("Probably not a valid URL")

    fn = os.path.basename(parts.path)

    if not fn:
        # Without a file name there is nothing sensible to write to.
        raise ValueError("URL has no file name in its path: {0}".format(url))

    # Return the file directly if it already exists
    if os.path.exists(fn):
        return os.path.abspath(fn)

    # Imported only when a download actually happens, so that the validation
    # and cache-hit paths above do not require IPython.
    from IPython.display import clear_output

    partial = fn + '.part'
    finished = False

    r = requests.get(url, stream=True)

    try:
        # Do not cache an HTTP error page as if it were the requested file.
        r.raise_for_status()

        total = 0

        with open(partial, 'wb') as fd:
            # Read about 1MB, final len() is longer because of decompression
            for chunk in r.iter_content(2**20):
                fd.write(chunk)
                clear_output(wait=True)
                total += len(chunk)
                print("Wrote {} bytes, total {}".format(len(chunk), total))
                sys.stdout.flush()

        # Publish under the final name only after a complete download.
        os.rename(partial, fn)
        finished = True
    finally:
        r.close()
        if not finished and os.path.exists(partial):
            os.remove(partial)

    return os.path.abspath(fn)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment