Skip to content

Instantly share code, notes, and snippets.

@jdnc
Last active December 18, 2015 04:59
Show Gist options
  • Save jdnc/1b3b41657766a5ba0cc1 to your computer and use it in GitHub Desktop.
Save jdnc/1b3b41657766a5ba0cc1 to your computer and use it in GitHub Desktop.
# Licensed under a 3-clause BSD style license - see LICENSE.rst
from __future__ import print_function, division
import os
import tempfile
import sys
from astropy import log
# python 3 compatibility
try:
import urllib2
except ImportError:
import urllib.request as urllib2
import astropy.utils.data as aud
def get_file_from_url(url, verbose=False, cache=False, encoding=None,
                      show_progress=True, save=False, overwrite=False,
                      directory="./", savename=None):
    '''
    Download a file from a given URL and optionally save it locally.

    This is essentially a wrapper around
    `astropy.utils.data.get_readable_fileobj`.

    Parameters
    ----------
    url : str
        The url from which the file must be retrieved
    verbose : bool, optional
        Whether to show informative message, default is `False`
    cache : bool, optional
        Whether to cache contents of remote URL, default is `False`
    encoding : str, optional
        Default is `None`, see also
        `astropy.utils.data.get_readable_fileobj`
    show_progress : bool, optional
        Whether to show progress bar if file is downloaded from remote
        server. Default is `True`
    save : bool, optional
        Whether to save the file on the local machine, default is `False`.
    overwrite : bool, optional
        Whether to replace an existing file with the file to be saved if
        such a file already exists. Default is `False`
    directory : str, optional
        Specify directory where file must be saved. It is created
        (including missing parents) if it does not exist. Default is the
        current directory.
    savename : str, optional
        Save the file by the name given in this string. If not specified
        the name is obtained from `get_name_from_url`.

    Returns
    -------
    result : str
        The contents of the downloaded file, exactly as returned by the
        ``read()`` of the astropy file object (this may be a byte string
        depending on `encoding` -- see the astropy documentation).

    Notes
    -----
    Saving is best-effort: if the file cannot be written (for example
    because it already exists and `overwrite` is `False`) the error is
    logged through `astropy.log` and the downloaded contents are still
    returned.

    If `verbose` is not set and `savename` is not specified then the
    user will not be informed of the file name that was chosen.
    '''
    response = urllib2.urlopen(url)
    try:
        with aud.get_readable_fileobj(response, encoding=encoding, cache=cache,
                                      show_progress=show_progress) as f:
            result = f.read()
        # The response headers are needed to pick a default file name, so
        # resolve the name before the connection is released.
        if save and savename is None:
            savename = get_name_from_url(response)
    finally:
        # Always release the connection, even if the download failed.
        response.close()

    if not save:
        return result

    # Build the path up front so that the error message below can always
    # refer to it.
    final_file = os.path.join(directory, savename)
    try:
        # An empty `directory` means "current directory": nothing to create.
        if directory and not os.path.isdir(directory):
            os.makedirs(directory)
        if not overwrite and os.path.exists(final_file):
            raise IOError("File {0} already exists and overwrite=False.".format(final_file))
        if verbose:
            print("Saving file", final_file)
        # Byte payloads must be written in binary mode; text payloads in
        # text mode.
        mode = "wb" if isinstance(result, bytes) else "w"
        with open(final_file, mode) as outf:
            outf.write(result)
    except (IOError, OSError) as exc:
        # Best-effort save: report the problem but still hand the data back.
        log.error("Failed to save file {0}: {1}".format(final_file, exc))
    return result
def get_name_from_url(response):
    '''
    Choose a local file name for a downloaded resource.

    The ``filename`` parameter of the ``Content-Disposition`` response
    header is used when the server supplies one; otherwise a unique
    temporary-style name is generated.

    Parameters
    ----------
    response : object
        A response object whose ``info()`` method returns a mapping-like
        object of headers supporting ``get`` (e.g. the object returned
        by ``urlopen``).

    Returns
    -------
    name : str
        A bare file name without any directory component.

    Notes
    -----
    Only the plain ``filename=`` parameter is understood; the extended
    ``filename*=`` form is ignored and results in a generated name.
    '''
    # try to get file name from response header returned by server
    header = response.info().get('Content-Disposition', None)
    if header:
        for param in header.split(';'):
            key, _, value = param.partition('=')
            # parameter names are matched case-insensitively
            if key.strip().lower() != 'filename':
                continue
            value = value.strip().strip('"\'').strip()
            # The header is server-controlled: drop any directory part so
            # the file cannot be written outside the target directory.
            value = os.path.basename(value.replace('\\', '/'))
            if value and value not in ('.', '..'):
                return value
    # otherwise generate a temporary file name; ``dir=""`` yields a bare
    # name without modifying the process-wide ``tempfile.tempdir``
    return tempfile.mktemp(dir="")
@keflavich
Copy link

Also, some tricks you can use:

https://gist.github.com/keflavich/30bfdf6a13faec1c1640 (compare to this gist)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment