Skip to content

Instantly share code, notes, and snippets.

@ju4nlu
Last active March 5, 2020 17:18
Show Gist options
  • Save ju4nlu/df59af2c255d7502b3998b9d276df37f to your computer and use it in GitHub Desktop.
Save ju4nlu/df59af2c255d7502b3998b9d276df37f to your computer and use it in GitHub Desktop.
"""
Example of how to use this script:
from UdaGcloudStorage import download_file_from_url
# Get help using
help(download_file_from_url)
# Download the file to a temporary file
file_to_download='gs://path/to/a/file.csv'
download_file_from_url(url=file_to_download)
# Download the file to a specific file
dst_file = '/home/ju4nlu/Downloads/hey_this_is_a_test.csv'
download_file_from_url(url=file_to_download,
destination=dst_file)
"""
import os
import tempfile
from google.cloud.storage import Client
def print_done() -> None:
    """Print the completion marker shown after each progress step."""
    done_marker = "-> done!\n"
    print(done_marker)
def _remove_if_exists(path: str) -> None:
    """Delete ``path``, tolerating the case where it was never created."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def download_file_from_url(url: str,
                           destination: str = None) -> str:
    """Download a file from a given URL.

    Only Google Cloud Storage URLs (``gs://``) are currently supported.

    :param url: URL to download (with prefix), for example:
        'gs://uda-test/this/is/a/file.csv'
    :type url: str
    :param destination: File where the result will be saved. If
        not specified, a temporary file is created.
    :type destination: str
    :raises NotImplementedError: When trying to download a file
        which comes from an unknown source
    :return: Path of the downloaded file, None if the download failed
    :rtype: str
    """
    gs_prefix = 'gs://'

    # Validate the scheme before touching the filesystem, so an unsupported
    # URL never creates a temporary file nor deletes the caller's destination.
    if not url.startswith(gs_prefix):
        raise NotImplementedError("Unsupported source, currently supported sources are: 'gs://'")

    if destination:
        final_path = destination
    else:
        # Only the name is needed; close the handle right away so the
        # descriptor is not leaked and the path can be reopened for writing.
        with tempfile.NamedTemporaryFile(delete=False) as outfile:
            final_path = outfile.name

    # 'gs://<bucket>/<object name>': everything after the first '/' is the
    # object name inside the bucket.
    bucket, _, blob_name = url[len(gs_prefix):].partition('/')

    try:
        downloaded = _download_file(bucket, blob_name, final_path)
    except Exception as ex:
        # Report the failure instead of swallowing it silently; the caller
        # still gets None, as documented.
        print(f"Error downloading '{url}': {ex}")
        downloaded = None

    if not downloaded:
        # Do not leave an empty or partial file behind. The file may not
        # exist at all if the download failed before anything was written.
        _remove_if_exists(final_path)
        return None
    return final_path
def _download_file(bucket_name: str,
                   file_name: str,
                   destination_path: str) -> str:
    """Download a file from a bucket and store it in a local file.

    Progress messages are printed to standard output along the way.

    :param bucket_name: Name of the bucket where the file is located
    :type bucket_name: str
    :param file_name: Name of the file inside the bucket (without the bucket)
    :type file_name: str
    :param destination_path: Path where the downloaded file will be stored
    :type destination_path: str
    :return: Path of the downloaded file, None if there was an error
    :rtype: str
    """
    print("Creating GCS client...")
    storage_client = Client()
    print_done()
    try:
        print(f"Getting bucket details from {bucket_name}...")
        bucket = storage_client.get_bucket(bucket_name)
        print_done()
        print(f"Getting file information from {file_name}...")
        blob_from = bucket.blob(file_name)
        print_done()
        print(f"Downloading file to local path {destination_path}...")
        blob_from.download_to_filename(destination_path)
        print_done()
    except Exception:
        # print() has no ``exc_info`` keyword (that is a logging option), so
        # the traceback is formatted explicitly. Imported locally so this
        # function does not depend on the module's import block.
        import traceback
        print(f"Error downloading file from gcloud\n{traceback.format_exc()}")
        return None
    else:
        print("The file was successfully downloaded")
        return destination_path
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment