Last active
March 5, 2020 17:18
-
-
Save ju4nlu/df59af2c255d7502b3998b9d276df37f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Example of how to use this script:
from UdaGcloudStorage import download_file_from_url
# Get help using
help(download_file_from_url)
# Download the file to a temporary file
file_to_download='gs://path/to/a/file.csv'
download_file_from_url(url=file_to_download)
# Download the file to a specific file
dst_file = '/home/ju4nlu/Downloads/hey_this_is_a_test.csv'
download_file_from_url(url=file_to_download,
                       destination=dst_file)
""" | |
import os | |
import tempfile | |
from google.cloud.storage import Client | |
def print_done():
    """Print the step-completion marker.

    The marker already ends with a newline and ``print`` appends another,
    so the output is followed by one blank line.
    """
    marker = "-> done!\n"
    print(marker)
def download_file_from_url(url: str,
                           destination: str = None) -> str:
    """Download a file from a given URL to a local path.

    Only Google Cloud Storage URLs of the form ``gs://<bucket>/<object>``
    are currently supported.

    :param url: URL to download (with prefix), for example:
        'gs://uda-test/this/is/a/file.csv'
    :type url: str
    :param destination: File where the result will be saved. If
        not specified (or empty), a temporary file is created.
    :type destination: str
    :raises NotImplementedError: When the URL does not start with 'gs://'
    :return: Path of the downloaded file, None if the download failed
    :rtype: str
    """
    gs_prefix = 'gs://'

    # Validate the scheme before touching the filesystem, so an unsupported
    # URL never creates a temporary file nor removes a caller-supplied one.
    if not url.startswith(gs_prefix):
        raise NotImplementedError("Unsupported source, currently supported sources are: 'gs://'")

    # 'bucket/path/to/object' -> ('bucket', '/', 'path/to/object')
    bucket, _, blob_name = url[len(gs_prefix):].partition('/')
    if not bucket or not blob_name:
        print(f"Invalid GCS url '{url}', expected 'gs://<bucket>/<object>'")
        return None

    created_temp = not destination
    if created_temp:
        # Only the name is needed; close the handle right away so it does
        # not leak and the path can be reopened for writing.
        with tempfile.NamedTemporaryFile(delete=False) as outfile:
            final_path = outfile.name
    else:
        final_path = destination

    success = False
    try:
        success = _download_file(bucket, blob_name, final_path) is not None
    finally:
        # Clean up only the temporary file created here; a caller-supplied
        # destination is never deleted by this function.
        if not success and created_temp and os.path.exists(final_path):
            os.remove(final_path)

    return final_path if success else None


def _download_file(bucket_name: str,
                   file_name: str,
                   destination_path: str) -> str:
    """Download a file from a bucket and store it in a local file.

    Progress messages are printed to stdout. Any error raised while
    creating the client or downloading is reported and turned into a
    ``None`` return value.

    :param bucket_name: Name of the bucket where the file is located
    :type bucket_name: str
    :param file_name: Name of the file inside the bucket (without the bucket)
    :type file_name: str
    :param destination_path: Path where the downloaded file will be stored
    :type destination_path: str
    :return: Path of the downloaded file, None if there was an error
    :rtype: str
    """
    # Local import: only needed on the error path, and keeps this block
    # independent of the file-level import list.
    import traceback

    try:
        # Client creation is inside the try so credential/configuration
        # errors are reported like any other download failure.
        print("Creating GCS client...")
        storage_client = Client()
        print_done()

        print(f"Getting bucket details from {bucket_name}...")
        bucket = storage_client.get_bucket(bucket_name)
        print_done()

        print(f"Getting file information from {file_name}...")
        blob_from = bucket.blob(file_name)
        print_done()

        print(f"Downloading file to local path {destination_path}...")
        blob_from.download_to_filename(destination_path)
        print_done()

        print("The file was successfully downloaded")
        return destination_path
    except Exception:
        # print() has no exc_info keyword; emit the traceback explicitly.
        print("Error downloading file from gcloud")
        traceback.print_exc()
        return None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment