Skip to content

Instantly share code, notes, and snippets.

@vsoch
Last active September 2, 2020 18:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vsoch/f5a6a6d1894be1e67aa4156c5b40c8e9 to your computer and use it in GitHub Desktop.
Save vsoch/f5a6a6d1894be1e67aa4156c5b40c8e9 to your computer and use it in GitHub Desktop.
Testing Google Life Sciences updated download / upload script (will be put into version control)
#!/usr/bin/env python
# This is a helper script for the Google Life Sciences instance to be able to:
# 1. download a blob from storage, which is required at the onset of the Snakemake
# workflow step to obtain the working directory.
# gls.py download <bucket> <source> <destination>
# 2. Upload logs back to storage (or some specified directory of files)
# gls.py save <bucket> <source-dir> <destination-dir>
# gls.py save <bucket> /google/logs/output source/logs
import argparse
import datetime
from google.cloud import storage
from glob import glob
import sys
import os
def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Downloads a blob from the bucket."""
    # NOTE: main() passes this docstring to argparse as the help text of the
    # "download" subcommand, so it is kept as a one-line summary and the
    # parameter notes live in comments instead.
    #
    # bucket_name: name of the bucket that holds the object.
    # source_blob_name: name of the object inside the bucket.
    # destination_file_name: local file path the contents are written to.
    client = storage.Client()
    remote = client.get_bucket(bucket_name).blob(source_blob_name)
    remote.download_to_filename(destination_file_name)
    print(f"Blob {source_blob_name} downloaded to {destination_file_name}.")
def save_files(bucket_name, source_path, destination_path):
    """given a directory path, save all files recursively to storage
    """
    # NOTE: main() passes the docstring above to argparse as the help text of
    # the "save" subcommand, so parameter notes live in comments instead.
    #
    # bucket_name: name of the destination bucket; surrounding "/" is ignored.
    # source_path: local directory whose files are uploaded.
    # destination_path: prefix inside the bucket under which the files are
    #     stored, keeping their path relative to source_path.

    # Strip path indicators BEFORE the bucket lookup so the cleaned name is
    # the one actually sent to the storage API (and shown in the log lines).
    bucket_name = bucket_name.strip("/")
    destination_path = destination_path.strip("/")

    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    # These are fullpaths
    filenames = get_source_files(source_path)
    if not filenames:
        print("Did not find any filenames under %s" % source_path)
        return
    print("\nThe following files will be uploaded: %s" % "\n".join(filenames))

    # Do the upload!
    for filename in filenames:
        # The relative path of the filename from the source path
        relative_path = os.path.relpath(filename, source_path)

        # The path in storage includes relative path from destination_path
        storage_path = os.path.join(destination_path, relative_path)
        full_path = os.path.join(bucket_name, storage_path)
        print(f"{filename} -> {full_path}")

        # Objects that already exist in the bucket are left untouched.
        blob = bucket.blob(storage_path)
        if not blob.exists():
            print("Uploading %s to %s" % (filename, full_path))
            blob.upload_from_filename(filename)
def get_source_files(source_path):
    """Given a directory, return a listing of files to upload

    Arguments:
      source_path: directory that is searched recursively.

    Returns a list with one path per file found below source_path. Each path
    is the walked directory joined with the file name, so it starts with
    source_path as given. Directories themselves are never listed, and files
    whose name starts with "." are skipped, as a shell-style "*" match would.

    Prints a message and exits the process with status 0 when source_path
    does not exist.
    """
    if not os.path.exists(source_path):
        print("%s does not exist!" % source_path)
        # NOTE(review): status 0 means a missing directory is not reported as
        # a failure; presumably deliberate so that absent logs do not fail
        # the calling step - confirm before changing.
        sys.exit(0)

    filenames = []
    # os.walk already separates files from directories, so use its listing
    # directly. Building a glob pattern from the directory name would treat
    # characters such as "[" or "*" in that name as wildcards and miss files.
    for dirpath, _dirnames, basenames in os.walk(source_path):
        filenames.extend(
            os.path.join(dirpath, basename)
            for basename in basenames
            if not basename.startswith(".")
        )
    return filenames
def add_ending_slash(filename):
    """Return filename with a "/" appended unless it already ends with one.

    A trailing slash makes prefix replacement on paths unambiguous.
    """
    if filename.endswith("/"):
        return filename
    return filename + "/"
def blob_commands(args):
    """Run the storage action selected by args.command.

    "download" calls download_blob and "save" calls save_files, each with the
    matching attributes of the parsed arguments. Any other command value is
    ignored and nothing is run.
    """
    action = args.command

    if action == "download":
        download_blob(
            args.bucket_name,
            args.source_blob_name,
            args.destination_file_name,
        )
        return

    if action == "save":
        save_files(args.bucket_name, args.source_path, args.destination_path)
def main():
    """Parse the command line and run the requested storage subcommand.

    Two subcommands are available:
      download <bucket_name> <source_blob_name> <destination_file_name>
      save <bucket_name> <source_path> <destination_path>

    A missing or unknown subcommand makes argparse print a usage error and
    exit with a non-zero status instead of silently doing nothing.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    subparsers = parser.add_subparsers(dest="command")
    # Subparsers are optional by default in Python 3; without this, running
    # the script with no subcommand would exit successfully without any work.
    subparsers.required = True

    # Download file from storage
    download_parser = subparsers.add_parser("download", help=download_blob.__doc__)
    download_parser.add_argument("bucket_name", help="Your cloud storage bucket.")
    download_parser.add_argument(
        "source_blob_name", help="Name of the blob in the bucket to download."
    )
    download_parser.add_argument(
        "destination_file_name", help="Local file path to write the blob to."
    )

    # Save logs to storage
    save_parser = subparsers.add_parser("save", help=save_files.__doc__)
    save_parser.add_argument("bucket_name", help="Your cloud storage bucket.")
    save_parser.add_argument(
        "source_path", help="Local directory whose files are uploaded."
    )
    save_parser.add_argument(
        "destination_path", help="Path in the bucket to upload the files under."
    )

    args = parser.parse_args()
    blob_commands(args)
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment