Last active
September 2, 2020 18:50
-
-
Save vsoch/f5a6a6d1894be1e67aa4156c5b40c8e9 to your computer and use it in GitHub Desktop.
Testing Google Life Sciences updated download / upload script (will be put into version control)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# This is a helper script for the Google Life Sciences instance to be able to:
# 1. Download a blob from storage, which is required at the onset of the
#    Snakemake workflow step to obtain the working directory:
#    gls.py download <bucket> <source> <destination>
# 2. Upload logs back to storage (or some specified directory of files):
#    gls.py save <bucket> <source-dir> <destination-dir>
#    gls.py save <bucket> /google/logs/output source/logs
import argparse | |
import datetime | |
from google.cloud import storage | |
from glob import glob | |
import sys | |
import os | |
def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Downloads a blob from the bucket."""
    client = storage.Client()
    source_bucket = client.get_bucket(bucket_name)
    # Fetch the named object and write it to the local path
    source_bucket.blob(source_blob_name).download_to_filename(destination_file_name)
    print("Blob {} downloaded to {}.".format(source_blob_name, destination_file_name))
def save_files(bucket_name, source_path, destination_path):
    """Given a directory path, save all files recursively to storage.

    Walks source_path, and uploads each file to the bucket under
    destination_path, preserving the relative layout. Blobs that
    already exist in the bucket are skipped (never overwritten).
    """
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    # Strip path indicators so joined storage paths are well formed
    bucket_name = bucket_name.strip("/")
    destination_path = destination_path.strip("/")

    # These are fullpaths
    filenames = get_source_files(source_path)
    if not filenames:
        print("Did not find any filenames under %s" % source_path)
        return
    print("\nThe following files will be uploaded: %s" % "\n".join(filenames))

    # Ensure a trailing slash so the replace below removes a complete
    # directory prefix (avoids clipping a partial directory-name match)
    source_prefix = add_ending_slash(source_path)

    # Do the upload!
    for filename in filenames:
        # The relative path of the filename from the source path
        relative_path = filename.replace(source_prefix, "", 1).strip("/")
        # The path in storage includes relative path from destination_path
        storage_path = os.path.join(destination_path, relative_path)
        full_path = os.path.join(bucket_name, storage_path)
        print(f"{filename} -> {full_path}")
        # Get the blob; only upload if it does not already exist
        blob = bucket.blob(storage_path)
        if not blob.exists():
            print("Uploading %s to %s" % (filename, full_path))
            blob.upload_from_filename(filename)
def get_source_files(source_path):
    """Given a directory, return a listing of files to upload."""
    if not os.path.exists(source_path):
        print("%s does not exist!" % source_path)
        sys.exit(0)
    found = []
    # Walk every directory under source_path; glob lists its entries
    # (dotfiles excluded, matching glob semantics) and we keep files only
    for dirpath, _dirnames, _basenames in os.walk(source_path):
        found.extend(
            entry
            for entry in glob(os.path.join(dirpath, "*"))
            if not os.path.isdir(entry)
        )
    return found
def add_ending_slash(filename):
    """Return filename guaranteed to end with a single appended "/" if missing.

    Callers replace based on a trailing slash, so normalize it here.
    """
    return filename if filename.endswith("/") else "%s/" % filename
def blob_commands(args):
    """Dispatch parsed command-line args to the matching storage action."""
    command = args.command
    if command == "download":
        # Fetch a single blob to a local file
        download_blob(
            args.bucket_name, args.source_blob_name, args.destination_file_name
        )
    elif command == "save":
        # Upload a directory tree into the bucket
        save_files(args.bucket_name, args.source_path, args.destination_path)
def main():
    """Parse the command line and run the requested subcommand."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    subparsers = parser.add_subparsers(dest="command")

    # "download": fetch one file from storage
    downloader = subparsers.add_parser("download", help=download_blob.__doc__)
    downloader.add_argument("bucket_name", help="Your cloud storage bucket.")
    downloader.add_argument("source_blob_name")
    downloader.add_argument("destination_file_name")

    # "save": upload logs (or any directory of files) back to storage
    saver = subparsers.add_parser("save", help=save_files.__doc__)
    saver.add_argument("bucket_name", help="Your cloud storage bucket.")
    saver.add_argument("source_path")
    saver.add_argument("destination_path")

    blob_commands(parser.parse_args())


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment