Skip to content

Instantly share code, notes, and snippets.

@adam-phillipps
Last active August 2, 2019 16:00
Show Gist options
  • Save adam-phillipps/8ddbef195044d32fe06ca6743a3360b8 to your computer and use it in GitHub Desktop.
Save adam-phillipps/8ddbef195044d32fe06ca6743a3360b8 to your computer and use it in GitHub Desktop.
# Image used to reproduce the polystores GCS `download_dir` failure with
# test_weird_empty_file.py.
FROM python:3.6

# --no-cache-dir keeps pip's download cache out of the image layer.
# The extras spec is quoted so the shell cannot treat [gcs] as a glob pattern.
RUN pip install --no-cache-dir google-cloud-storage "polystores[gcs]"

# Copy the build context (this file and the script) into the working directory.
WORKDIR /usr/src/app
COPY . /usr/src/app/

###################################
# Build from a directory that contains this file and the script:
#
#   docker build -t testeroo .
#
# Run like this:
#
#   docker run -it --rm \
#     -v <path to directory with your google creds>:/usr/creds \
#     -e GOOGLE_APPLICATION_CREDENTIALS=/usr/creds/<name of your creds>.json \
#     testeroo \
#     python test_weird_empty_file.py gs://mybucket data/parent1 data
####################################

# With no command given, start an interactive shell so the script can be run
# by hand from inside the container.
CMD ["/bin/bash"]
# build the container
$ docker build -t testeroo .
# run the container.
# make sure your Google credentials JSON key is in the host directory you mount as the volume, and that the GOOGLE_APPLICATION_CREDENTIALS env var points to that file's path inside the container
$ docker run -it --rm \
-v ~/code/storetest/tmp/creds:/usr/creds \
-e GOOGLE_APPLICATION_CREDENTIALS=/usr/creds/mycreds.json \
testeroo
# then, from inside the container you can run it and inspect the data dir that was downloaded
/usr/src/app# python test_weird_empty_file.py gs://mybucket data/parent1 data
Uploading to data/parent1/...
Uploading to data/parent2/...
Uploading to data/parent1/arch.tar...
Uploading to data/parent1/sub1/image1.jpg...
Uploading to data/parent1/sub1/image2.jpg...
Traceback (most recent call last):
File "test_weird_empty_file.py", line 48, in <module>
download(bucket_addr, prefix, local)
File "test_weird_empty_file.py", line 31, in download
store.download_dir(dir_to_download, local)
File "/usr/local/lib/python3.6/site-packages/polystores/stores/gcs_store.py", line 327, in download_dir
use_basename=False)
File "/usr/local/lib/python3.6/site-packages/polystores/stores/gcs_store.py", line 254, in download_file
blob.download_to_filename(local_path)
File "/usr/local/lib/python3.6/site-packages/google/cloud/storage/blob.py", line 664, in download_to_filename
with open(filename, "wb") as file_obj:
IsADirectoryError: [Errno 21] Is a directory: '/usr/src/app/data/parent1'
# notice that much of the download did actually run through but we haven't pulled in the `parent2` dir
/usr/src/app# ls data/parent1/sub1/
image1.jpg image2.jpg
import os
import sys
from google.cloud import storage
from polystores.stores.gcs_store import GCSStore
def _client():
    """Create a Google Cloud Storage client.

    Returns:
        A new ``storage.Client`` built with no explicit arguments.
    """
    gcs_client = storage.Client()
    return gcs_client
def upload(bucket_name):
    """Seed the bucket with the fixture objects used by this repro.

    Every object receives the same placeholder text, including the two
    names that end in "/" (``data/parent1/`` and ``data/parent2/``).

    Args:
        bucket_name: Name of the bucket, passed to ``get_bucket``.
    """
    bucket = _client().get_bucket(bucket_name)
    payload = "some file contents bla bla bla"
    object_names = (
        "data/parent1/",
        "data/parent2/",
        "data/parent1/arch.tar",
        "data/parent1/sub1/image1.jpg",
        "data/parent1/sub1/image2.jpg",
    )
    for object_name in object_names:
        target = bucket.blob(object_name)
        print("Uploading to {}...".format(object_name))
        target.upload_from_string(data=payload)
def download(remote, prefix, local):
    """Download a bucket prefix to local disk, then print the store listing.

    Args:
        remote: Bucket address, e.g. ``gs://mybucket``.
        prefix: Path inside the bucket to download, e.g. ``data/parent1``.
        local: Local destination handed to ``download_dir``, e.g. ``data``.
    """
    gcs_store = GCSStore(client=_client())
    # The same joined address is used for both the download and the listing.
    source_dir = os.path.join(remote, prefix)
    gcs_store.download_dir(source_dir, local)
    print("Downloaded: {}".format(gcs_store.list(source_dir)))
# Command-line arguments (three, positional):
#   1. bucket address, e.g. gs://mybucket
#   2. prefix to download, e.g. data/parent1
#   3. local directory to download into, e.g. data
if __name__ == "__main__":
    bucket_addr, prefix, local = sys.argv[1], sys.argv[2], sys.argv[3]
    # "gs://mybucket".split('/') gives ['gs:', '', 'mybucket'], so index 2
    # is the bare bucket name.
    bucket_name = bucket_addr.split('/')[2]

    upload(bucket_name)
    download(bucket_addr, prefix, local)
# tree of the bucket in google storage
.
└── data
├── parent1
│   ├── arch.tar
│   ├── sub1
│   │   ├── image1.jpg
│   │   └── image2.jpg
│   └── sub2
└── parent2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment