Skip to content

Instantly share code, notes, and snippets.

@reubano
Last active August 3, 2017 11:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reubano/90c6767f8c3ca27b451791eb411feccf to your computer and use it in GitHub Desktop.
Save reubano/90c6767f8c3ca27b451791eb411feccf to your computer and use it in GitHub Desktop.
boto3==1.4.4
s3fs==0.1.1
pydicom==0.9.9
from os import listdir, path
try:
import boto3
except ImportError:
ClientError = boto3 = None
else:
from botocore.exceptions import ClientError
try:
from s3fs import S3FileSystem
except ImportError:
s3fs = None
else:
s3fs = S3FileSystem()
def gen_s3_dir_names(dirpath):
split_path = list(filter(None, dirpath.split('/')))
split_path_len = len(split_path)
if split_path_len < 2:
# invalid path, e.g., s3://
raise FileNotFoundError('No bucket found in your path.')
bucket_name = split_path[1]
error_msg = "No such bucket: '{}'".format(bucket_name)
if split_path_len == 2:
# in the bucket's top level, e.g., s3://dd-stage2
s3client = boto3.client('s3')
paginator = s3client.get_paginator('list_objects')
result = paginator.paginate(Bucket=bucket_name, Delimiter='/')
try:
for res in result.search('CommonPrefixes'):
yield res.get('Prefix').rstrip('/')
except ClientError:
raise FileNotFoundError(error_msg)
else:
# inside a folder so we need to filter by the prefix, e.g.,
# s3://dd-stage2/004828796b994741c4466f59a8c7e9a4
s3resource = boto3.resource('s3')
bucket = s3resource.Bucket(bucket_name)
prefix = '{}/'.format('/'.join(split_path[2:]))
try:
for obj in bucket.objects.filter(Prefix=prefix):
yield obj.key.replace(prefix, '')
except ClientError:
raise FileNotFoundError(error_msg)
def gendir(dirpath, as_abspath=False, as_file_obj=False, exclude_hidden=True):
"""like os.listdir but returns an iterator, opens s3 paths, and optionally
returns abspaths or file-like objects
"""
is_s3 = dirpath.startswith('s3://')
stripped_path = dirpath.rstrip('/')
if is_s3 and not boto3:
msg = 'Warning! You must install boto3 before calling this function.'
raise ImportError(msg)
elif is_s3:
names = gen_s3_dir_names(stripped_path)
else:
names = iter(listdir(dirpath))
if exclude_hidden:
names = (name for name in names if not name.startswith('.'))
if is_s3 and as_file_obj and not s3fs:
msg = 'Warning! You must install s3fs before calling this function.'
raise ImportError(msg)
elif is_s3 and (as_abspath or as_file_obj):
paths = (path.join(stripped_path, name) for name in names)
entries = map(s3fs.open, paths) if as_file_obj else paths
elif as_abspath or as_file_obj:
paths = (path.join(path.abspath(stripped_path), name) for name in names)
entries = (open(p, mode='rb') for p in paths) if as_file_obj else paths
else:
entries = names
return entries
import dicom
from s3utils import gendir
files = gendir(dir_abspath, as_file_obj=True)
slices = map(dicom.read_file, files)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment