Skip to content

Instantly share code, notes, and snippets.

@dvdbng
Created August 22, 2016 09:48
Show Gist options
  • Save dvdbng/44b00161481f9c45223c1f1fddc53148 to your computer and use it in GitHub Desktop.
Save dvdbng/44b00161481f9c45223c1f1fddc53148 to your computer and use it in GitHub Desktop.
List bucket skipping over some directories
def fast_list_keys(bucket, prefix="", ignore_dirs=()):
    """
    Like boto.s3.bucket.list but skip over directories named in ignore_dirs.

    Keys are fetched page by page with ``bucket.get_all_keys(prefix=...,
    marker=...)``. Whenever the position reached lies inside an ignored
    directory, the next marker is moved to the end of that directory so its
    remaining contents are not requested at all.

    Args:
        bucket: Object exposing ``get_all_keys(prefix=..., marker=...)``. The
            returned result set must be iterable over keys with a ``name``
            attribute and provide ``is_truncated`` and ``next_marker``.
        prefix: Only keys whose name starts with this prefix are requested.
        ignore_dirs: Iterable of path component names. A key is left out when
            ANY of its '/'-separated components (the last one included) is in
            this collection.

    Returns:
        A list of the key objects that were not ignored, in listing order.
    """
    marker = ""
    ignore_dirs = set(ignore_dirs)
    keys = []
    while True:
        rs = bucket.get_all_keys(prefix=prefix, marker=marker)

        # Track the last key of this page explicitly: relying on the loop
        # variable after the loop fails when the page is empty.
        last_key = None
        for key in rs:
            last_key = key
            if ignore_dirs.isdisjoint(key.name.split("/")):
                keys.append(key)

        if not rs.is_truncated:
            break

        next_marker = rs.next_marker
        if not next_marker and last_key is not None:
            next_marker = last_key.name
        if not next_marker:
            # Truncated page with neither keys nor a next marker: there is no
            # way to advance, so stop instead of requesting the same page
            # forever.
            break

        marker = next_marker
        parts = marker.split("/")
        for index, part in enumerate(parts):
            if part in ignore_dirs:
                # Use a marker ending with a high code-point character to
                # skip to the end of the directory. The marker is rebuilt from
                # the exact path components (no regex), so names containing
                # special characters or repeated inside an earlier component
                # cannot truncate it at the wrong place. Keys sorting after
                # '~' inside the directory are still fetched, but the
                # per-key filter above discards them.
                marker = "/".join(parts[: index + 1]) + "/~"
                break
    return keys
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment