Created
August 22, 2016 09:48
-
-
Save dvdbng/44b00161481f9c45223c1f1fddc53148 to your computer and use it in GitHub Desktop.
List bucket skipping over some directories
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fast_list_keys(bucket, prefix="", ignore_dirs=()):
    """Return the keys of *bucket*, skipping over ignored directories.

    Like ``boto.s3.bucket.list`` but keys whose ``/``-separated name
    contains any component listed in *ignore_dirs* are left out, and the
    pagination marker is pushed past an ignored directory whenever a page
    ends inside one, so its remaining contents are not fetched.

    Args:
        bucket: Object exposing ``get_all_keys(prefix=..., marker=...)``.
            The returned result set must be iterable over keys that have a
            ``name`` attribute, and must provide ``is_truncated`` and
            ``next_marker`` attributes.
        prefix: Only keys starting with this prefix are requested.
        ignore_dirs: Iterable of path component names to skip.

    Returns:
        A list of the key objects that are not under an ignored component,
        in the order the bucket returned them.
    """
    ignored = set(ignore_dirs)
    keys = []
    marker = ""
    more_results = True
    while more_results:
        rs = bucket.get_all_keys(prefix=prefix, marker=marker)

        # Track the last key of *this* page only; relying on the loop
        # variable after the loop fails on an empty first page and reuses
        # a stale key on an empty later page.
        last_key = None
        for key in rs:
            last_key = key
            if ignored.isdisjoint(key.name.split("/")):
                keys.append(key)

        resume_from = rs.next_marker
        if not resume_from and last_key is not None:
            resume_from = last_key.name
        if not resume_from:
            # Nothing to resume from: asking again with the same marker
            # would return the same empty page forever.
            break

        marker = resume_from
        parts = marker.split("/")
        for index, part in enumerate(parts):
            if part in ignored:
                # Cut the marker at the first ignored component and end it
                # with a high code-point character to skip to the end of
                # that directory. Whole components are compared (no regex)
                # so names with special characters, or names that merely
                # contain an ignored word, are handled correctly. Keys in
                # the directory sorting after "~" are still fetched, but
                # the filter above drops them.
                marker = "/".join(parts[:index + 1]) + "/~"
                break
        more_results = rs.is_truncated
    return keys
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment