Skip to content

Instantly share code, notes, and snippets.

@mnadel
Created April 4, 2018 17:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mnadel/b7195a4c8b0599da80d0c3d32832f594 to your computer and use it in GitHub Desktop.
Save mnadel/b7195a4c8b0599da80d0c3d32832f594 to your computer and use it in GitHub Desktop.
du (disk usage), but for S3 (reads ls-bucket output on stdin)
#!/usr/bin/env python
import sys
import argparse
# Command-line interface.
# --max-depth defaults to 0 (treated as "no limit") so that args.max_depth is
# always an int: without a default argparse yields None when the flag is
# omitted, and comparing None with 0 raises TypeError on Python 3.
parser = argparse.ArgumentParser(description="du your s3")
parser.add_argument(
    "--max-depth",
    type=int,
    default=0,
    help="max depth to report",
)
parser.add_argument(
    "--raw",
    action="store_true",
    help="do not pretty print output",
)
args = parser.parse_args()
class Summary:
    """Accumulate a total size and the greatest timestamp for each path.

    Sizes recorded for the same path are summed. For timestamps only the
    greatest value seen so far (by ``>`` comparison) is kept.
    """

    def __init__(self):
        self._sizes = {}  # path -> running total of recorded sizes
        self._ts = {}  # path -> greatest timestamp recorded so far

    def record_size(self, path, size):
        """Add *size* to the running total stored for *path*."""
        self._sizes[path] = self._sizes.get(path, 0) + size

    def record_ts(self, path, ts):
        """Store *ts* for *path* unless a greater one is already stored."""
        if path not in self._ts or ts > self._ts[path]:
            self._ts[path] = ts

    def print(self, raw_output=True):
        """Print one ``path size timestamp`` line per path whose size is > 0.

        Args:
            raw_output: When true, sizes are printed unformatted. Otherwise
                they are rendered with ``humanize.naturalsize`` (binary
                units) if the optional ``humanize`` package can be imported,
                or with thousands separators if it cannot.

        Raises:
            KeyError: If a path has a recorded size but no timestamp.
        """
        if raw_output:
            format_size = "{}".format
        else:
            # Resolve the optional dependency once, not once per row, and
            # only treat a failed import as "not available" so that other
            # errors (or Ctrl-C) are not silently swallowed.
            try:
                import humanize
            except ImportError:
                format_size = "{:,}".format
            else:
                def format_size(value):
                    return humanize.naturalsize(value, binary=True)

        for path, total in self._sizes.items():
            if total > 0:
                print("{} {} {}".format(path, format_size(total), self._ts[path]))
# Module-level accumulator: filled from the stdin records below and printed
# once all input has been consumed.
summary = Summary()
def parse_date(date, time):
    """Join *date* and *time* into a single ``<date>T<time>Z`` string."""
    template = "{}T{}Z"
    return template.format(date, time)
# Aggregate every listing record read from stdin under its parent prefix,
# then print the per-prefix totals.

# --max-depth is optional, so args.max_depth may be None; comparing None with
# 0 raises TypeError on Python 3. Treat a missing value as "no limit".
max_depth = args.max_depth or 0

for line in sys.stdin:
    record = line.strip()
    if not record:
        # Tolerate blank lines (e.g. a trailing newline) instead of failing
        # to unpack them.
        continue

    # A record is "<size> <key> <date> <time> <tz>". Peel the size off the
    # left and the three timestamp fields off the right so that a key which
    # itself contains whitespace is kept intact. Records with too few fields
    # still raise ValueError.
    size, rest = record.split(None, 1)
    key, date, time, _tz = rest.rsplit(None, 3)

    # Drop the last "/"-separated component of the key; the remaining
    # components form the prefix the size is accounted under.
    paths = key.split("/")[:-1]
    if max_depth > 0 and len(paths) > max_depth:
        path = "/".join(paths[:max_depth])
    elif paths:
        path = "/".join(paths)
    else:
        # The key contains no "/" at all: account it to the root.
        path = "/"

    summary.record_size(path, int(size))
    summary.record_ts(path, parse_date(date, time))

summary.print(args.raw)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment