Skip to content

Instantly share code, notes, and snippets.

@devdazed
Created December 2, 2015 21:29
Show Gist options
  • Save devdazed/26b24d469cd1b5b5485d to your computer and use it in GitHub Desktop.
Save devdazed/26b24d469cd1b5b5485d to your computer and use it in GitHub Desktop.
SSTable Tombstone Counter
import fileinput, re, operator
from collections import Counter
def sizeof_fmt(num, suffix='B'):
    """Format a quantity with binary (1024-based) unit prefixes.

    The value is divided by 1024 until its magnitude drops below 1024, and
    is then rendered with one decimal place followed by the matching prefix
    and ``suffix`` (for example ``1536`` -> ``"1.5KiB"``).

    Args:
        num: The number to format; negative values keep their sign.
        suffix: Text appended after the unit prefix.

    Returns:
        The formatted string. Values too large for the ``Zi`` prefix are
        reported in ``Yi``.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    idx = 0
    while idx < len(prefixes):
        if abs(value) < 1024.0:
            return "%3.1f%s%s" % (value, prefixes[idx], suffix)
        value = value / 1024.0
        idx += 1
    # Ran out of prefixes: whatever is left is expressed in yobi-units.
    return "%.1f%s%s" % (value, 'Yi', suffix)
def main():
    """Count tombstone lines per row key and print the top offenders.

    Lines are read through ``fileinput.input()``, i.e. from the files named
    on the command line, or from standard input when none are given.  A line
    containing ``{"key"`` starts a new row and supplies the current key; each
    line containing ``"t"`` adds one to the count for the current key.

    Prints the number of distinct keys that had at least one counted line,
    the total size of the text read, and the 25 keys with the highest counts.

    Raises:
        IndexError: If a line contains ``{"key"`` but no key can be
            extracted from it.
    """
    keys = Counter()
    data = 0
    top = 25
    key_pattern = re.compile(': "(.*)",')

    # The current key must persist across lines: the key and the lines
    # carrying the "t" marker are not necessarily the same line, so
    # resetting it per line would file those counts under None.
    current = None

    for line in fileinput.input():
        if '{"key"' in line:
            current = key_pattern.findall(line)[0]
        # NOTE(review): this is a plain substring test, so any line holding
        # the literal "t" (e.g. as a name or value) is counted too.
        if '"t"' in line:
            keys[current] += 1
        data += len(line)

    # Single-argument print(...) parses the same under Python 2 and 3.
    print('Read {0} keys and {1} of data'.format(len(keys), sizeof_fmt(data)))
    print('Top {0} keys with highest number tombstones'.format(top))
    for n, (key, count) in enumerate(keys.most_common(top), 1):
        print("{0:3} {1} => {2}".format(str(n) + '.', key, count))
# Produce the report only when run as a script (for example with
# sstable2json output piped in), not when the module is imported.
if __name__ == '__main__':
    main()
@devdazed
Copy link
Author

devdazed commented Dec 2, 2015

usage:

sstable2json /path/to/sstable-Data.db | python tc.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment