Skip to content

Instantly share code, notes, and snippets.

@mcuelenaere
Last active January 29, 2019 17:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mcuelenaere/8719c4ff2c7c96f33e7899ced0df799a to your computer and use it in GitHub Desktop.
Save mcuelenaere/8719c4ff2c7c96f33e7899ced0df799a to your computer and use it in GitHub Desktop.
import sys
from collections import defaultdict
# To be used like this:
# redis-cli --scan | python aggregate_redis_keys.py
# Environment prefixes a key may start with. When several match, the longest
# one is used.
prefixes = (
    'dev_',
    'prod_',
    'staging_',
)

# Category markers expected right after the (optional) environment prefix.
# They are tried in order and the first match wins.
categories = (
    'foo_views_',
    'bar_views_',
    'foobars_',
)


def classify_key(key, known_prefixes=prefixes, known_categories=categories):
    """Split *key* into its environment prefix and its category.

    Args:
        key: A key name with any trailing newline already removed.
        known_prefixes: Candidate prefixes; the longest one that *key*
            starts with is selected (the first one on a length tie).
        known_categories: Candidate categories; the first one that the
            remainder of the key starts with is selected.

    Returns:
        A ``(prefix, category, remainder)`` tuple. ``prefix`` is ``''`` when
        no prefix matches, ``category`` is ``None`` when no category matches,
        and ``remainder`` is *key* with ``prefix`` stripped off.
    """
    prefix = ''
    for candidate in known_prefixes:
        if key.startswith(candidate) and len(candidate) > len(prefix):
            prefix = candidate

    remainder = key[len(prefix):]
    for category in known_categories:
        if remainder.startswith(category):
            return prefix, category, remainder
    return prefix, None, remainder


def aggregate_keys(lines, report_unmatched=None):
    """Count keys per (prefix, category) pair.

    Args:
        lines: Iterable of key names, one per item; trailing whitespace
            (including the newline) is stripped from each.
        report_unmatched: Optional callable invoked with the prefix-stripped
            key whenever no category matches.

    Returns:
        A ``(category_counts, total_count)`` tuple where ``category_counts``
        maps prefix -> category -> number of keys, and ``total_count`` is the
        number of lines consumed.
    """
    category_counts = defaultdict(lambda: defaultdict(int))
    total_count = 0

    for line in lines:
        prefix, category, remainder = classify_key(line.rstrip())
        if category is None:
            if report_unmatched is not None:
                report_unmatched(remainder)
        else:
            category_counts[prefix][category] += 1
        # Every key counts toward the total, matched or not, so percentages
        # are relative to all scanned keys.
        total_count += 1

    return category_counts, total_count


def format_report(category_counts, total_count):
    """Render one report line per (prefix, category) pair.

    Within each prefix, categories are listed by ascending key count.

    Args:
        category_counts: Mapping of prefix -> category -> count.
        total_count: Total number of scanned keys, used as the percentage
            denominator.

    Returns:
        A list of formatted report lines (without trailing newlines).
    """
    rows = []
    # .items() instead of .iteritems(): the latter does not exist on Python 3.
    for prefix, counts in category_counts.items():
        for category, count in sorted(counts.items(), key=lambda item: item[1]):
            # Force float division so the percentage is rounded the same way
            # on Python 2 (where int / int truncates) and Python 3.
            share = 100.0 * count / total_count
            rows.append(
                "% 10d (% 2.f%%) [% 30s] %s" % (count, share, prefix, category)
            )
    return rows


def print_unmatched(remainder):
    """Print the notice for a key that matched none of the categories."""
    print('Did not found %s in list of categories' % remainder)


def main(stream=None):
    """Read key names from *stream* (default: stdin) and print the report."""
    if stream is None:
        stream = sys.stdin
    category_counts, total_count = aggregate_keys(
        stream, report_unmatched=print_unmatched
    )
    for row in format_report(category_counts, total_count):
        print(row)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment