Skip to content

Instantly share code, notes, and snippets.

@portante
Last active January 21, 2021 21:10
Show Gist options
  • Save portante/6445817500377826f6620e6527af16c2 to your computer and use it in GitHub Desktop.
Save portante/6445817500377826f6620e6527af16c2 to your computer and use it in GitHub Desktop.
A script to generate a report of Elasticsearch index usage (from _cat/indices?v&bytes=b) by prefix for a set of known date suffixes.
#!/usr/bin/env python2
# A script to generate a report of Elasticsearch index usage
# (from _cat/indices?v&bytes=b) by prefix for a set of known
# date suffixes.
#
# E.g.
# $ curl -X GET http://localhost:9200/_cat/indices?v\&bytes=b -o indices.lis
# $ ./sum-es-indices.py indices.lis
#
# A second argument of b, k, m, or g names the unit of the size columns
# in the input file (it should match the bytes= value given to
# _cat/indices); the "size" column of the report uses that same unit.
import sys
import re
import collections
import operator
import locale
# Ask for US-style number formatting in the report.  The bare 'en_US' name
# is not installed on every system, so the UTF-8 variant is tried as well.
# If neither is available the current locale is left untouched instead of
# aborting the script before it has done any work.
for _locale_name in ('en_US', 'en_US.UTF-8'):
    try:
        locale.setlocale(locale.LC_ALL, _locale_name)
    except locale.Error:
        continue
    break

# One dict per data row of the listing, separated by index state.
open_indices = []
closed_indices = []
# Number of indices (open or closed) counted under each prefix.
prefixes = collections.defaultdict(int)

# Multiplier applied to the size columns for each accepted unit letter.
_factors = {'b': 1, 'k': 1024, 'm': 1024 * 1024, 'g': 1024 * 1024 * 1024}

# The unit is the optional second command-line argument.  A missing
# argument means 'b'; an unrecognised unit keeps its text in `units` but
# uses a factor of 1.
units = sys.argv[2] if len(sys.argv) > 2 else 'b'
factor = _factors.get(units, 1)
# Parse the saved listing named by the first command-line argument into
# open_indices and closed_indices.
with open(sys.argv[1], "r") as fp:
    # The first line is the column header.
    header = fp.readline()
    header_parts = header.split()
    # When the fourth header column is "uuid", every numeric column of a
    # data row sits one position further right.  Decide that once instead
    # of re-checking the header for every row.
    shift = 1 if header_parts[3:4] == ['uuid'] else 0
    for line in fp:
        # split() already discards the line terminator.  Slicing off the
        # last character first would eat a digit of the final field when
        # the file does not end with a newline.
        parts = line.split()
        if not parts:
            # Blank line: nothing to record.
            continue
        if len(parts) == 2 and parts[0] == "close":
            # Two-field rows starting with "close" carry only the name.
            closed_indices.append({'index': parts[1]})
            continue
        open_indices.append({
            'health': parts[0],
            'status': parts[1],
            'index': parts[2],
            'pri': int(parts[3 + shift]),
            'rep': int(parts[4 + shift]),
            'docs.count': int(parts[5 + shift]),
            'docs.deleted': int(parts[6 + shift]),
            # Sizes are scaled by the factor chosen on the command line.
            'store.size': int(parts[7 + shift]) * factor,
            'pri.store.size': int(parts[8 + shift]) * factor,
        })
# Compiled expressions that recognise a run of digit groups at the end of
# an index name.  In every expression group 1 is the text ahead of the
# digits; the remaining groups are the digit runs themselves.  The list
# order is the order in which the expressions are tried.
patterns = list(map(re.compile, (
    r"(.+)([0-9]{4,})\.([0-9]{2,})\.([0-9]{2,})$",  # 4+ . 2+ . 2+ digits
    r"(.+)([0-9]{4,})-([0-9]{2,})-([0-9]{2,})$",    # 4+ - 2+ - 2+ digits
    r"(.+)([0-9]{4,})-([0-9]{2,})$",                # 4+ - 2+ digits
    r"(.+)([0-9]{4,})([0-9]{2,})([0-9]{2,})$",      # 4+, 2+, 2+ digits, no separator
    r"(.+)([0-9]{4,})([0-9]{2,})$",                 # 4+, 2+ digits, no separator
)))
# Individual names for the same compiled objects, in list order.
dotdate_r, dashdate_r, dashdateym_r, numdate_r, numdateym_r = patterns
def domatch(pat, index_name):
    """Try one compiled pattern against an index name.

    On a match, the text captured by the pattern's first group is counted
    once in the module-level ``prefixes`` tally and returned.  When the
    pattern does not match, the tally is left alone and ``None`` is
    returned.
    """
    found = pat.match(index_name)
    if found is not None:
        stem = found.group(1)
        prefixes[stem] += 1
        return stem
    return None
# Give every index a prefix, open indices first and closed ones after.
# The patterns are tried in list order and the first one that yields a
# prefix wins (domatch also counts the index under that prefix).  A name
# that no pattern accepts is counted under its full name and receives no
# 'prefix' key.
for entry in open_indices + closed_indices:
    full_name = entry['index']
    matched = None
    for candidate in patterns:
        matched = domatch(candidate, full_name)
        if matched:
            break
    if matched:
        entry['prefix'] = matched
    else:
        prefixes[full_name] += 1
# Build the per-prefix summary.  Every known prefix starts from an
# all-zero record, then each index list is walked exactly once and folded
# into the record for its prefix (rather than rescanning every index for
# every prefix).
stats = {}
for pre in prefixes:
    stats[pre] = {
        'closed': 0, 'opened': 0,
        'green': 0, 'yellow': 0, 'red': 0,
        'docs': 0, 'deleted': 0, 'size': 0, 'max_pri': 0,
    }

for idx in closed_indices:
    # An index without a 'prefix' key is grouped under its own name.
    stat = stats.get(idx.get('prefix', idx['index']))
    if stat is not None:
        stat['closed'] += 1

for idx in open_indices:
    stat = stats.get(idx.get('prefix', idx['index']))
    if stat is None:
        # Not a counted prefix: contributes to no summary row.
        continue
    health = idx['health']
    if health not in ('green', 'yellow', 'red'):
        # Explicit check instead of an assert, which "python -O" removes.
        raise ValueError("unexpected health %r for index %r"
                         % (health, idx['index']))
    stat['opened'] += 1
    stat[health] += 1
    stat['docs'] += idx['docs.count']
    stat['deleted'] += idx['docs.deleted']
    stat['size'] += idx['store.size']
    stat['max_pri'] = max(idx['pri'], stat['max_pri'])
def n(val):
    """Return *val* formatted with "%d" and the current locale's grouping.

    locale.format_string is used because locale.format was deprecated in
    Python 3.7 and removed in 3.12; format_string is also available on
    Python 2.
    """
    return locale.format_string("%d", val, grouping=True)
def f(val):
    """Return *val* formatted with "%0.1f" and the current locale's grouping.

    locale.format_string is used because locale.format was deprecated in
    Python 3.7 and removed in 3.12; format_string is also available on
    Python 2.
    """
    return locale.format_string("%0.1f", val, grouping=True)
# Column layout shared by the header row and every data row.
format_str = "%9s %6s %6s %5s %6s %5s %20s %10s %7s %20s %10s %s"

# print is written as a call with a single argument, which behaves the
# same as the print statement on Python 2 and also runs on Python 3.
print(format_str % ("indices", 'closed', 'opened', 'green', 'yellow', 'red',
                    'docs', 'deleted', 'max_pri', 'size', 'avg/sz', 'prefix'))

# Prefixes with the most indices are listed first.
sorted_prefixes = sorted(prefixes.items(), key=operator.itemgetter(1), reverse=True)
for pre, v in sorted_prefixes:
    stat = stats[pre]
    docs = stat['docs']
    # Average stored size per document; 0 when the prefix holds no documents.
    avg_size = (stat['size'] / float(docs)) if docs else 0
    print(format_str % (
        n(v),
        n(stat['closed']),
        n(stat['opened']),
        n(stat['green']),
        n(stat['yellow']),
        n(stat['red']),
        n(docs),
        n(stat['deleted']),
        stat['max_pri'],
        # Undo the scaling by `factor` so the total is shown in the
        # selected unit.
        f(stat['size'] / factor),
        f(avg_size),
        pre,
    ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment