Skip to content

Instantly share code, notes, and snippets.

@clayg
Last active April 23, 2020 18:25
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save clayg/5d3b3ca13dea15f6a48efaa3cea92d7d to your computer and use it in GitHub Desktop.
Save clayg/5d3b3ca13dea15f6a48efaa3cea92d7d to your computer and use it in GitHub Desktop.
check handoffs vs. misplaced parts
import sys
import os
import errno
from argparse import ArgumentParser
from collections import defaultdict
from itertools import islice
from swift.common.storage_policy import split_policy_string
from swift.obj.diskfile import get_data_dir
from swift.common.ring import Ring
# Command-line interface: a required ring path, an optional devices root,
# a verbosity switch and a cap on how many handoff nodes are examined.
parser = ArgumentParser()
parser.add_argument(
    '-v', '--verbose',
    action='store_true',
    default=False,
    help='line oriented output',
)
parser.add_argument(
    'ring',
    help='specify the ring, infers datadir',
)
parser.add_argument(
    'devices',
    nargs='?',
    default='/srv/node',
    help='root of devices tree for node',
)
parser.add_argument(
    '--request-node-count',
    type=int,
    default=3,
    help='max number of handoff device to check',
)
def get_ring_and_datadir(path):
    """
    Build the Ring for a ring file and pick the matching data directory.

    The ring name is whatever precedes the first '.' in the file's
    basename. For an 'object' ring the directory comes from
    get_data_dir(policy); for any other ring it is the base name with
    an 's' appended.

    :param path: path to ring
    :returns: a tuple, (ring, datadir)
    """
    filename = os.path.basename(path)
    ring_name = filename.partition('.')[0]
    base, policy = split_policy_string(ring_name)
    datadir = get_data_dir(policy) if base == 'object' else base + 's'
    return Ring(path), datadir
def main():
    """
    Classify the partition directories found on this node's devices.

    Every numerically named entry under ``<devices>/<device>/<datadir>``
    is counted as exactly one of:

    * primary - the ring assigns that partition to a device of that name
    * handoff - a device of that name is among the first
      ``--request-node-count`` nodes yielded by ``ring.get_more_nodes()``
      for the partition
    * misplaced - neither of the above

    A table with one row per device is printed (sorted by device name);
    with ``--verbose`` the handoff and misplaced partition numbers are
    listed too. Print calls and dict handling are written so the script
    runs on both Python 2 and Python 3.
    """
    args = parser.parse_args()
    device_root = args.devices
    ring, datadir = get_ring_and_datadir(args.ring)

    # device name -> partitions the ring assigns to it, over all replicas.
    # NOTE: matching is by device name only, so devices on different nodes
    # that share a name are not distinguished from each other.
    dev2parts = defaultdict(set)
    for part2dev in ring._replica2part2dev_id:
        for part, device_id in enumerate(part2dev):
            dev2parts[ring.devs[device_id]['device']].add(part)

    # device name -> partitions it is an acceptable handoff for. Filled in
    # lazily, only for partitions that are found off their primaries.
    dev2handoffs = defaultdict(set)
    searched = set()

    def is_handoff(device_dir, part):
        # Walk the handoff chain at most once per partition and remember
        # the result for every device it names.
        if part not in searched:
            if args.verbose:
                print('get_more_nodes %s' % part)
            for dev in islice(ring.get_more_nodes(part),
                              args.request_node_count):
                dev2handoffs[dev['device']].add(part)
            searched.add(part)
        return part in dev2handoffs[device_dir]

    primary_count = defaultdict(int)
    handoffs = defaultdict(set)
    misplaced = defaultdict(set)
    for device_dir in os.listdir(device_root):
        parts_dir = os.path.join(device_root, device_dir, datadir)
        try:
            parts = os.listdir(parts_dir)
        except OSError as e:
            # ENOENT: this device has no datadir for the ring.
            # ENOTDIR: a path component (e.g. a stray file under the
            # devices root) is not a directory. Neither holds partitions.
            if e.errno in (errno.ENOENT, errno.ENOTDIR):
                continue
            raise
        for part in parts:
            # Only all-digit names are partitions; skip anything else.
            if not part.isdigit():
                continue
            part = int(part)
            if part in dev2parts[device_dir]:
                primary_count[device_dir] += 1
            elif is_handoff(device_dir, part):
                handoffs[device_dir].add(part)
            else:
                misplaced[device_dir].add(part)

    print('%9s: %9s %9s %9s' % ('device', 'primary', 'handoff', 'misplaced'))
    # Union of the key sets; works on Python 3 where keys() views cannot
    # be concatenated with '+'. Sorted so the report order is stable.
    devices = set(primary_count) | set(handoffs) | set(misplaced)
    for device in sorted(devices):
        print('%9s: %9d %9d %9d' % (
            device, primary_count[device],
            len(handoffs[device]),
            len(misplaced[device]),
        ))
        if args.verbose:
            print(' handoffs: %s' % ','.join(
                str(p) for p in sorted(handoffs[device])))
            print('misplaced: %s' % ','.join(
                str(p) for p in sorted(misplaced[device])))
# Script entry point: whatever main() returns is handed to sys.exit().
if __name__ == "__main__":
    status = main()
    sys.exit(status)
@afreiberger
Copy link

If you're interested, I added a --summary mode that outputs the sum of each column in JSON format, for use with Telegraf's exec plugin.
https://gist.github.com/afreiberger/4fad7cbb5b32e52f71b9c4d203d40285

Sample telegraf exec inputs config:

[[inputs.exec]]
commands = ["/path/to/classify_handoff_parts.py -s /etc/swift/object.ring.gz /srv/node", "/path/to/classify_handoff_parts.py -s /etc/swift/container.ring.gz /srv/node", "/path/to/classify_handoff_parts.py -s /etc/swift/account.ring.gz /srv/node"]
timeout = "10s"
data_format = "json"
name_suffix = "_swiftparts"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment