Skip to content

Instantly share code, notes, and snippets.

@alanbchristie
Last active March 4, 2019 10:18
Show Gist options
  • Save alanbchristie/9a364070c3c619499df971e6e09d8c65 to your computer and use it in GitHub Desktop.
Save alanbchristie/9a364070c3c619499df971e6e09d8c65 to your computer and use it in GitHub Desktop.
A simple Python 2.7 module to run basic validation checks on an exported Heketi database file
#!/usr/bin/env python
# hekcheck.py
#
# A simple Python 2.7 module to run basic validation checks
# on an exported Heketi database file.
#
# > This is a work in progress - driven by trying to fix a damaged
# deployment. It does not check everything yet but checks what I
# believed to be important things like whether the nodes, volumes,
# devices and blocks agree with each other.
#
# > It's a simple piece of dictionary parsing code (designed by
# reverse-engineering the JSON file structure) but it found the problems
# in my DB file i.e. volume and device bricks with no corresponding
# entry in the brick list.
#
# The input file (a JSON file) is expected to be an export
# obtained with something like: -
#
# heketi db export --dbfile /var/lib/heketi/heketi.db --jsonfile db.json
#
# Run this utility with the exported file: -
#
# ./hekcheck.py db.json
#
# Alan Christie
# August 2018
import json
import os
import sys
USAGE = 'Usage: hekcheck.py <jsonfile>'

# Diagnostic switches (edit by hand):
# - verbose: also list every individual ID after each summary line
# - show_brick_sizes: also report the smallest and largest brick found
verbose = False
show_brick_sizes = False

# Go...
# Exactly one command-line argument is expected: the exported JSON file.
if len(sys.argv) != 2:
    print(USAGE)
    sys.exit(1)

h_file = sys.argv[1]
if not os.path.exists(h_file):
    print('No such file')
    print(USAGE)
    sys.exit(1)
def ascii_encode_dict(data):
    """Return a copy of *data* with unicode keys and values ASCII-encoded.

    Used as the ``object_hook`` for ``json.load``. Only the top level of
    *data* is processed: each key and each value that is a ``unicode``
    instance is encoded with the ``ascii`` codec; everything else is
    passed through untouched.

    On Python 3 the ``unicode`` type does not exist (``str`` is already
    text), so the entries are copied unchanged rather than failing with
    a ``NameError``.

    Raises ``UnicodeEncodeError`` on Python 2 if a string contains
    characters outside the ASCII range.
    """
    try:
        text_type = unicode  # noqa: F821 - only defined on Python 2
    except NameError:
        # Python 3: nothing to encode, just hand back a fresh dict.
        return dict(data)

    def _encode(item):
        return item.encode('ascii') if isinstance(item, text_type) else item

    return dict((_encode(key), _encode(value)) for key, value in data.items())
def warning(msg):
    """Print *msg* prefixed with ``WARNING: `` and count it.

    The running total is kept in the module-level ``num_warnings``,
    which must have been initialised before the first call.
    """
    global num_warnings
    print('WARNING: {}'.format(msg))
    num_warnings += 1
def error(msg):
    """Print *msg* prefixed with ``ERROR: `` and count it.

    The running total is kept in the module-level ``num_errors``,
    which must have been initialised before the first call.
    """
    global num_errors
    print('ERROR: {}'.format(msg))
    num_errors += 1
# Running totals of the problems reported through warning() and error().
num_warnings = 0
num_errors = 0

# Load the exported database; the object hook post-processes every
# JSON object as it is decoded.
with open(h_file) as f:
    data = json.load(f, object_hook=ascii_encode_dict)
#
# Digest "clusterentries"
# This section contains a list of node and volume identities in each cluster.
#
cluster_ids = list(data['clusterentries'])

# Node identities
# indexed by cluster and in a 'grand' list of every node
cluster_node_ids = {}
node_ids = []
# Volume identities
# indexed by cluster and in a 'grand' list of every volume
cluster_volume_ids = {}
volume_ids = []
for cluster_id in cluster_ids:
    # Dictionary keys are unique, so each cluster is visited exactly once
    # and its per-cluster lists can be created unconditionally.
    cluster_info = data['clusterentries'][cluster_id]['Info']
    cluster_node_ids[cluster_id] = list(cluster_info['nodes'])
    cluster_volume_ids[cluster_id] = list(cluster_info['volumes'])
    node_ids.extend(cluster_node_ids[cluster_id])
    volume_ids.extend(cluster_volume_ids[cluster_id])

# Summary...
print('# Clusters = {}'.format(len(cluster_ids)))
if verbose:
    for cluster_id in cluster_ids:
        print(' {}'.format(cluster_id))
print('# Nodes = {}'.format(len(node_ids)))
if verbose:
    for node_id in node_ids:
        print(' {}'.format(node_id))
print('# Volumes = {}'.format(len(volume_ids)))
if verbose:
    for volume_id in volume_ids:
        print(' {}'.format(volume_id))
#
# Digest "volumeentries"
# Each volume links back to the cluster and lists the bricks in it
# and the device it's on.
#
volumeentries = list(data['volumeentries'])
# Brick IDs per volume, plus a flat list of every volume brick ID
volume_bricks = {}
volume_brick_ids = []
# Mirrors volume_brick_ids so the duplicate test is a constant-time lookup
seen_volume_brick_ids = set()
for volumeentry in volumeentries:
    volume_bricks[volumeentry] = []

    # Is the volume listed against at least one cluster?
    if not any(volumeentry in cluster_volume_ids[cluster_id]
               for cluster_id in cluster_ids):
        error('volume not known to a cluster {}'.format(volumeentry))

    # Check volume is in a known cluster
    volume_entry = data['volumeentries'][volumeentry]
    volume_cluster_id = volume_entry['Info']['cluster']
    if volume_cluster_id not in cluster_ids:
        error('volume {} cluster {} is not a cluster'.
              format(volumeentry, volume_cluster_id))

    # Collect volume Bricks
    # (a duplicate is reported but still recorded)
    for brick_id in volume_entry['Bricks']:
        if brick_id in seen_volume_brick_ids:
            error('duplicate Brick ID {} '.format(brick_id))
        seen_volume_brick_ids.add(brick_id)
        volume_bricks[volumeentry].append(brick_id)
        volume_brick_ids.append(brick_id)

# Summary...
print('# Volume bricks = {}'.format(len(volume_brick_ids)))
if verbose:
    for brick_id in volume_brick_ids:
        print(' {}'.format(brick_id))
#
# Digest "deviceentries"
# The device entry identifies the node it's on and the bricks that are on it
#
deviceentries = list(data['deviceentries'])
device_ids = []
# Brick IDs per device, plus a flat list of every device brick ID
device_bricks = {}
device_brick_ids = []
# Sets used purely for constant-time membership tests in the loop below
seen_device_ids = set()
seen_device_brick_ids = set()
known_node_ids = set(node_ids)
for deviceentry in deviceentries:
    if deviceentry in seen_device_ids:
        error('Duplicate device {}'.format(deviceentry))
    seen_device_ids.add(deviceentry)
    device_ids.append(deviceentry)
    device_bricks[deviceentry] = []

    # The device must sit on a node listed by one of the clusters
    device_entry = data['deviceentries'][deviceentry]
    deviceentry_node_id = device_entry['NodeId']
    if deviceentry_node_id not in known_node_ids:
        error('Device {} node {} not known'.
              format(deviceentry, deviceentry_node_id))

    # Collect device bricks
    # (a brick already seen is reported but still recorded)
    for brick_id in device_entry['Bricks']:
        if brick_id in seen_device_brick_ids:
            error('Device {} Brick {} already known'.
                  format(deviceentry, brick_id))
        seen_device_brick_ids.add(brick_id)
        device_brick_ids.append(brick_id)
        device_bricks[deviceentry].append(brick_id)

# Summary...
print('# Devices = {}'.format(len(device_ids)))
if verbose:
    for device_id in device_ids:
        print(' {}'.format(device_id))
print('# Device bricks = {}'.format(len(device_brick_ids)))
#
# Digest "brickentries"
# The bricks identify the device, node and volume they're on
#
brickentries = list(data['brickentries'])
brickentry_ids = []
# Size extremes and the brick IDs they belong to (None until a brick
# with a valid size has been seen)
smallest_brick_size_g = None
smallest_brick = None
largest_brick_size_g = None
largest_brick = None

# Sets used purely for constant-time membership tests in the loop below
seen_brickentry_ids = set()
known_volume_brick_ids = set(volume_brick_ids)
known_device_brick_ids = set(device_brick_ids)
known_node_ids = set(node_ids)
known_volume_ids = set(volume_ids)
known_device_ids = set(device_ids)

for brickentry in brickentries:
    if brickentry in seen_brickentry_ids:
        error('Brick {} is not unique'.
              format(brickentry))
    if brickentry not in known_volume_brick_ids:
        error('Brick {} is not known to a volume'.
              format(brickentry))
    if brickentry not in known_device_brick_ids:
        error('Brick {} is not known to a device'.
              format(brickentry))
    seen_brickentry_ids.add(brickentry)
    brickentry_ids.append(brickentry)

    brick_info = data['brickentries'][brickentry]['Info']
    brickentry_node_id = brick_info['node']
    brickentry_volume_id = brick_info['volume']
    brickentry_device_id = brick_info['device']
    brickentry_path = brick_info['path']
    brickentry_size = brick_info['size']
    # Floor division keeps the integer result Python 2 produced with '/'
    # when the script is run under Python 3.
    # NOTE(review): the divisor and the 'GiB' label in the summary assume
    # a particular unit for Info.size - confirm against Heketi.
    brickentry_size_g = brickentry_size // 1000000
    brickentry_pending_id = data['brickentries'][brickentry]['Pending']['Id']

    if brickentry_node_id not in known_node_ids:
        error('Brick {} node {} not known'.
              format(brickentry, brickentry_node_id))
    if brickentry_volume_id not in known_volume_ids:
        error('Brick {} volume {} not known'.
              format(brickentry, brickentry_volume_id))
    # Test the device ID against the device list (this used to repeat
    # the volume test, so an unknown device was never detected).
    if brickentry_device_id not in known_device_ids:
        error('Brick {} device {} not known'.
              format(brickentry, brickentry_device_id))
    if not brickentry_path:
        error('Brick {} path is blank'.format(brickentry))

    if brickentry_size_g <= 0:
        # Report the raw size as found in the export
        error('Brick {} has odd size {}'.format(brickentry, brickentry_size))
    else:
        if (smallest_brick_size_g is None
                or brickentry_size_g < smallest_brick_size_g):
            smallest_brick_size_g = brickentry_size_g
            smallest_brick = brickentry
        if (largest_brick_size_g is None
                or brickentry_size_g > largest_brick_size_g):
            largest_brick_size_g = brickentry_size_g
            largest_brick = brickentry

    if brickentry_pending_id:
        warning('Brick {} is pending on ID {}'.
                format(brickentry, brickentry_pending_id))

# Summary...
print('# Bricks = {}'.format(len(brickentry_ids)))
# The extremes are only set once a brick with a valid size has been
# seen; formatting None with '{:,}' would raise.
if show_brick_sizes and smallest_brick_size_g is not None:
    print('# Smallest brick size = {:,} GiB ({})'.format(smallest_brick_size_g,
                                                         smallest_brick))
    print('# Largest brick size = {:,} GiB ({})'.format(largest_brick_size_g,
                                                        largest_brick))
if verbose:
    for brickentry_id in brickentry_ids:
        print(' {}'.format(brickentry_id))
#
# Digest "pendingoperations"
#
pendingoperations = list(data['pendingoperations'])
# Any pending operation is worth flagging, but it is not an error
if pendingoperations:
    warning('There are pending operations ({})'.
            format(len(pendingoperations)))
# We've looked at each major section so let's do some
# cross-referential tests...
#
# Do the IDs listed in brickentries
# match the bricks listed against the volumes?
# i.e. is each brick in the volume list in the brickentries list?
if len(brickentry_ids) != len(volume_brick_ids):
    warning('Number of brickentries ({})'
            ' differs from the number of volume bricks ({})'.
            format(len(brickentry_ids), len(volume_brick_ids)))

# Check every volume brick individually, whether or not the counts agree:
# equal counts do not guarantee the two lists hold the same IDs.
brickentry_id_set = set(brickentry_ids)
for brick_id in volume_brick_ids:
    if brick_id in brickentry_id_set:
        continue
    # Which volume is this brick in?
    # (first match wins; None if no volume lists it)
    lost_volume_id = next((volume_id for volume_id in volume_bricks
                           if brick_id in volume_bricks[volume_id]), None)
    error('Volume {} brick {} not in brickentries'.
          format(lost_volume_id, brick_id))
# Do the IDs listed in brickentries
# match the bricks listed against the devices?
# i.e. is each brick in the device list in the brickentries list?
if len(brickentry_ids) != len(device_brick_ids):
    warning('Number of brickentries ({})'
            ' differs from the number of device bricks ({})'.
            format(len(brickentry_ids), len(device_brick_ids)))

# Check every device brick individually, whether or not the counts agree:
# equal counts do not guarantee the two lists hold the same IDs.
brickentry_id_set = set(brickentry_ids)
for brick_id in device_brick_ids:
    if brick_id in brickentry_id_set:
        continue
    # Which device is this brick in?
    # (first match wins; None if no device lists it)
    lost_device_id = next((device_id for device_id in device_bricks
                           if brick_id in device_bricks[device_id]), None)
    error('Device {} brick {} not in brickentries'.
          format(lost_device_id, brick_id))
# OK?
# A single warning or error is enough to mark the run as having issues
if num_warnings or num_errors:
    print('Done [There were issues]')
else:
    print('Done [Looks Good]')
@alanbchristie
Copy link
Author

...it might also be worth familiarising yourself with the troubleshooting guide at https://github.com/heketi/heketi/blob/master/docs/troubleshooting.md

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment