Skip to content

Instantly share code, notes, and snippets.

Last active March 4, 2019 10:18
Show Gist options
  • Save alanbchristie/9a364070c3c619499df971e6e09d8c65 to your computer and use it in GitHub Desktop.
Save alanbchristie/9a364070c3c619499df971e6e09d8c65 to your computer and use it in GitHub Desktop.
A simple Python 2.7 module to run basic validation checks on an exported Heketi database file
#!/usr/bin/env python
# A simple Python 2.7 module to run basic validation checks
# on an exported Heketi database file.
# > This is a work in progress - driven by trying to fix a damaged
# deployment. It does not check everything yet but checks what I
# believed to be important things like whether the nodes, volumes,
# devices and blocks agree with each other.
# > It's a simple piece of dictionary parsing code (designed by
# reverse-engineering the JSON file structure) but it found the problems
# in my DB file i.e. volume and device bricks with no corresponding
# entry in the brick list.
# The input file (a JSON file) is expected to be an export
# obtained with something like: -
# heketi db export --dbfile /var/lib/heketi/heketi.db --jsonfile db.json
# Run this utility with the exported file: -
# python <this-script>.py db.json
# Alan Christie
# August 2018
import json
import os
import sys
USAGE = 'Usage: <jsonfile>'

# Set to True to list every identity found, not just the counts.
verbose = False
# Set to True to report the smallest and largest brick.
show_brick_sizes = False

# Go...
# Exactly one argument (the exported JSON file) is required. Without it
# there is nothing to validate, so show the usage text and stop rather
# than falling through to an IndexError on sys.argv[1].
if len(sys.argv) != 2:
    print(USAGE)
    sys.exit(1)

h_file = sys.argv[1]
# Stop here if the file is missing; otherwise the later open() would
# fail with a traceback right after the message.
if not os.path.exists(h_file):
    print('No such file')
    sys.exit(1)
def ascii_encode_dict(data):
    """Return a copy of *data* with text keys and values as native strings.

    Intended as a ``json.load`` ``object_hook``.

    On Python 2 every ``unicode`` key and value is encoded to an ASCII
    byte string (``UnicodeEncodeError`` is raised for non-ASCII text);
    anything else is passed through untouched. On Python 3 ``str`` is
    already the native text type, so the items are copied unchanged.
    Only the top level of *data* is processed.
    """
    try:
        text_type = unicode  # noqa: F821 - this name only exists on Python 2
    except NameError:
        # Python 3: nothing to convert, just hand back a new dict.
        return dict(data)

    def ascii_encode(x):
        return x.encode('ascii') if isinstance(x, text_type) else x

    return dict((ascii_encode(key), ascii_encode(value))
                for key, value in data.items())
# Running totals of the problems reported so far.
num_warnings = 0
num_errors = 0


def warning(msg):
    """Print *msg* with a ``WARNING:`` prefix and count it in num_warnings."""
    global num_warnings
    print('WARNING: {}'.format(msg))
    num_warnings += 1


def error(msg):
    """Print *msg* with an ``ERROR:`` prefix and count it in num_errors."""
    global num_errors
    print('ERROR: {}'.format(msg))
    num_errors += 1
# Load the exported database; every JSON object is passed through
# ascii_encode_dict as it is decoded.
with open(h_file) as f:
    data = json.load(f, object_hook=ascii_encode_dict)

# Digest "clusterentries"
# This section contains a list of node and volume identities in each cluster.
cluster_ids = list(data['clusterentries'])

# Node identities
# indexed by cluster and in a 'grand' list of all nodes
cluster_node_ids = {}
node_ids = []
# Volume identities
# indexed by cluster and in a 'grand' list of all volumes
cluster_volume_ids = {}
volume_ids = []
for cluster_id in cluster_ids:
    cluster_info = data['clusterentries'][cluster_id]['Info']
    # Record what the cluster claims to own. These collections are
    # consulted by every later section, so they must be populated here.
    cluster_node_ids[cluster_id] = list(cluster_info['nodes'])
    cluster_volume_ids[cluster_id] = list(cluster_info['volumes'])
    node_ids.extend(cluster_node_ids[cluster_id])
    volume_ids.extend(cluster_volume_ids[cluster_id])

# Summary...
print('# Clusters = {}'.format(len(cluster_ids)))
if verbose:
    for cluster_id in cluster_ids:
        print(' {}'.format(cluster_id))
print('# Nodes = {}'.format(len(node_ids)))
if verbose:
    for node_id in node_ids:
        print(' {}'.format(node_id))
print('# Volumes = {}'.format(len(volume_ids)))
if verbose:
    for volume_id in volume_ids:
        print(' {}'.format(volume_id))
# Digest "volumeentries"
# Each volume links back to the cluster and lists the bricks in it
# and the device it's on.
volumeentries = list(data['volumeentries'])
# Bricks indexed by volume and in a 'grand' list of all volume bricks
volume_bricks = {}
volume_brick_ids = []
for volumeentry in volumeentries:
    entry = data['volumeentries'][volumeentry]
    volume_bricks[volumeentry] = []

    # Is the volume listed by any cluster?
    if not any(volumeentry in cluster_volume_ids[cluster_id]
               for cluster_id in cluster_ids):
        error('volume not known to a cluster {}'.format(volumeentry))

    # Check volume is in a known cluster
    # NOTE(review): the key name was missing from the damaged source line;
    # 'cluster' is taken from the Heketi volume-info schema - confirm
    # against a real export.
    volume_cluster_id = entry['Info']['cluster']
    if volume_cluster_id not in cluster_ids:
        error('volume {} cluster {} is not a cluster'.
              format(volumeentry, volume_cluster_id))

    # Collect volume Bricks
    for brick_id in entry['Bricks']:
        if brick_id in volume_brick_ids:
            error('duplicate Brick ID {} '.format(brick_id))
        # Record the brick even when it is a duplicate so the collections
        # mirror what the file actually says.
        volume_bricks[volumeentry].append(brick_id)
        volume_brick_ids.append(brick_id)

# Summary...
print('# Volume bricks = {}'.format(len(volume_brick_ids)))
if verbose:
    for brick_id in volume_brick_ids:
        print(' {}'.format(brick_id))
# Digest "deviceentries"
# The device entry identifies the node it's on and the bricks that are on it
deviceentries = list(data['deviceentries'])
device_ids = []
# Bricks indexed by device and in a 'grand' list of all device bricks
device_bricks = {}
device_brick_ids = []
for deviceentry in deviceentries:
    entry = data['deviceentries'][deviceentry]

    if deviceentry in device_ids:
        error('Duplicate device {}'.format(deviceentry))
    device_ids.append(deviceentry)
    device_bricks[deviceentry] = []

    # The device must sit on a node that some cluster listed.
    deviceentry_node_id = entry['NodeId']
    if deviceentry_node_id not in node_ids:
        error('Device {} node {} not known'.
              format(deviceentry, deviceentry_node_id))

    # Collect device bricks
    for brick_id in entry['Bricks']:
        if brick_id in device_brick_ids:
            error('Device {} Brick {} already known'.
                  format(deviceentry, brick_id))
        # Record the brick even when it is a duplicate so the collections
        # mirror what the file actually says.
        device_bricks[deviceentry].append(brick_id)
        device_brick_ids.append(brick_id)

# Summary...
print('# Devices = {}'.format(len(device_ids)))
if verbose:
    for device_id in device_ids:
        print(' {}'.format(device_id))
print('# Device bricks = {}'.format(len(device_brick_ids)))
# Digest "brickentries"
# The bricks identify the device, node and volume they're on
brickentries = list(data['brickentries'])
brickentry_ids = []
# Extremes seen so far (size and the brick that has it)
smallest_brick_size_g = None
smallest_brick_g = None
largest_brick_size_g = None
largest_brick_g = None
for brickentry in brickentries:
    if brickentry in brickentry_ids:
        error('Brick {} is not unique'.format(brickentry))
    brickentry_ids.append(brickentry)

    # Every brick should have been listed by a volume and by a device.
    if brickentry not in volume_brick_ids:
        error('Brick {} is not known to a volume'.format(brickentry))
    if brickentry not in device_brick_ids:
        error('Brick {} is not known to a device'.format(brickentry))

    brick = data['brickentries'][brickentry]
    brick_info = brick['Info']
    brickentry_node_id = brick_info['node']
    brickentry_volume_id = brick_info['volume']
    brickentry_device_id = brick_info['device']
    brickentry_path = brick_info['path']
    brickentry_size = brick_info['size']
    # NOTE(review): divides by 1,000,000 yet the summary labels the result
    # GiB - confirm the unit of 'size' in the export.
    brickentry_size_g = brickentry_size / 1000000
    brickentry_pending_id = brick['Pending']['Id']

    if brickentry_node_id not in node_ids:
        error('Brick {} node {} not known'.
              format(brickentry, brickentry_node_id))
    if brickentry_volume_id not in volume_ids:
        error('Brick {} volume {} not known'.
              format(brickentry, brickentry_volume_id))
    # The device test must look the device ID up in the device list
    # (it previously re-tested the volume ID against the volume list).
    if brickentry_device_id not in device_ids:
        error('Brick {} device {} not known'.
              format(brickentry, brickentry_device_id))
    if not brickentry_path:
        error('Brick {} path is blank'.format(brickentry))
    if brickentry_size_g <= 0:
        # Report the raw size; the scaled value may have been rounded away.
        error('Brick {} has odd size {}'.format(brickentry, brickentry_size))

    if (smallest_brick_size_g is None
            or brickentry_size_g < smallest_brick_size_g):
        smallest_brick_size_g = brickentry_size_g
        smallest_brick_g = brickentry
    if (largest_brick_size_g is None
            or brickentry_size_g > largest_brick_size_g):
        largest_brick_size_g = brickentry_size_g
        largest_brick_g = brickentry

    if brickentry_pending_id:
        warning('Brick {} is pending on ID {}'.
                format(brickentry, brickentry_pending_id))

# Summary...
print('# Bricks = {}'.format(len(brickentry_ids)))
# With no bricks at all there are no extremes to format.
if show_brick_sizes and brickentry_ids:
    print('# Smallest brick size = {:,} GiB ({})'.format(smallest_brick_size_g,
                                                         smallest_brick_g))
    print('# Largest brick size = {:,} GiB ({})'.format(largest_brick_size_g,
                                                        largest_brick_g))
if verbose:
    for brickentry_id in brickentry_ids:
        print(' {}'.format(brickentry_id))
# Digest "pendingoperations"
# Any entry here is reported as a warning.
pendingoperations = list(data['pendingoperations'])
if pendingoperations:
    # NOTE(review): the format argument was missing from the damaged source
    # line; the number of pending operations is assumed - confirm.
    warning('There are pending operations ({})'.
            format(len(pendingoperations)))
# We've looked at each major section so let's do some
# cross-referential tests...

# Built once so the membership tests below do not rescan the list.
known_brick_ids = set(brickentry_ids)

# Do the IDs listed in brickentries
# match the bricks listed against the volumes?
# i.e. is each brick in the volume list in the brickentries list?
if len(brickentry_ids) != len(volume_brick_ids):
    warning('Number of brickentries ({})'
            ' differs from the number of volume bricks ({})'.
            format(len(brickentry_ids), len(volume_brick_ids)))
for brick_id in volume_brick_ids:
    if brick_id in known_brick_ids:
        continue
    # Which volume is this brick in?
    # (if several volumes list it, the last one found is reported)
    lost_volume_id = None
    for volume_id in volume_bricks:
        if brick_id in volume_bricks[volume_id]:
            lost_volume_id = volume_id
    error('Volume {} brick {} not in brickentries'.
          format(lost_volume_id, brick_id))

# Do the IDs listed in brickentries
# match the bricks listed against the devices?
# i.e. is each brick in the device list in the brickentries list?
if len(brickentry_ids) != len(device_brick_ids):
    warning('Number of brickentries ({})'
            ' differs from the number of device bricks ({})'.
            format(len(brickentry_ids), len(device_brick_ids)))
for brick_id in device_brick_ids:
    if brick_id in known_brick_ids:
        continue
    # Which device is this brick in?
    # (if several devices list it, the last one found is reported)
    lost_device_id = None
    for device_id in device_bricks:
        if brick_id in device_bricks[device_id]:
            lost_device_id = device_id
    error('Device {} brick {} not in brickentries'.
          format(lost_device_id, brick_id))
# OK?
# Exactly one closing line: the "Looks Good" message is only printed
# when nothing at all was reported.
if num_warnings or num_errors:
    print('Done [There were issues]')
else:
    print('Done [Looks Good]')
Copy link
Author might also be worth familiarising yourself with the troubleshooting guide at

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment