Skip to content

Instantly share code, notes, and snippets.

@daniel-j-h
Last active August 29, 2015 14:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save daniel-j-h/d7d87dfe5de3c5bbfd0f to your computer and use it in GitHub Desktop.
Save daniel-j-h/d7d87dfe5de3c5bbfd0f to your computer and use it in GitHub Desktop.
Ceph: check that objects are uniformly distributed across placement groups, using a chi-squared hypothesis test
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import sys, json
from collections import defaultdict
from scipy.stats import chisquare
# usage: ceph pg dump_json | ./check_distribution.py
if __name__ == '__main__':
    # Parse the JSON document piped in on stdin (see the usage line above).
    dump = json.loads(sys.stdin.read())

    # Bucket the per-placement-group object counts by pool; the pool is the
    # part of the pgid in front of the dot.
    counts_by_pool = defaultdict(list)
    for pg_entry in dump['pg_stats']:
        pool_id, _ = pg_entry['pgid'].split('.')
        counts_by_pool[pool_id].append(pg_entry['stat_sum']['num_objects'])

    # Pools holding no objects at all are left out: a chi-squared test is
    # meaningless for them and would divide by zero.
    grouped = []
    for pool_id, observations in counts_by_pool.items():
        if sum(observations) == 0:
            continue
        grouped.append({'id': pool_id, 'observations': observations})

    # Debugging aid -- dump the grouped input before testing:
    #print(json.dumps(grouped, sort_keys=True, indent=2))

    # Run the test once per remaining pool and render both results (statistic
    # and p-value) as fixed-point strings.
    evaluated = []
    for entry in grouped:
        chi_squared, p_value = chisquare(entry['observations'])
        evaluated.append({
            'pool': entry['id'],
            'test': {
                'chi_squared': '{:f}'.format(chi_squared),
                'p_value': '{:f}'.format(p_value),
            },
        })

    print(json.dumps(evaluated, sort_keys=True, indent=2))
# vim: set tabstop=4 shiftwidth=4 expandtab:
[
{
"pool": "37",
"test": {
"chi_squared": "5.903030",
"p_value": "0.551117"
}
},
{
"pool": "36",
"test": {
"chi_squared": "4.695652",
"p_value": "0.697047"
}
},
{
"pool": "35",
"test": {
"chi_squared": "8.485834",
"p_value": "0.291706"
}
}
]
[
{
"pool": "8",
"test": {
"chi_squared": "49.000000",
"p_value": "0.000000"
}
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment