Skip to content

Instantly share code, notes, and snippets.

@cernceph
Created February 13, 2017 20:24
Show Gist options
  • Save cernceph/ac273d5846ae8cdd6b5abd4e0574ec55 to your computer and use it in GitHub Desktop.
Save cernceph/ac273d5846ae8cdd6b5abd4e0574ec55 to your computer and use it in GitHub Desktop.
Matrix solution method for CRUSH Multipick anomaly
#!/usr/bin/env python
import random
from math import sqrt, log, pow
DEBUG = True
NPGS = 100000 # num PGs
R = 3 # num replicas
if NPGS > 3:
DEBUG = False
print 'Num PGs:', NPGS
print 'Num Replicas:', R
# OSDs: (id, weight)
# Try using different sets of OSDs here! But note that the heuristics below are really tricky!
osds = [(0, 3.0), (1, 3.0), (2, 3.0), (3, 1.0)]
#osds = [(i,3.0) for i in xrange(5)]
#osds[0] = (0,4.0)
#osds[2] = (2,3.0)
assert R < len(osds), 'Must have fewer replicas than OSDs in this prototype'
# store the OSD weights in a dict for later usage
weights = {}
total_weight = 0
for i, w in osds:
weights[i] = float(w)
total_weight += float(w)
print 'OSDs (id: weight):', weights
# compute the expected # PGs for our 4 OSDs
expected = {}
for i, w in osds:
expected[i] = NPGS * R * weights[i]/total_weight
print '\nExpected: ', expected
for r in xrange(R):
print ' Replica %d: %s' % (r, [float(v/R) for k,v in expected.iteritems()])
# initialize PG and PG-by-replica-number counters for each OSD
pgs = {}
for i, w in osds:
pgs[i] = 0
pgs_by_r = [dict(pgs) for r in xrange(R)]
# this function returns a random weighted choice
def weighted_choice(choices):
total = sum(w for c, w in choices)
r = random.uniform(0, total)
upto = 0
for c, w in choices:
if upto + w >= r:
return c
upto += w
assert False, "Shouldn't get here"
print
print "### Simulating with existing CRUSH (demonstrate the multi-pick anomoly)"
print
# Existing CRUSH
# loop over all PGs, select one with weighted choice, remove that option for the next choice.
for i in xrange(NPGS):
choices = list(osds)
for r in xrange(R):
pick = weighted_choice(choices)
pgs[pick] += 1
pgs_by_r[r][pick] += 1
choices.remove((pick, weights[pick]),)
print 'Observed: ', pgs
for r in xrange(R):
print ' Replica %d: %s' % (r, pgs_by_r[r])
print
print "### Simulating using PACE's matrix selection method"
print
# re-initialize PG and PG-by-replicanum counters for each OSD
pgs = {}
for i,w in osds:
pgs[i] = 0
pgs_by_r = [dict(pgs) for r in xrange(R)]
import numpy as np
import itertools
# generate a list of all OSD combinations
osdsets = list(itertools.combinations([id for id,w in osds], R))
print 'All OSD Combinations:', osdsets
print
# Compute the first N rows of matrix A
AT = [] # this is the transpose of A
for osdset in osdsets:
ATi = []
for i, w in osds:
if i in osdset:
ATi.append(1.0)
else:
ATi.append(0.0)
AT.append(ATi)
A = list(np.transpose(AT)) # Get A and store it as a list so we can later append
# Generate the first N entries in matrix B
B = [w for i, w in osds]
# Now we need to add additional constraints so that A x = B is solveable.
# Heuristic #1
# Set the probability of the first OSD combination to 1.0
# This doesn't always work -- maybe it *would* always work if we first sort the OSD combinations by their weight sums.
# if A is not square:
if len(A) < len(A[0]):
Ai = [0.0 for k in xrange(len(A[0]))]
Ai[0] = 1.0
A.append(Ai)
B.append(1.0)
# Heuristic #2
# For each OSD combination, find another having identically weighted osds. Add this constraint using A += (.., 1,..,-1,..) and B += 0
# loop over all i,j pairs of OSD combinations
for i in xrange(len(osdsets)):
for j in xrange(i+1,len(osdsets)):
assert i != j, "Shouldn't be possible!"
# if A is not square
if len(A) < len(A[0]):
Ai = [0.0 for k in xrange(len(A[0]))]
identical = True
for r in xrange(R):
i_weight = weights[osdsets[i][r]]
j_weight = weights[osdsets[j][r]]
if i_weight != j_weight:
identical = False
if identical:
Ai[i] = 1.0
Ai[j] = -1.0
A.append(Ai)
B.append(0.0)
else:
break
assert len(A) == len(A[0]), 'Coefficient matrix A is not square!'
# Now solve for x where A x = B
print 'Solving A x = B'
print
print 'A ='
print np.array(A)
print
print 'B ='
print np.array(B)
print
x = np.linalg.solve(A,B)
print 'x ='
print x
print
for weight in x:
assert weight >= 0.0, 'Negative weight, you either added impossible constraints or the OSD weights are unusable!'
# store the weighed OSD combinations so they can be used by the weighted_choice function
weighted_osdsets = []
i = 0
for osdset in osdsets:
weighted_osdsets.append((osdset,x[i]))
i += 1
# select your NPGS PGs.
for i in xrange(NPGS):
# make a weighted random choice
combo = list(weighted_choice(weighted_osdsets))
# shuffle the items in this choice
random.shuffle(combo)
# record this choice
r = 0
for pick in combo:
pgs[pick] += 1
pgs_by_r[r][pick] += 1
r += 1
print 'Observed: ', pgs
for r in xrange(R):
print ' Replica %d: %s' % (r, pgs_by_r[r])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment