Created
February 13, 2017 20:24
-
-
Save cernceph/ac273d5846ae8cdd6b5abd4e0574ec55 to your computer and use it in GitHub Desktop.
Matrix solution method for CRUSH Multipick anomaly
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import random | |
from math import sqrt, log, pow | |
DEBUG = True | |
NPGS = 100000 # num PGs | |
R = 3 # num replicas | |
if NPGS > 3: | |
DEBUG = False | |
print 'Num PGs:', NPGS | |
print 'Num Replicas:', R | |
# OSDs: (id, weight) | |
# Try using different sets of OSDs here! But note that the heuristics below are really tricky! | |
osds = [(0, 3.0), (1, 3.0), (2, 3.0), (3, 1.0)] | |
#osds = [(i,3.0) for i in xrange(5)] | |
#osds[0] = (0,4.0) | |
#osds[2] = (2,3.0) | |
assert R < len(osds), 'Must have fewer replicas than OSDs in this prototype' | |
# store the OSD weights in a dict for later usage | |
weights = {} | |
total_weight = 0 | |
for i, w in osds: | |
weights[i] = float(w) | |
total_weight += float(w) | |
print 'OSDs (id: weight):', weights | |
# compute the expected # PGs for our 4 OSDs | |
expected = {} | |
for i, w in osds: | |
expected[i] = NPGS * R * weights[i]/total_weight | |
print '\nExpected: ', expected | |
for r in xrange(R): | |
print ' Replica %d: %s' % (r, [float(v/R) for k,v in expected.iteritems()]) | |
# initialize PG and PG-by-replica-number counters for each OSD | |
pgs = {} | |
for i, w in osds: | |
pgs[i] = 0 | |
pgs_by_r = [dict(pgs) for r in xrange(R)] | |
# this function returns a random weighted choice | |
def weighted_choice(choices): | |
total = sum(w for c, w in choices) | |
r = random.uniform(0, total) | |
upto = 0 | |
for c, w in choices: | |
if upto + w >= r: | |
return c | |
upto += w | |
assert False, "Shouldn't get here" | |
print "### Simulating with existing CRUSH (demonstrate the multi-pick anomoly)" | |
# Existing CRUSH | |
# loop over all PGs, select one with weighted choice, remove that option for the next choice. | |
for i in xrange(NPGS): | |
choices = list(osds) | |
for r in xrange(R): | |
pick = weighted_choice(choices) | |
pgs[pick] += 1 | |
pgs_by_r[r][pick] += 1 | |
choices.remove((pick, weights[pick]),) | |
print 'Observed: ', pgs | |
for r in xrange(R): | |
print ' Replica %d: %s' % (r, pgs_by_r[r]) | |
print "### Simulating using PACE's matrix selection method" | |
# re-initialize PG and PG-by-replicanum counters for each OSD | |
pgs = {} | |
for i,w in osds: | |
pgs[i] = 0 | |
pgs_by_r = [dict(pgs) for r in xrange(R)] | |
import numpy as np | |
import itertools | |
# generate a list of all OSD combinations | |
osdsets = list(itertools.combinations([id for id,w in osds], R)) | |
print 'All OSD Combinations:', osdsets | |
# Compute the first N rows of matrix A | |
AT = [] # this is the transpose of A | |
for osdset in osdsets: | |
ATi = [] | |
for i, w in osds: | |
if i in osdset: | |
ATi.append(1.0) | |
else: | |
ATi.append(0.0) | |
AT.append(ATi) | |
A = list(np.transpose(AT)) # Get A and store it as a list so we can later append | |
# Generate the first N entries in matrix B | |
B = [w for i, w in osds] | |
# Now we need to add additional constraints so that A x = B is solveable. | |
# Heuristic #1 | |
# Set the probability of the first OSD combination to 1.0 | |
# This doesn't always work -- maybe it *would* always work if we first sort the OSD combinations by their weight sums. | |
# if A is not square: | |
if len(A) < len(A[0]): | |
Ai = [0.0 for k in xrange(len(A[0]))] | |
Ai[0] = 1.0 | |
A.append(Ai) | |
B.append(1.0) | |
# Heuristic #2 | |
# For each OSD combination, find another having identically weighted osds. Add this constraint using A += (.., 1,..,-1,..) and B += 0 | |
# loop over all i,j pairs of OSD combinations | |
for i in xrange(len(osdsets)): | |
for j in xrange(i+1,len(osdsets)): | |
assert i != j, "Shouldn't be possible!" | |
# if A is not square | |
if len(A) < len(A[0]): | |
Ai = [0.0 for k in xrange(len(A[0]))] | |
identical = True | |
for r in xrange(R): | |
i_weight = weights[osdsets[i][r]] | |
j_weight = weights[osdsets[j][r]] | |
if i_weight != j_weight: | |
identical = False | |
if identical: | |
Ai[i] = 1.0 | |
Ai[j] = -1.0 | |
A.append(Ai) | |
B.append(0.0) | |
else: | |
break | |
assert len(A) == len(A[0]), 'Coefficient matrix A is not square!' | |
# Now solve for x where A x = B | |
print 'Solving A x = B' | |
print 'A =' | |
print np.array(A) | |
print 'B =' | |
print np.array(B) | |
x = np.linalg.solve(A,B) | |
print 'x =' | |
print x | |
for weight in x: | |
assert weight >= 0.0, 'Negative weight, you either added impossible constraints or the OSD weights are unusable!' | |
# store the weighed OSD combinations so they can be used by the weighted_choice function | |
weighted_osdsets = [] | |
i = 0 | |
for osdset in osdsets: | |
weighted_osdsets.append((osdset,x[i])) | |
i += 1 | |
# select your NPGS PGs. | |
for i in xrange(NPGS): | |
# make a weighted random choice | |
combo = list(weighted_choice(weighted_osdsets)) | |
# shuffle the items in this choice | |
random.shuffle(combo) | |
# record this choice | |
r = 0 | |
for pick in combo: | |
pgs[pick] += 1 | |
pgs_by_r[r][pick] += 1 | |
r += 1 | |
print 'Observed: ', pgs | |
for r in xrange(R): | |
print ' Replica %d: %s' % (r, pgs_by_r[r]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment