cernceph/mpa3.py

## mpa3.py
#!/usr/bin/env python

import random
from math import sqrt, log, pow

DEBUG = True
NPGS = 100000 # num PGs
R = 3  # num replicas

if NPGS > 3:
  DEBUG = False

print 'Num PGs:', NPGS
print 'Num Replicas:', R

# OSDs: (id, weight)
# Try using different sets of OSDs here! But note that the heuristics below are really tricky!
osds = [(0, 3.0), (1, 3.0), (2, 3.0), (3, 1.0)]
#osds = [(i,3.0) for i in xrange(5)]
#osds[0] = (0,4.0)
#osds[2] = (2,3.0)

assert R < len(osds), 'Must have fewer replicas than OSDs in this prototype'

# store the OSD weights in a dict for later usage
weights = {}
total_weight = 0
for i, w in osds:
  weights[i] = float(w)
  total_weight += float(w)

print 'OSDs (id: weight):', weights

# compute the expected # PGs for our 4 OSDs
expected = {}
for i, w in osds:
  expected[i] = NPGS * R * weights[i]/total_weight

print '\nExpected:  ', expected
for r in xrange(R):
  print '  Replica %d: %s' % (r, [float(v/R) for k,v in expected.iteritems()])

# initialize PG and PG-by-replica-number counters for each OSD
pgs = {}
for i, w in osds:
  pgs[i] = 0
pgs_by_r = [dict(pgs) for r in xrange(R)]

# this function returns a random weighted choice
def weighted_choice(choices):
   total = sum(w for c, w in choices)
   r = random.uniform(0, total)
   upto = 0
   for c, w in choices:
      if upto + w >= r:
         return c
      upto += w
   assert False, "Shouldn't get here"

print
print "### Simulating with existing CRUSH (demonstrate the multi-pick anomoly)"
print

# Existing CRUSH
# loop over all PGs, select one with weighted choice, remove that option for the next choice.
for i in xrange(NPGS):
  choices = list(osds)
  for r in xrange(R):
    pick = weighted_choice(choices)
    pgs[pick] += 1
    pgs_by_r[r][pick] += 1
    choices.remove((pick, weights[pick]),)

print 'Observed:   ', pgs
for r in xrange(R):
  print '  Replica %d: %s' % (r, pgs_by_r[r])

print
print "### Simulating using PACE's matrix selection method"
print
# re-initialize PG and PG-by-replicanum counters for each OSD
pgs = {}
for i,w in osds:
  pgs[i] = 0
pgs_by_r = [dict(pgs) for r in xrange(R)]

import numpy as np
import itertools

# generate a list of all OSD combinations
osdsets = list(itertools.combinations([id for id,w in osds], R))
print 'All OSD Combinations:', osdsets
print

# Compute the first N rows of matrix A
AT = [] # this is the transpose of A
for osdset in osdsets:
  ATi = []
  for i, w in osds:
    if i in osdset:
      ATi.append(1.0)
    else:
      ATi.append(0.0)
  AT.append(ATi)
A = list(np.transpose(AT)) # Get A and store it as a list so we can later append

# Generate the first N entries in matrix B
B = [w for i, w in osds]

# Now we need to add additional constraints so that A x = B is solveable.

# Heuristic #1
# Set the probability of the first OSD combination to 1.0
# This doesn't always work -- maybe it *would* always work if we first sort the OSD combinations by their weight sums.

# if A is not square:
if len(A) < len(A[0]):
  Ai = [0.0 for k in xrange(len(A[0]))]
  Ai[0] = 1.0
  A.append(Ai)
  B.append(1.0)

# Heuristic #2
# For each OSD combination, find another having identically weighted osds. Add this constraint using A += (.., 1,..,-1,..) and B += 0

# loop over all i,j pairs of OSD combinations
for i in xrange(len(osdsets)):
  for j in xrange(i+1,len(osdsets)):
    assert i != j, "Shouldn't be possible!"

    # if A is not square
    if len(A) < len(A[0]):
      Ai = [0.0 for k in xrange(len(A[0]))]
      identical = True
      for r in xrange(R):
        i_weight = weights[osdsets[i][r]]
        j_weight = weights[osdsets[j][r]]
        if i_weight != j_weight:
          identical = False
      if identical:
        Ai[i] = 1.0
        Ai[j] = -1.0
        A.append(Ai)
        B.append(0.0)
    else:
      break

assert len(A) == len(A[0]), 'Coefficient matrix A is not square!'

# Now solve for x where A x = B

print 'Solving A x = B'
print
print 'A ='
print np.array(A)
print

print 'B ='
print np.array(B)
print

x = np.linalg.solve(A,B)

print 'x ='
print x
print

for weight in x:
  assert weight >= 0.0, 'Negative weight, you either added impossible constraints or the OSD weights are unusable!'

# store the weighed OSD combinations so they can be used by the weighted_choice function
weighted_osdsets = []
i = 0
for osdset in osdsets:
  weighted_osdsets.append((osdset,x[i]))
  i += 1

# select your NPGS PGs.
for i in xrange(NPGS):
  # make a weighted random choice
  combo = list(weighted_choice(weighted_osdsets))
  # shuffle the items in this choice
  random.shuffle(combo)

  # record this choice
  r = 0
  for pick in combo:
    pgs[pick] += 1
    pgs_by_r[r][pick] += 1
    r += 1

print 'Observed:   ', pgs
for r in xrange(R):
  print '  Replica %d: %s' % (r, pgs_by_r[r])
	#!/usr/bin/env python

	import random
	from math import sqrt, log, pow

	DEBUG = True
	NPGS = 100000 # num PGs
	R = 3 # num replicas

	if NPGS > 3:
	DEBUG = False

	print 'Num PGs:', NPGS
	print 'Num Replicas:', R

	# OSDs: (id, weight)
	# Try using different sets of OSDs here! But note that the heuristics below are really tricky!
	osds = [(0, 3.0), (1, 3.0), (2, 3.0), (3, 1.0)]
	#osds = [(i,3.0) for i in xrange(5)]
	#osds[0] = (0,4.0)
	#osds[2] = (2,3.0)

	assert R < len(osds), 'Must have fewer replicas than OSDs in this prototype'

	# store the OSD weights in a dict for later usage
	weights = {}
	total_weight = 0
	for i, w in osds:
	weights[i] = float(w)
	total_weight += float(w)

	print 'OSDs (id: weight):', weights

	# compute the expected # PGs for our 4 OSDs
	expected = {}
	for i, w in osds:
	expected[i] = NPGS * R * weights[i]/total_weight

	print '\nExpected: ', expected
	for r in xrange(R):
	print ' Replica %d: %s' % (r, [float(v/R) for k,v in expected.iteritems()])

	# initialize PG and PG-by-replica-number counters for each OSD
	pgs = {}
	for i, w in osds:
	pgs[i] = 0
	pgs_by_r = [dict(pgs) for r in xrange(R)]

	# this function returns a random weighted choice
	def weighted_choice(choices):
	total = sum(w for c, w in choices)
	r = random.uniform(0, total)
	upto = 0
	for c, w in choices:
	if upto + w >= r:
	return c
	upto += w
	assert False, "Shouldn't get here"

	print
	print "### Simulating with existing CRUSH (demonstrate the multi-pick anomoly)"
	print

	# Existing CRUSH
	# loop over all PGs, select one with weighted choice, remove that option for the next choice.
	for i in xrange(NPGS):
	choices = list(osds)
	for r in xrange(R):
	pick = weighted_choice(choices)
	pgs[pick] += 1
	pgs_by_r[r][pick] += 1
	choices.remove((pick, weights[pick]),)

	print 'Observed: ', pgs
	for r in xrange(R):
	print ' Replica %d: %s' % (r, pgs_by_r[r])

	print
	print "### Simulating using PACE's matrix selection method"
	print
	# re-initialize PG and PG-by-replicanum counters for each OSD
	pgs = {}
	for i,w in osds:
	pgs[i] = 0
	pgs_by_r = [dict(pgs) for r in xrange(R)]

	import numpy as np
	import itertools

	# generate a list of all OSD combinations
	osdsets = list(itertools.combinations([id for id,w in osds], R))
	print 'All OSD Combinations:', osdsets
	print

	# Compute the first N rows of matrix A
	AT = [] # this is the transpose of A
	for osdset in osdsets:
	ATi = []
	for i, w in osds:
	if i in osdset:
	ATi.append(1.0)
	else:
	ATi.append(0.0)
	AT.append(ATi)
	A = list(np.transpose(AT)) # Get A and store it as a list so we can later append

	# Generate the first N entries in matrix B
	B = [w for i, w in osds]

	# Now we need to add additional constraints so that A x = B is solveable.

	# Heuristic #1
	# Set the probability of the first OSD combination to 1.0
	# This doesn't always work -- maybe it would always work if we first sort the OSD combinations by their weight sums.

	# if A is not square:
	if len(A) < len(A[0]):
	Ai = [0.0 for k in xrange(len(A[0]))]
	Ai[0] = 1.0
	A.append(Ai)
	B.append(1.0)

	# Heuristic #2
	# For each OSD combination, find another having identically weighted osds. Add this constraint using A += (.., 1,..,-1,..) and B += 0

	# loop over all i,j pairs of OSD combinations
	for i in xrange(len(osdsets)):
	for j in xrange(i+1,len(osdsets)):
	assert i != j, "Shouldn't be possible!"

	# if A is not square
	if len(A) < len(A[0]):
	Ai = [0.0 for k in xrange(len(A[0]))]
	identical = True
	for r in xrange(R):
	i_weight = weights[osdsets[i][r]]
	j_weight = weights[osdsets[j][r]]
	if i_weight != j_weight:
	identical = False
	if identical:
	Ai[i] = 1.0
	Ai[j] = -1.0
	A.append(Ai)
	B.append(0.0)
	else:
	break

	assert len(A) == len(A[0]), 'Coefficient matrix A is not square!'

	# Now solve for x where A x = B

	print 'Solving A x = B'
	print
	print 'A ='
	print np.array(A)
	print

	print 'B ='
	print np.array(B)
	print

	x = np.linalg.solve(A,B)

	print 'x ='
	print x
	print

	for weight in x:
	assert weight >= 0.0, 'Negative weight, you either added impossible constraints or the OSD weights are unusable!'

	# store the weighed OSD combinations so they can be used by the weighted_choice function
	weighted_osdsets = []
	i = 0
	for osdset in osdsets:
	weighted_osdsets.append((osdset,x[i]))
	i += 1

	# select your NPGS PGs.
	for i in xrange(NPGS):
	# make a weighted random choice
	combo = list(weighted_choice(weighted_osdsets))
	# shuffle the items in this choice
	random.shuffle(combo)

	# record this choice
	r = 0
	for pick in combo:
	pgs[pick] += 1
	pgs_by_r[r][pick] += 1
	r += 1

	print 'Observed: ', pgs
	for r in xrange(R):
	print ' Replica %d: %s' % (r, pgs_by_r[r])