Skip to content

Instantly share code, notes, and snippets.

@andrwng
Last active October 4, 2019 05:21
Show Gist options
  • Save andrwng/7c24e8e26aec68c50741f92eb6f2e48d to your computer and use it in GitHub Desktop.
Save andrwng/7c24e8e26aec68c50741f92eb6f2e48d to your computer and use it in GitHub Desktop.
Power of two choices implementation for group selection in python
#!/usr/bin/python
import random
import numpy as np
from matplotlib import pyplot as plt
"""
Straight-forward implementation of PO2C.
"""
def po2c(num_buckets, num_groups, group_size):
buckets = [0 for i in range(num_buckets)]
for g in range(num_groups):
group = []
while len(group) < group_size:
# Select two and pick the one with fewer.
candidates = [i for i in range(num_buckets) if i not in group]
if len(candidates) == 0:
break
elif len(candidates) == 1:
group.append(candiates[0])
break
else:
two_buckets = random.sample(candidates, 2)
less_loaded_bucket = min(two_buckets, key=lambda i: buckets[i])
group.append(less_loaded_bucket)
for bucket_num in group:
buckets[bucket_num] += 1
return buckets
def main():
stddevs = []
for i in range(5000):
dist = po2c(10, 20, 5)
stddevs.append(np.std(dist))
print np.histogram(stddevs)
plt.hist(stddevs, bins=[i*0.1 for i in range(20)])
plt.xlabel("Stddev of tablet distribution across dirs")
plt.xticks(np.arange(0, 2, 0.1))
plt.ylabel("Num runs")
plt.show()
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment