Last active
April 27, 2018 14:39
-
-
Save wcarss/274d2418988ab59415d8470a85b04983 to your computer and use it in GitHub Desktop.
A test-file for an algorithm to pick-random values by a per-key distribution
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import random | |
from collections import defaultdict | |
# some utility classes for nicer printing + default-ints | |
class PrettyDefaultDict(defaultdict): | |
def pprint(self): | |
lines = [] | |
lines.append("{") | |
for k, v in self.iteritems(): | |
lines.append(" %s: %s" % (k, v)) | |
lines.append("}") | |
return "\n".join(lines) | |
__str__ = pprint | |
class Distribution(PrettyDefaultDict): | |
pass | |
class Results(PrettyDefaultDict): | |
pass | |
# setup: generate a random spec for a per-key random distribution | |
def generate_distribution(): | |
distribution_spec = Distribution() | |
keys = ["grass", "dirt", "mountain", "sand", "tree", "barrell", "brush", "grave", "path", "pavestone", "plank", "wall", "stone", "brick", "plaster", "gravel", "heath", "flowers", "deep_sea", "sea", "river", "shore", "wheat", "corn", "highway", "sidewalk", "metal", "pipes", "grate", "train_tracks", "hole"] | |
# we just want to grab a few random keys | |
key_sample = random.sample(keys, random.randint(1, len(keys)-1)) | |
# sane starts for your reading pleasure | |
last_key = None | |
total = 0 | |
offset = 0 | |
amount = 0 | |
for key in key_sample: | |
last_key = key | |
total += amount # have to do this first so we have a gap at the end | |
offset = random.uniform( | |
(-1.0/len(key_sample))/4.0, | |
(1.0/len(key_sample))/4.0 | |
) | |
amount = 1.0/len(key_sample) + offset # a bit of noise | |
distribution_spec[key] = amount | |
distribution_spec[last_key] = 1.0 - total # make sum add to 1 | |
return distribution_spec | |
# just a harness to run the test N times and gather results | |
def repeatedly_pick(distribution, number_of_picks): | |
results = Results(int) | |
for i in xrange(number_of_picks): | |
pick = distribution_pick(distribution) | |
results[pick] += 1 | |
result_count = len(distribution) | |
for k, v in results.iteritems(): | |
results[k] = { | |
"total": v, | |
"rate": v / float(number_of_picks) | |
} | |
return results | |
# the actual algorithm we're testing | |
def distribution_pick(distribution): | |
barrier = random.uniform(0, 1) # uniform distribution float | |
probability_sum = 0 | |
keys = distribution.keys() | |
iterator = 0 | |
while iterator < len(keys): | |
# build up sum of probabilities of all keys passed | |
probability_sum += distribution[keys[iterator]] | |
# at some point the sum will exceed the threshold | |
if probability_sum > barrier: | |
break | |
iterator += 1 | |
# last value of i is point where sum > barrier or the end of the list | |
return keys[iterator] | |
# just a visual test: print a spec and print result of picking by algo | |
def test(number_of_tests): | |
number_of_picks = 10000 | |
for test_index in range(number_of_tests): | |
if test_index > 0: # just nicer output | |
print "\n" | |
print "test %s of %s: " % (test_index + 1, number_of_tests) | |
print "" | |
print "distribution spec: " | |
distribution = generate_distribution() | |
print distribution | |
print "" | |
print ("result of picking %s times: " % number_of_picks) | |
results = repeatedly_pick(distribution, number_of_picks) | |
print results | |
if __name__ == "__main__": | |
test(int(sys.argv[1])) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment