Skip to content

Instantly share code, notes, and snippets.

@wcarss
Last active April 27, 2018 14:39
Show Gist options
  • Save wcarss/274d2418988ab59415d8470a85b04983 to your computer and use it in GitHub Desktop.
Save wcarss/274d2418988ab59415d8470a85b04983 to your computer and use it in GitHub Desktop.
A test-file for an algorithm to pick-random values by a per-key distribution
import sys
import random
from collections import defaultdict
# some utility classes for nicer printing + default-ints
class PrettyDefaultDict(defaultdict):
def pprint(self):
lines = []
lines.append("{")
for k, v in self.iteritems():
lines.append(" %s: %s" % (k, v))
lines.append("}")
return "\n".join(lines)
__str__ = pprint
class Distribution(PrettyDefaultDict):
pass
class Results(PrettyDefaultDict):
pass
# setup: generate a random spec for a per-key random distribution
def generate_distribution():
distribution_spec = Distribution()
keys = ["grass", "dirt", "mountain", "sand", "tree", "barrell", "brush", "grave", "path", "pavestone", "plank", "wall", "stone", "brick", "plaster", "gravel", "heath", "flowers", "deep_sea", "sea", "river", "shore", "wheat", "corn", "highway", "sidewalk", "metal", "pipes", "grate", "train_tracks", "hole"]
# we just want to grab a few random keys
key_sample = random.sample(keys, random.randint(1, len(keys)-1))
# sane starts for your reading pleasure
last_key = None
total = 0
offset = 0
amount = 0
for key in key_sample:
last_key = key
total += amount # have to do this first so we have a gap at the end
offset = random.uniform(
(-1.0/len(key_sample))/4.0,
(1.0/len(key_sample))/4.0
)
amount = 1.0/len(key_sample) + offset # a bit of noise
distribution_spec[key] = amount
distribution_spec[last_key] = 1.0 - total # make sum add to 1
return distribution_spec
# just a harness to run the test N times and gather results
def repeatedly_pick(distribution, number_of_picks):
results = Results(int)
for i in xrange(number_of_picks):
pick = distribution_pick(distribution)
results[pick] += 1
result_count = len(distribution)
for k, v in results.iteritems():
results[k] = {
"total": v,
"rate": v / float(number_of_picks)
}
return results
# the actual algorithm we're testing
def distribution_pick(distribution):
barrier = random.uniform(0, 1) # uniform distribution float
probability_sum = 0
keys = distribution.keys()
iterator = 0
while iterator < len(keys):
# build up sum of probabilities of all keys passed
probability_sum += distribution[keys[iterator]]
# at some point the sum will exceed the threshold
if probability_sum > barrier:
break
iterator += 1
# last value of i is point where sum > barrier or the end of the list
return keys[iterator]
# just a visual test: print a spec and print result of picking by algo
def test(number_of_tests):
number_of_picks = 10000
for test_index in range(number_of_tests):
if test_index > 0: # just nicer output
print "\n"
print "test %s of %s: " % (test_index + 1, number_of_tests)
print ""
print "distribution spec: "
distribution = generate_distribution()
print distribution
print ""
print ("result of picking %s times: " % number_of_picks)
results = repeatedly_pick(distribution, number_of_picks)
print results
if __name__ == "__main__":
test(int(sys.argv[1]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment