wcarss/probability_distribution_test.py

## probability_distribution_test.py
import sys
import random
from collections import defaultdict


# some utility classes for nicer printing + default-ints
class PrettyDefaultDict(defaultdict):
  def pprint(self):
    lines = []

    lines.append("{")
    for k, v in self.iteritems():
      lines.append("  %s: %s" % (k, v))
    lines.append("}")

    return "\n".join(lines)

  __str__ = pprint


class Distribution(PrettyDefaultDict):
  pass


class Results(PrettyDefaultDict):
  pass


# setup: generate a random spec for a per-key random distribution
def generate_distribution():
  distribution_spec = Distribution()
  keys = ["grass", "dirt", "mountain", "sand", "tree", "barrell", "brush", "grave", "path", "pavestone", "plank", "wall", "stone", "brick", "plaster", "gravel", "heath", "flowers", "deep_sea", "sea", "river", "shore", "wheat", "corn", "highway", "sidewalk", "metal", "pipes", "grate", "train_tracks", "hole"]

  # we just want to grab a few random keys
  key_sample = random.sample(keys, random.randint(1, len(keys)-1))

  # sane starts for your reading pleasure
  last_key = None
  total = 0
  offset = 0
  amount = 0

  for key in key_sample:
    last_key = key
    total += amount #  have to do this first so we have a gap at the end
    offset = random.uniform(
      (-1.0/len(key_sample))/4.0,
      (1.0/len(key_sample))/4.0
    )
    amount = 1.0/len(key_sample) + offset #  a bit of noise
    distribution_spec[key] = amount

  distribution_spec[last_key] = 1.0 - total #  make sum add to 1

  return distribution_spec


# just a harness to run the test N times and gather results
def repeatedly_pick(distribution, number_of_picks):
  results = Results(int)

  for i in xrange(number_of_picks):
    pick = distribution_pick(distribution)
    results[pick] += 1

  result_count = len(distribution)
  for k, v in results.iteritems():
    results[k] = {
      "total": v,
      "rate": v / float(number_of_picks)
    }

  return results


# the actual algorithm we're testing
def distribution_pick(distribution):
  barrier = random.uniform(0, 1)  # uniform distribution float
  probability_sum = 0
  keys = distribution.keys()
  iterator = 0

  while iterator < len(keys):
    # build up sum of probabilities of all keys passed
    probability_sum += distribution[keys[iterator]]

    # at some point the sum will exceed the threshold
    if probability_sum > barrier:
      break
    iterator += 1

  # last value of i is point where sum > barrier or the end of the list
  return keys[iterator]


# just a visual test: print a spec and print result of picking by algo
def test(number_of_tests):
  number_of_picks = 10000

  for test_index in range(number_of_tests):
    if test_index > 0:  # just nicer output
      print "\n"

    print "test %s of %s: " % (test_index + 1, number_of_tests)
    print ""

    print "distribution spec: "
    distribution = generate_distribution()
    print distribution
    print ""

    print ("result of picking %s times: " % number_of_picks)
    results = repeatedly_pick(distribution, number_of_picks)
    print results


if __name__ == "__main__":
  test(int(sys.argv[1]))
	import sys
	import random
	from collections import defaultdict


	# some utility classes for nicer printing + default-ints
	class PrettyDefaultDict(defaultdict):
	def pprint(self):
	lines = []

	lines.append("{")
	for k, v in self.iteritems():
	lines.append(" %s: %s" % (k, v))
	lines.append("}")

	return "\n".join(lines)

	__str__ = pprint


	class Distribution(PrettyDefaultDict):
	pass


	class Results(PrettyDefaultDict):
	pass


	# setup: generate a random spec for a per-key random distribution
	def generate_distribution():
	distribution_spec = Distribution()
	keys = ["grass", "dirt", "mountain", "sand", "tree", "barrell", "brush", "grave", "path", "pavestone", "plank", "wall", "stone", "brick", "plaster", "gravel", "heath", "flowers", "deep_sea", "sea", "river", "shore", "wheat", "corn", "highway", "sidewalk", "metal", "pipes", "grate", "train_tracks", "hole"]

	# we just want to grab a few random keys
	key_sample = random.sample(keys, random.randint(1, len(keys)-1))

	# sane starts for your reading pleasure
	last_key = None
	total = 0
	offset = 0
	amount = 0

	for key in key_sample:
	last_key = key
	total += amount # have to do this first so we have a gap at the end
	offset = random.uniform(
	(-1.0/len(key_sample))/4.0,
	(1.0/len(key_sample))/4.0
	)
	amount = 1.0/len(key_sample) + offset # a bit of noise
	distribution_spec[key] = amount

	distribution_spec[last_key] = 1.0 - total # make sum add to 1

	return distribution_spec


	# just a harness to run the test N times and gather results
	def repeatedly_pick(distribution, number_of_picks):
	results = Results(int)

	for i in xrange(number_of_picks):
	pick = distribution_pick(distribution)
	results[pick] += 1

	result_count = len(distribution)
	for k, v in results.iteritems():
	results[k] = {
	"total": v,
	"rate": v / float(number_of_picks)
	}

	return results


	# the actual algorithm we're testing
	def distribution_pick(distribution):
	barrier = random.uniform(0, 1) # uniform distribution float
	probability_sum = 0
	keys = distribution.keys()
	iterator = 0

	while iterator < len(keys):
	# build up sum of probabilities of all keys passed
	probability_sum += distribution[keys[iterator]]

	# at some point the sum will exceed the threshold
	if probability_sum > barrier:
	break
	iterator += 1

	# last value of i is point where sum > barrier or the end of the list
	return keys[iterator]


	# just a visual test: print a spec and print result of picking by algo
	def test(number_of_tests):
	number_of_picks = 10000

	for test_index in range(number_of_tests):
	if test_index > 0: # just nicer output
	print "\n"

	print "test %s of %s: " % (test_index + 1, number_of_tests)
	print ""

	print "distribution spec: "
	distribution = generate_distribution()
	print distribution
	print ""

	print ("result of picking %s times: " % number_of_picks)
	results = repeatedly_pick(distribution, number_of_picks)
	print results


	if __name__ == "__main__":
	test(int(sys.argv[1]))