Skip to content

Instantly share code, notes, and snippets.

@dylan-evans
Created February 7, 2011 14:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dylan-evans/814453 to your computer and use it in GitHub Desktop.
Save dylan-evans/814453 to your computer and use it in GitHub Desktop.
This snippet generates some reasonably normal statistics. I wrote it as an experiment to find a way to generate this data for a game, which i never wrote, so precision was never a goal. I had trouble finding information about doing this sort of thing, but
#!/usr/bin/python
# Generate a (roughly normal) sample population for a given mean & standard deviation.
# This was intended for use modelling hypothetical demographics in a sim game.
"""\
Demonstration of the StatGen class
This script demonstrates the StatGen class which generates a normalized sample
from a mean and standard deviation. The algorithm can be adjusted, tweaked and
broken in many different ways which can provide some interesting results, but
the current form provides an almost bell curve sample. Well it's more of a
volcano than a bell curve, use your imagination!
If matplotlib is available this script displays a histogram representation, this
is recommended.
The purpose of the algorithm was to provide sample data for a game
"""
from math import sqrt
from optparse import OptionParser
from random import random
class StatGen(object):
"""\
Statistic generation class.
This class generates a statistical population based on a given mean,
standard deviation and sample size. While the result is not intended to
be perfect the resulting sample should be more or less normalized which
is achieved by using both a floating mean and standard deviation, and while
i came up with the maths from scratch i have difficulty explaining it.
"""
limit = 2.6 # Theoretical limit in standard deviations
def __init__(self, weight, mean, std_dev, verbose=False):
self.verbose = verbose
self.weight = weight + 1
self.orig_mean = self.mean = mean
self.std_dev = std_dev
self.toggle = 1
self.next(); self.next()
def next(self):
"""Get the next value"""
# Create the offset from the current std_dev
# The offset is the actual limit of the value
offset = self.limit * self.std_dev
val = offset * random() # Calculate the raw value
#val = (sqrt(random())-self.limit*self.orig_std_dev)*(self.std_dev)
# Decide whether the value should be greater or lesser than the mean.
# This method simply alternates
if self.toggle:
self.toggle = 0
val += self.mean
else:
self.toggle = 1
val = self.mean - val
# Alternate method, probably a better sample but slower
#if random() > 0.5:
# val += self.mean
#else:
# val = self.mean - val
if self.weight > 1:
# Recalculate the standard deviation
diff = (val - self.mean) ** 2
inter = (self.std_dev ** 2) * self.weight - diff
self.weight -= 1
try:
self.std_dev = sqrt(inter / self.weight)
except:
pass
# Calculate the new mean
mean = (self.mean / self.weight) * (self.weight + 1) - (val / self.weight)
self.mean = (mean - self.mean) / 2
val += self.orig_mean # Align the value by adding the original mean
if self.verbose:
print "value: %f new mean: %f" % (val, self.mean)
return val
if __name__ == '__main__':
# Test program portion
# Parse command line
parser = OptionParser()
parser.add_option('-p', '--population', action='store', type='int', default=1000)
# I am aware that population is technically incorrect, however it does start
# with a 'P' giving an advantage over 'sample' and 'size'. See -s
parser.add_option('-m', '--mean', action='store', type='float', default=0.5)
parser.add_option('-s', '--stddev', action='store', type='float', dest='std_dev', default=0.2)
parser.add_option('-v', '--verbose', action='store_true', default=False)
parser.add_option('-M', '--no-matplotlib', action='store_true', default=False)
(options, args) = parser.parse_args()
if options.no_matplotlib:
plt = False
else:
try:
import matplotlib.pyplot as plt
#from pylab import hist
except:
print "matplotlib not available"
plt = False
g = StatGen(options.population, options.mean, options.std_dev, options.verbose)
values = []
errors = []
rmean = 0
for i in range(0, options.population):
# Generate the sample and add it to the values list
val = g.next()
if val > 1 or val < 0:
#Out of bounds values, there are usually a couple
errors.append(val)
values.append(val)
rmean += val
print "Summary:"
rmean = rmean / options.population
diff = rmean - options.mean
print "Mean: %f / %f :: %f %f%% difference" % (rmean, options.mean, diff, (diff / rmean) * 100)
adev = 0
for val in values:
adev += (val - options.mean) ** 2
adev = sqrt(adev / options.population)
print "Deviation: %f / %f :: %f %f%% difference" % (adev, options.std_dev,
adev - options.std_dev,
((adev-options.std_dev)/options.std_dev)*100)
if options.verbose:
print "Errors:"
for v in errors:
print v
if plt: #show histogram
# Display graph
#h = hist(values, bins=200)
plt.hist(values, bins=options.population*0.2)
plt.title("Mean result: %f Difference: %f %f%%" % (rmean, diff, (diff / rmean) * 100))
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment