stringertheory/skittles.py

## skittles.py
import sys
import collections
import random

# Number of simulated trials; one chi-squared statistic is printed per trial.
N_TRIALS = 100000

# Column names that are tallied as flavors; any other column in the data
# file is ignored.
FLAVORS = [
    'Strawberry',
    'Orange',
    'Lemon',
    'Apple',
    'Grape',
]


def chisquared(values):
    """Return the chi-squared statistic against a uniform expectation.

    The expected count of every category is the mean of ``values``; the
    result is the sum of ``(observed - expected)**2 / expected``.

    Args:
        values: Iterable of numeric counts, one per category. Any iterable
            is accepted, including dict views and one-shot iterators.

    Returns:
        The chi-squared statistic.

    Raises:
        ZeroDivisionError: If ``values`` is empty or its mean is zero.
    """
    # Materialize once: the counts are traversed twice (for the mean, then
    # for the squared deviations), which a one-shot iterator cannot support.
    counts = list(values)
    expected = sum(counts) / float(len(counts))
    return sum((count - expected) ** 2 / expected for count in counts)


def read_file(filename='skittles.txt'):
    """Yield one dict per data row of a whitespace-delimited count table.

    The first line of the file holds the column names; every following line
    holds one integer per column.

    Args:
        filename: Path of the file to read.

    Yields:
        A dict mapping each header name to that row's integer value. Cells
        are paired with the header positionally via ``zip``, so a row that
        is shorter or longer than the header is truncated to the common
        length. A blank line yields an empty dict. An empty file yields
        nothing.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a cell cannot be parsed as an integer.
    """
    with open(filename) as infile:
        # Pass a default to next(): on an empty file a bare next() raises
        # StopIteration, which Python 3.7+ converts into RuntimeError when
        # it escapes a generator (PEP 479).
        first_line = next(infile, None)
        if first_line is None:
            return
        header = first_line.split()
        for line in infile:
            cells = [int(cell) for cell in line.split()]
            yield dict(zip(header, cells))


# Tally the observed count of each flavor across all rows, ignoring
# columns that are not in FLAVORS (like "Uncounted").
observed = collections.Counter()
for row in read_file():
    for key, value in row.items():
        if key in FLAVORS:
            observed[key] += value

print(observed, file=sys.stderr)
# Index by FLAVORS rather than passing observed.values(): a flavor that
# never appears in the data is absent from the Counter, but it must still
# enter the statistic as a zero-count category.
print(chisquared([observed[flavor] for flavor in FLAVORS]), file=sys.stderr)

# Every trial draws as many skittles as were observed in total; the total
# does not change between trials, so compute it once.
n_skittles = sum(observed.values())

for _trial in range(N_TRIALS):

    # simulate uniform skittle flavors
    dist = collections.Counter(
        random.choice(FLAVORS) for _draw in range(n_skittles)
    )

    # Index by FLAVORS so a flavor that was never drawn contributes a zero
    # count instead of being dropped from the statistic. flush=True pushes
    # each result out immediately (presumably for a piped consumer).
    print(chisquared([dist[flavor] for flavor in FLAVORS]), flush=True)
	import sys
	import collections
	import random

	# Number of simulated trials; one chi-squared statistic is printed per
	# trial.
	N_TRIALS = 100000

	# Column names that are tallied as flavors; any other column in the data
	# file is ignored.
	FLAVORS = [
		'Strawberry',
		'Orange',
		'Lemon',
		'Apple',
		'Grape',
	]


	def chisquared(values):
		"""Return the chi-squared statistic against a uniform expectation.

		The expected count of every category is the mean of ``values``; the
		result is the sum of ``(observed - expected)**2 / expected``.

		Args:
			values: Iterable of numeric counts, one per category. Any
				iterable is accepted, including dict views and one-shot
				iterators.

		Returns:
			The chi-squared statistic.

		Raises:
			ZeroDivisionError: If ``values`` is empty or its mean is zero.
		"""
		# Materialize once: the counts are traversed twice (for the mean,
		# then for the squared deviations), which a one-shot iterator
		# cannot support.
		counts = list(values)
		expected = sum(counts) / float(len(counts))
		return sum((count - expected) ** 2 / expected for count in counts)


	def read_file(filename='skittles.txt'):
		"""Yield one dict per data row of a whitespace-delimited count table.

		The first line of the file holds the column names; every following
		line holds one integer per column.

		Args:
			filename: Path of the file to read.

		Yields:
			A dict mapping each header name to that row's integer value.
			Cells are paired with the header positionally via ``zip``, so a
			row that is shorter or longer than the header is truncated to
			the common length. A blank line yields an empty dict. An empty
			file yields nothing.

		Raises:
			OSError: If the file cannot be opened.
			ValueError: If a cell cannot be parsed as an integer.
		"""
		with open(filename) as infile:
			# Pass a default to next(): on an empty file a bare next()
			# raises StopIteration, which Python 3.7+ converts into
			# RuntimeError when it escapes a generator (PEP 479).
			first_line = next(infile, None)
			if first_line is None:
				return
			header = first_line.split()
			for line in infile:
				cells = [int(cell) for cell in line.split()]
				yield dict(zip(header, cells))


	# Tally the observed count of each flavor across all rows, ignoring
	# columns that are not in FLAVORS (like "Uncounted").
	observed = collections.Counter()
	for row in read_file():
		for key, value in row.items():
			if key in FLAVORS:
				observed[key] += value

	print(observed, file=sys.stderr)
	# Index by FLAVORS rather than passing observed.values(): a flavor that
	# never appears in the data is absent from the Counter, but it must
	# still enter the statistic as a zero-count category.
	print(chisquared([observed[flavor] for flavor in FLAVORS]), file=sys.stderr)

	# Every trial draws as many skittles as were observed in total; the
	# total does not change between trials, so compute it once.
	n_skittles = sum(observed.values())

	for _trial in range(N_TRIALS):

		# simulate uniform skittle flavors
		dist = collections.Counter(
			random.choice(FLAVORS) for _draw in range(n_skittles)
		)

		# Index by FLAVORS so a flavor that was never drawn contributes a
		# zero count instead of being dropped from the statistic.
		# flush=True pushes each result out immediately (presumably for a
		# piped consumer).
		print(chisquared([dist[flavor] for flavor in FLAVORS]), flush=True)