Skip to content

Instantly share code, notes, and snippets.

@michaelmelanson
Created November 17, 2009 11:37
Show Gist options
  • Save michaelmelanson/236850 to your computer and use it in GitHub Desktop.
Save michaelmelanson/236850 to your computer and use it in GitHub Desktop.
from scipy.stats.distributions import binom
# One record per four-letter type: the observed number of respondents
# ('count') and the reference proportion it is compared against ('expected').
data = [
    dict(name='ISTJ', count=12, expected=0.1160),
    dict(name='ISFJ', count=4, expected=0.1380),
    dict(name='INFJ', count=14, expected=0.0150),
    dict(name='INTJ', count=206, expected=0.0210),
    dict(name='ISTP', count=16, expected=0.0540),
    dict(name='ISFP', count=1, expected=0.0880),
    dict(name='INFP', count=56, expected=0.0430),
    dict(name='INTP', count=191, expected=0.0430),
    dict(name='ESTP', count=3, expected=0.0430),
    dict(name='ESFP', count=1, expected=0.0850),
    dict(name='ENFP', count=22, expected=0.0810),
    dict(name='ENTP', count=56, expected=0.0330),
    dict(name='ESTJ', count=9, expected=0.0870),
    dict(name='ESFJ', count=5, expected=0.1230),
    dict(name='ENFJ', count=15, expected=0.0240),
    dict(name='ENTJ', count=49, expected=0.0180),
]

# Total number of observations across every record; used as the number of
# binomial trials.
n = sum(entry['count'] for entry in data)

# Confidence level of the test; results are flagged when their probability
# falls below 1 - CONFIDENCE_INTERVAL.
CONFIDENCE_INTERVAL = 0.99
def is_significant(prob):
    """Return True when prob is strictly below 1.0 - CONFIDENCE_INTERVAL.

    The cutoff is recomputed from the module-level CONFIDENCE_INTERVAL on
    every call.
    """
    cutoff = 1.0 - CONFIDENCE_INTERVAL
    return prob < cutoff
# Report, for every record in `data`, whether its observed count differs
# significantly from the count implied by its 'expected' proportion.
print(" Using a binomial test at a %.0f%c confidence level." % (100*CONFIDENCE_INTERVAL, '%'))
print('')
print('Single-group results:')
for group in data:
    # A binomial test compares a *tail* probability with the cutoff.  The
    # probability of exactly the observed count (binom.pmf) is small for
    # every outcome once n is large, so it is not a usable p-value.
    # Two-sided p-value: twice the smaller tail, capped at 1.
    #   lower tail = P(X <= count),  upper tail = P(X >= count)
    lower_tail = binom.cdf(group['count'], n, group['expected'])
    upper_tail = binom.sf(group['count'] - 1, n, group['expected'])
    prob = min(1.0, 2.0 * min(lower_tail, upper_tail))

    # Observed share of all observations, as a percentage.
    freq = 100.0 * float(group['count']) / float(n)

    if is_significant(prob):
        sig = 'significant: '
    else:
        sig = 'NOT significant:'

    print(" %s is %s %5.2f%c (%3d/%d) vs. %5.2f%c expected." %
          (group['name'], sig,
           freq, '%',
           group['count'], n,
           (100.0 * group['expected']), '%'))
print('')
print('Pair-wise results:')

# Prefer the standard-library implementation; fall back to a pure-Python
# generator only when itertools does not export `combinations`.
try:
    from itertools import combinations
except ImportError:
    def combinations(iterable, r):
        """Yield each r-length tuple of elements from iterable.

        Tuples are produced in lexicographic order of element positions:
            combinations('ABCD', 2)      --> AB AC AD BC BD CD
            combinations(range(4), 3)    --> 012 013 023 123
        Nothing is yielded when r exceeds the number of elements.
        """
        items = tuple(iterable)
        size = len(items)
        if r > size:
            return
        positions = list(range(r))
        yield tuple(items[pos] for pos in positions)
        while True:
            # Locate the right-most slot that has not yet reached the
            # largest position it is allowed to hold.
            pivot = None
            for slot in reversed(range(r)):
                if positions[slot] != slot + size - r:
                    pivot = slot
                    break
            if pivot is None:
                # Every slot is at its maximum: all combinations emitted.
                return
            # Advance the pivot and reset every slot to its right so the
            # positions stay strictly increasing.
            positions[pivot] += 1
            for follower in range(pivot + 1, r):
                positions[follower] = positions[follower - 1] + 1
            yield tuple(items[pos] for pos in positions)
# Aggregate observed counts and expected proportions for every pair of letter
# positions in the four-character 'name' (e.g. positions 1 and 2 of 'INTJ'
# contribute to the 'NT' bucket).
pairs = {}
for firstindex, secondindex in combinations(range(4), 2):
    for group in data:
        pairname = group['name'][firstindex] + group['name'][secondindex]
        if pairname not in pairs:
            pairs[pairname] = {'actual': 0, 'expected': 0}
        pairs[pairname]['actual'] += group['count']
        pairs[pairname]['expected'] += group['expected']

# .items() (rather than the Python-2-only .iteritems()) iterates the same
# entries and also works on Python 3.
for pairname, pair in pairs.items():
    # A binomial test compares a *tail* probability with the cutoff.  The
    # probability of exactly the observed count (binom.pmf) is small for
    # every outcome once n is large, so it is not a usable p-value.
    # Two-sided p-value: twice the smaller tail, capped at 1.
    #   lower tail = P(X <= actual),  upper tail = P(X >= actual)
    lower_tail = binom.cdf(pair['actual'], n, pair['expected'])
    upper_tail = binom.sf(pair['actual'] - 1, n, pair['expected'])
    prob = min(1.0, 2.0 * min(lower_tail, upper_tail))

    # Observed share of all observations, as a percentage.
    freq = 100.0 * float(pair['actual']) / float(n)

    if is_significant(prob):
        sig = 'significant: '
    else:
        sig = 'NOT significant:'

    print(" %s is %s %5.2f%c (%3d/%d) vs. %5.2f%c expected." %
          (pairname, sig,
           freq, '%',
           pair['actual'], n,
           (100.0 * pair['expected']), '%'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment