michaelmelanson/mbti.py

## mbti.py
from scipy.stats.distributions import binom

def _group(name, count, expected):
    """Build one per-type record with the keys the report code reads."""
    return {'name': name, 'count': count, 'expected': expected}


# One record per four-letter type: the observed count in the sample and the
# expected share of that type as a fraction (it is later used as the binomial
# success probability and printed as a percentage).
# NOTE(review): the 'expected' fractions add up to 1.012, not 1.0 -- confirm
# against the table they were taken from.
data = [
    _group('ISTJ', 12, 0.1160),
    _group('ISFJ', 4, 0.1380),
    _group('INFJ', 14, 0.0150),
    _group('INTJ', 206, 0.0210),
    _group('ISTP', 16, 0.0540),
    _group('ISFP', 1, 0.0880),
    _group('INFP', 56, 0.0430),
    _group('INTP', 191, 0.0430),
    _group('ESTP', 3, 0.0430),
    _group('ESFP', 1, 0.0850),
    _group('ENFP', 22, 0.0810),
    _group('ENTP', 56, 0.0330),
    _group('ESTJ', 9, 0.0870),
    _group('ESFJ', 5, 0.1230),
    _group('ENFJ', 15, 0.0240),
    _group('ENTJ', 49, 0.0180),
]

# Total sample size: the sum of every type's observed count.
n = sum(entry['count'] for entry in data)

# Confidence level as a fraction; the report header prints it as a percentage.
CONFIDENCE_INTERVAL = 0.99


def is_significant(prob):
    """Return True when prob is strictly below 1.0 - CONFIDENCE_INTERVAL."""
    threshold = 1.0 - CONFIDENCE_INTERVAL
    return prob < threshold

# Report header, then one line per type comparing its observed share of the
# sample with its expected share.
print("  Using a binomial test at a %.0f%c confidence level." % (100*CONFIDENCE_INTERVAL, '%'))
print('')
print('Single-group results:')
for group in data:
    # Two-sided binomial p-value: double the smaller tail probability of a
    # count at least this extreme under Binomial(n, expected), capped at 1.
    # binom.pmf() alone is only the chance of hitting this exact count; for a
    # large n that is small even when the count matches expectation, so it
    # cannot be compared against a significance threshold.
    lower_tail = binom.cdf(group['count'], n, group['expected'])     # P(X <= count)
    upper_tail = binom.sf(group['count'] - 1, n, group['expected'])  # P(X >= count)
    prob = min(1.0, 2.0 * min(lower_tail, upper_tail))
    freq = 100.0 * float(group['count']) / float(n)

    # Both labels are 16 characters wide so the columns line up.
    if is_significant(prob):
        sig = 'significant:    '
    else:
        sig = 'NOT significant:'

    # '%c' fed with '%' emits a literal percent sign.
    print("  %s is %s %5.2f%c (%3d/%d) vs. %5.2f%c expected." %
          (group['name'], sig,
           freq, '%',
           group['count'], n,
           (100.0 * group['expected']), '%'))


# Blank separator line, then the heading for the pair-wise section.
# Parenthesised single-argument prints match the multi-line report prints in
# this file and produce the same output on Python 2 and Python 3.
print('')
print('Pair-wise results:')
try:
    from itertools import combinations
except ImportError:
    # Pure-Python stand-in for interpreters whose itertools lacks combinations.
    def combinations(iterable, r):
        """Yield each r-length tuple of items from iterable, in input order.

        combinations('ABCD', 2) --> AB AC AD BC BD CD
        combinations(range(4), 3) --> 012 013 023 123
        """
        items = tuple(iterable)
        size = len(items)
        if r > size:
            return
        picks = list(range(r))
        yield tuple(items[k] for k in picks)
        while True:
            # Scan from the right for a position that has not yet reached
            # its largest allowed index.
            pos = r - 1
            while pos >= 0 and picks[pos] == pos + size - r:
                pos -= 1
            if pos < 0:
                # Every position is at its maximum: all tuples were emitted.
                return
            picks[pos] += 1
            # Reset everything to the right to the smallest increasing run.
            for nxt in range(pos + 1, r):
                picks[nxt] = picks[nxt - 1] + 1
            yield tuple(items[k] for k in picks)

# Fold the per-type figures into two-letter groups: for every pair of letter
# positions in the four-letter name, sum the counts and expected fractions of
# all types that share the letters at those two positions.
pairs = {}
for left, right in combinations(range(4), 2):
    for entry in data:
        letters = entry['name'][left] + entry['name'][right]
        totals = pairs.setdefault(letters, {'actual': 0, 'expected': 0})
        totals['actual'] += entry['count']
        totals['expected'] += entry['expected']


# One report line per two-letter group, in the same layout as the
# single-group lines.  items() (rather than iteritems()) iterates the dict on
# both Python 2 and Python 3.
for pairname, pair in pairs.items():
    # Two-sided binomial p-value: double the smaller tail probability of a
    # count at least this extreme under Binomial(n, expected), capped at 1.
    # binom.pmf() alone is only the chance of hitting this exact count; for a
    # large n that is small even when the count matches expectation, so it
    # cannot be compared against a significance threshold.
    lower_tail = binom.cdf(pair['actual'], n, pair['expected'])     # P(X <= actual)
    upper_tail = binom.sf(pair['actual'] - 1, n, pair['expected'])  # P(X >= actual)
    prob = min(1.0, 2.0 * min(lower_tail, upper_tail))
    freq = 100.0 * float(pair['actual']) / float(n)

    # Both labels are 16 characters wide so the columns line up.
    if is_significant(prob):
        sig = 'significant:    '
    else:
        sig = 'NOT significant:'

    # '%c' fed with '%' emits a literal percent sign.
    print("  %s is %s %5.2f%c (%3d/%d) vs. %5.2f%c expected." %
          (pairname, sig,
           freq, '%',
           pair['actual'], n,
           (100.0 * pair['expected']), '%'))
	from scipy.stats.distributions import binom

	def _group(name, count, expected):
		"""Build one per-type record with the keys the report code reads."""
		return {'name': name, 'count': count, 'expected': expected}


	# One record per four-letter type: the observed count in the sample and the
	# expected share of that type as a fraction (it is later used as the
	# binomial success probability and printed as a percentage).
	# NOTE(review): the 'expected' fractions add up to 1.012, not 1.0 -- confirm
	# against the table they were taken from.
	data = [
		_group('ISTJ', 12, 0.1160),
		_group('ISFJ', 4, 0.1380),
		_group('INFJ', 14, 0.0150),
		_group('INTJ', 206, 0.0210),
		_group('ISTP', 16, 0.0540),
		_group('ISFP', 1, 0.0880),
		_group('INFP', 56, 0.0430),
		_group('INTP', 191, 0.0430),
		_group('ESTP', 3, 0.0430),
		_group('ESFP', 1, 0.0850),
		_group('ENFP', 22, 0.0810),
		_group('ENTP', 56, 0.0330),
		_group('ESTJ', 9, 0.0870),
		_group('ESFJ', 5, 0.1230),
		_group('ENFJ', 15, 0.0240),
		_group('ENTJ', 49, 0.0180),
	]

	# Total sample size: the sum of every type's observed count.
	n = sum(entry['count'] for entry in data)

	# Confidence level as a fraction; the report header prints it as a percentage.
	CONFIDENCE_INTERVAL = 0.99


	def is_significant(prob):
		"""Return True when prob is strictly below 1.0 - CONFIDENCE_INTERVAL."""
		threshold = 1.0 - CONFIDENCE_INTERVAL
		return prob < threshold

	# Report header, then one line per type comparing its observed share of the
	# sample with its expected share.  The loop and if/else bodies are nested
	# explicitly; without that nesting the section does not parse.
	print(" Using a binomial test at a %.0f%c confidence level." % (100*CONFIDENCE_INTERVAL, '%'))
	print('')
	print('Single-group results:')
	for group in data:
		# Two-sided binomial p-value: double the smaller tail probability of a
		# count at least this extreme under Binomial(n, expected), capped at 1.
		# binom.pmf() alone is only the chance of hitting this exact count; for
		# a large n that is small even when the count matches expectation, so
		# it cannot be compared against a significance threshold.
		lower_tail = binom.cdf(group['count'], n, group['expected'])     # P(X <= count)
		upper_tail = binom.sf(group['count'] - 1, n, group['expected'])  # P(X >= count)
		prob = min(1.0, 2.0 * min(lower_tail, upper_tail))
		freq = 100.0 * float(group['count']) / float(n)

		if is_significant(prob):
			sig = 'significant: '
		else:
			sig = 'NOT significant:'

		# '%c' fed with '%' emits a literal percent sign.
		print(" %s is %s %5.2f%c (%3d/%d) vs. %5.2f%c expected." %
			(group['name'], sig,
			freq, '%',
			group['count'], n,
			(100.0 * group['expected']), '%'))


	# Blank separator line, then the heading for the pair-wise section.
	# Parenthesised single-argument prints produce the same output on Python 2
	# and Python 3.
	print('')
	print('Pair-wise results:')
	try:
		from itertools import combinations
	except ImportError:
		# Pure-Python stand-in for interpreters whose itertools lacks
		# combinations.
		def combinations(iterable, r):
			"""Yield each r-length tuple of items from iterable, in input order.

			combinations('ABCD', 2) --> AB AC AD BC BD CD
			combinations(range(4), 3) --> 012 013 023 123
			"""
			pool = tuple(iterable)
			n = len(pool)
			if r > n:
				return
			# A real list is needed because the positions are updated in place.
			indices = list(range(r))
			yield tuple(pool[i] for i in indices)
			while True:
				# Find the rightmost position that has not reached its largest
				# allowed index; the for/else returns once none is left.
				for i in reversed(range(r)):
					if indices[i] != i + n - r:
						break
				else:
					return
				indices[i] += 1
				# Reset everything to the right to the smallest increasing run.
				for j in range(i+1, r):
					indices[j] = indices[j-1] + 1
				yield tuple(pool[i] for i in indices)

	# Fold the per-type figures into two-letter groups: for every pair of letter
	# positions in the four-letter name, sum the counts and expected fractions
	# of all types that share the letters at those two positions.  The nesting
	# of both loops is explicit; without it the section does not parse.
	pairs = {}
	for pair in combinations(range(4), 2):
		firstindex = pair[0]
		secondindex = pair[1]

		for group in data:
			pairname = ''.join([group['name'][firstindex],
				group['name'][secondindex]])

			# First sighting of this two-letter group: start its totals at zero.
			if pairname not in pairs:
				pairs[pairname] = {'actual': 0, 'expected': 0}
			pairs[pairname]['actual'] += group['count']
			pairs[pairname]['expected'] += group['expected']



	# One report line per two-letter group, in the same layout as the
	# single-group lines.  items() (rather than iteritems()) iterates the dict
	# on both Python 2 and Python 3.
	for pairname, pair in pairs.items():
		# Two-sided binomial p-value: double the smaller tail probability of a
		# count at least this extreme under Binomial(n, expected), capped at 1.
		# binom.pmf() alone is only the chance of hitting this exact count; for
		# a large n that is small even when the count matches expectation, so
		# it cannot be compared against a significance threshold.
		lower_tail = binom.cdf(pair['actual'], n, pair['expected'])     # P(X <= actual)
		upper_tail = binom.sf(pair['actual'] - 1, n, pair['expected'])  # P(X >= actual)
		prob = min(1.0, 2.0 * min(lower_tail, upper_tail))
		freq = 100.0 * float(pair['actual']) / float(n)

		if is_significant(prob):
			sig = 'significant: '
		else:
			sig = 'NOT significant:'

		# '%c' fed with '%' emits a literal percent sign.
		print(" %s is %s %5.2f%c (%3d/%d) vs. %5.2f%c expected." %
			(pairname, sig,
			freq, '%',
			pair['actual'], n,
			(100.0 * pair['expected']), '%'))