Skip to content

Instantly share code, notes, and snippets.

@johnmaxwelliv
Last active December 10, 2015 01:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnmaxwelliv/4357424 to your computer and use it in GitHub Desktop.
Save johnmaxwelliv/4357424 to your computer and use it in GitHub Desktop.
Analysis of data here (http://lesswrong.com/lw/fp5/2012_survey_results/) for possible Dunning-Kruger effects.
from scipy.stats.stats import pearsonr
import csv
def show_correlation(data, var1, var2):
    """Print the Pearson correlation coefficient between two columns.

    Args:
        data: Iterable of dict-like rows whose cells are strings.
        var1: Name of the first numeric column.
        var2: Name of the second numeric column.

    Rows in which either column holds the blank marker ``' '`` are left out
    of the calculation. The result is printed, followed by a spacer line;
    nothing is returned.
    """
    usable_rows = []
    for row in data:
        # A single space is how this data set marks an unanswered question.
        if row[var1] == ' ' or row[var2] == ' ':
            continue
        usable_rows.append(row)

    first_values = [float(row[var1]) for row in usable_rows]
    second_values = [float(row[var2]) for row in usable_rows]

    # pearsonr returns (coefficient, p-value); only the coefficient is shown.
    outcome = pearsonr(first_values, second_values)
    coefficient = outcome[0]

    print('Correlation between "%s" and "%s": %.4f' % (var1, var2, coefficient))
    print(' ')
def show_breakdown(data, attribute, var):
    """Print the mean of ``var`` for each distinct value of ``attribute``.

    Args:
        data: Sequence of dict-like rows whose cells are strings (it is
            iterated once per group, so it must be re-iterable). A cell
            equal to ``' '`` is treated as missing.
        attribute: Column whose values define the groups.
        var: Numeric column that is averaged within each group.

    For the 'LessWrongUse' and 'Sequences' attributes the groups are printed
    in the hard-coded order below. For any other attribute the groups are the
    distinct non-blank values found in ``data``, sorted so that the output is
    the same on every run.

    A group with no usable ``var`` values is reported as ``n/a`` rather than
    aborting the whole report with a ZeroDivisionError. Nothing is returned.
    """
    print('Breakdown of average "%s" by "%s":' % (var, attribute))

    # Attributes whose answers should appear in a specific, hand-written order.
    fixed_orderings = {
        'LessWrongUse': [
            "I lurk, but never registered an account",
            "I've registered an account, but never posted",
            "I've posted a comment, but never a top-level post",
            "I've posted in Discussion, but not Main",
            "I've posted in Main",
        ],
        'Sequences': [
            "Never even knew they existed until this moment",
            "Know they existed, but never looked at them",
            "Some, but less than 25%",
            "About 25% of the Sequences",
            "About 50% of the Sequences",
            "About 75% of the Sequences",
            "Nearly all of the Sequences",
        ],
    }

    if attribute in fixed_orderings:
        chars = fixed_orderings[attribute]
    else:
        # Set difference (instead of .remove) tolerates data without blanks.
        chars = sorted({line[attribute] for line in data} - {' '})

    for char in chars:
        numbers = [
            float(line[var])
            for line in data
            if line[attribute] == char and line[var] != ' '
        ]
        if numbers:
            print("%10.4f \"%s\"" % (sum(numbers) / len(numbers), char))
        else:
            # Nobody in this group answered `var`; keep the column alignment.
            print("%10s \"%s\"" % ('n/a', char))
    print(' ')
def _passes_cleaning(row):
    """Return True when a survey row survives the data-cleaning rules.

    A row is rejected when a non-blank SAT score is not at most the maximum
    of its scale (2400 or 1600), or when any cell is exactly one of the junk
    literals '1`8' or '~5' (presumably typos in the raw export).
    """
    sat_ceilings = (
        ('SATscoresoutof2400', 2400),
        ('SATscoresoutof1600', 1600),
    )
    for column, ceiling in sat_ceilings:
        raw_score = row[column]
        # ' ' marks an unanswered question and is always acceptable.
        if raw_score != ' ' and not float(raw_score) <= ceiling:
            return False
    cells = row.values()
    return not any(junk in cells for junk in ('1`8', '~5'))


# Load the survey export, keeping only the rows that pass cleaning.
with open('for_public.csv', 'r') as infile:
    lines = [row for row in csv.DictReader(infile) if _passes_cleaning(row)]

# Correlation of each numeric variable with karma.
_CORRELATION_PAIRS = (
    ('IQ', 'KarmaScore'),
    ('SATscoresoutof1600', 'KarmaScore'),
    ('SATscoresoutof2400', 'KarmaScore'),
    ('TimeinCommunity', 'KarmaScore'),
)
for _first, _second in _CORRELATION_PAIRS:
    show_correlation(lines, _first, _second)

# Per-group averages, as (grouping attribute, averaged variable).
_BREAKDOWNS = (
    ('LessWrongUse', 'IQ'),
    ('LessWrongUse', 'SATscoresoutof1600'),
    ('LessWrongUse', 'SATscoresoutof2400'),
    ('Sequences', 'KarmaScore'),
    ('LessWrongUse', 'TimeinCommunity'),
    ('LessWrongUse', 'AutismScore'),
    ('Profession', 'KarmaScore'),
    ('Degree', 'KarmaScore'),
)
for _attribute, _variable in _BREAKDOWNS:
    show_breakdown(lines, _attribute, _variable)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment