Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Scaffolding for a re-analysis of the data from '"What Went Right and What Went Wrong": An Analysis of 155 Postmortems from Game Development'
#!/usr/bin/env python
################################################################################
# Copyright (c) 2017 Nathan Hwang, "thenoviceoof"
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
################################################################################
# An example invocation used with the 155 reanalysis is:
# ./analysis.py clean_rows.csv --verbose --t_test --fischer_test \
# --barnard_csm_test --barnard_boschloo_test
import argparse
import csv
import os
import subprocess
import pickle
import tempfile
INTERMEDIATE_RESULTS_FILE = '.analysis_intermediate.pickle'
################################################################################
# Define script arguments.
# NOTE: the text must be passed as ``description=``.  The first positional
# parameter of ArgumentParser is ``prog``, so passing the sentence positionally
# made it show up as the program name in the usage line.
parser = argparse.ArgumentParser(
    description='Runs various proportion tests on all '
                'combinations of independent/dependent variables')

# Input and general behaviour.
parser.add_argument('csv_file',
                    help='CSV file to read from, with only iv/dv rows')
parser.add_argument('-v', '--verbose', action='store_true',
                    help='Display progress messages')
parser.add_argument('-i', '--intermediate', action='store_true',
                    help='Save intermediate results')
parser.add_argument('--clear', action='store_true',
                    help='Delete any saved intermediate results')
# Stored inverted: args.raw defaults to True and --noraw switches it off.
parser.add_argument('--noraw', dest='raw', action='store_false',
                    help='Don\'t include raw numbers')

# One switch per statistical test.  The flag names (including the historical
# "fischer" spelling) are kept as-is so existing invocations keep working.
parser.add_argument('--prop_test', action='store_true',
                    help='Run prop.test')
parser.add_argument('--prop_correct_test', action='store_true',
                    help='Run prop.test with continuity correction')
parser.add_argument('--t_test', action='store_true',
                    help='Run t.test')
parser.add_argument('--fischer_test', action='store_true',
                    help='Run fisher.test (Fisher\'s exact test)')
parser.add_argument('--barnard_csm_test', action='store_true',
                    help='Run Barnard\'s CSM test (from Exact)')
parser.add_argument('--barnard_csm_approx_test', action='store_true',
                    help='Run Barnard\'s approximate CSM test (from Exact)')
parser.add_argument('--barnard_boschloo_test', action='store_true',
                    help='Run Barnard\'s Boschloo test (from Exact)')

args = parser.parse_args()
################################################################################
# Read input
def _names_with_prefix(names, prefix):
    """Return the sorted subset of *names* that start with *prefix*."""
    return sorted(name for name in names if name.startswith(prefix))


# Load the whole CSV into memory; the column names come from its header row.
with open(args.csv_file) as csv_file:
    csv_reader = csv.DictReader(csv_file)
    fieldnames = csv_reader.fieldnames
    data = list(csv_reader)

# Columns are classified purely by their name prefix: independent variables
# start with 'iv_', dependent variables with 'dv_'.
iv_names = _names_with_prefix(fieldnames, 'iv_')
dv_names = _names_with_prefix(fieldnames, 'dv_')
# Handle intermediate results.
# In-memory cache of finished analyses, keyed by the (iv, dv) column-name pair
# and holding the output row dict built for that pair.  It is loaded from and
# saved to INTERMEDIATE_RESULTS_FILE so an interrupted run can be resumed.
INTERMEDIATE_RESULTS = {}
def read_intermediate():
    """Load previously saved results into the global INTERMEDIATE_RESULTS.

    The saved results are discarded (INTERMEDIATE_RESULTS is reset to an empty
    dict) when they were produced with different command-line arguments, or
    when the file cannot be unpickled, e.g. because an earlier run was killed
    while writing it.

    Raises:
        IOError/OSError: if INTERMEDIATE_RESULTS_FILE cannot be opened.
    """
    global INTERMEDIATE_RESULTS
    # NOTE(security): unpickling can execute arbitrary code.  This is only
    # acceptable because the file is a local cache written by this script;
    # never point INTERMEDIATE_RESULTS_FILE at data from an untrusted source.
    # Pickle data is binary, so the file is opened in binary mode; the "with"
    # block closes the handle even if loading fails.
    with open(INTERMEDIATE_RESULTS_FILE, 'rb') as intermediate_file:
        try:
            intermediate_contents = pickle.load(intermediate_file)
        except (EOFError, pickle.UnpicklingError, ValueError, IndexError,
                KeyError):
            print('Intermediate results unreadable, discarding intermediate '
                  'results')
            INTERMEDIATE_RESULTS = {}
            return
    if intermediate_contents['args'] != str(args):
        print('Arguments changed, discarding intermediate results')
        INTERMEDIATE_RESULTS = {}
        return
    INTERMEDIATE_RESULTS = intermediate_contents['results']


def write_intermediate():
    """Save INTERMEDIATE_RESULTS to INTERMEDIATE_RESULTS_FILE.

    The string form of the parsed command-line arguments is stored next to the
    results so that read_intermediate() can tell whether they still apply.

    Raises:
        IOError/OSError: if INTERMEDIATE_RESULTS_FILE cannot be written.
    """
    intermediate_contents = {
        'args': str(args),
        'results': INTERMEDIATE_RESULTS,
    }
    # Binary mode to match read_intermediate(); the handle is closed even if
    # pickling raises.
    with open(INTERMEDIATE_RESULTS_FILE, 'wb') as intermediate_file:
        pickle.dump(intermediate_contents, intermediate_file)
# --clear: throw away whatever an earlier run left behind.
if args.clear:
    if os.path.exists(INTERMEDIATE_RESULTS_FILE):
        os.remove(INTERMEDIATE_RESULTS_FILE)

# --intermediate: resume from the saved results when a file is (still) there;
# in every other case start from an empty cache.
if not (args.intermediate and os.path.exists(INTERMEDIATE_RESULTS_FILE)):
    INTERMEDIATE_RESULTS = {}
else:
    read_intermediate()
################################################################################
# Run the requested analyses
# Every supported test, as (argparse attribute, output CSV column).  The order
# is significant: it is the order in which the True/False switches are passed
# to analysis.R and in which the returned p-value lines are read back.
# Boschloo's test has its own column; it used to be written under
# 'barnard_csm_p', clobbering the CSM result and duplicating that header.
TEST_COLUMNS = [
    ('prop_test', 'ztest_p'),
    ('prop_correct_test', 'zctest_p'),
    ('t_test', 'ttest_p'),
    ('fischer_test', 'fischer_p'),
    ('barnard_csm_test', 'barnard_csm_p'),
    ('barnard_csm_approx_test', 'barnard_csm_approx_p'),
    ('barnard_boschloo_test', 'barnard_boschloo_p'),
]
RAW_COLUMNS = ['ind_t__dep_t', 'ind_t__dep_f', 'ind_f__dep_t', 'ind_f__dep_f']


def _count_outcomes(rows, iv, dv):
    """Tally the 2x2 contingency table for one iv/dv pair in a single pass.

    Only rows whose iv and dv cells are exactly the strings 'True' or 'False'
    are counted; rows holding anything else in either column are ignored.

    Returns:
        The tuple (iv_true_dv_true, iv_true_dv_false, iv_false_dv_true,
        iv_false_dv_false).
    """
    counts = {
        ('True', 'True'): 0,
        ('True', 'False'): 0,
        ('False', 'True'): 0,
        ('False', 'False'): 0,
    }
    for row in rows:
        cell = (row[iv], row[dv])
        if cell in counts:
            counts[cell] += 1
    return (counts[('True', 'True')], counts[('True', 'False')],
            counts[('False', 'True')], counts[('False', 'False')])


def _run_r_tests(itdt, itdf, ifdt, ifdf, switches):
    """Run analysis.R on one contingency table and return its p-values.

    The four counts are handed to R through a temporary one-row CSV file,
    followed on the command line by *switches* (one 'True'/'False' string per
    entry of TEST_COLUMNS).

    Returns:
        A list with one item per entry of TEST_COLUMNS: a float, or '' where
        the script printed an empty line.  (That the script prints exactly one
        newline-separated line per test, blank for tests it did not run, is
        inferred from how the output is parsed here -- confirm against
        analysis.R.)

    Raises:
        subprocess.CalledProcessError: if Rscript exits with an error.
        ValueError: if the number of output lines is not len(TEST_COLUMNS), or
            a line is not a number.
    """
    # mkstemp() returns an already-open OS-level file descriptor.  Wrap it
    # instead of discarding it, otherwise one descriptor leaks per iv/dv pair.
    scratch_fd, scratch_path = tempfile.mkstemp(suffix='.csv')
    try:
        with os.fdopen(scratch_fd, 'w') as scratch_file:
            scratch_headers = ['itdt', 'itdf', 'ifdt', 'ifdf']
            scratch_writer = csv.DictWriter(scratch_file, scratch_headers)
            scratch_writer.writeheader()
            scratch_writer.writerow({
                'itdt': itdt,
                'itdf': itdf,
                'ifdt': ifdt,
                'ifdf': ifdf,
            })
        # universal_newlines gives text with '\n' line endings regardless of
        # platform, so the split below is reliable.
        script_output = subprocess.check_output(
            ['Rscript', 'analysis.R', scratch_path] + switches,
            universal_newlines=True)
    finally:
        # Remove the scratch file even when Rscript fails.
        os.remove(scratch_path)
    lines = script_output.split('\n')
    if len(lines) != len(TEST_COLUMNS):
        raise ValueError('analysis.R printed {} lines, expected {}'.format(
            len(lines), len(TEST_COLUMNS)))
    return [(float(line) if line else line) for line in lines]


def _build_row(iv, dv):
    """Compute the output row (a dict keyed by column name) for one pair."""
    itdt, itdf, ifdt, ifdf = _count_outcomes(data, iv, dv)
    iv_true = itdt + itdf
    iv_false = ifdt + ifdf
    if iv_true and iv_false:
        # Use R to run tests
        p_values = _run_r_tests(itdt, itdf, ifdt, ifdf, test_args)
    else:
        # Don't give a value if we can't calculate it (one side is entirely
        # missing).
        p_values = [''] * len(TEST_COLUMNS)
    row = {
        'ind_var': iv,
        'dep_var': dv,
        'ind_t': iv_true,
        'ind_f': iv_false,
    }
    if args.raw:
        row.update(zip(RAW_COLUMNS, (itdt, itdf, ifdt, ifdf)))
    # Only the requested tests get a column.
    for (flag, column), p_value in zip(TEST_COLUMNS, p_values):
        if getattr(args, flag):
            row[column] = p_value
    return row


# Column layout of analysis_rows.csv.
headers = ['ind_var', 'dep_var', 'ind_t', 'ind_f']
if args.raw:
    headers += RAW_COLUMNS
headers += [column for flag, column in TEST_COLUMNS if getattr(args, flag)]

output = []
# Switches for analysis.R: 'True'/'False' strings in TEST_COLUMNS order.
test_args = [str(getattr(args, flag)) for flag, column in TEST_COLUMNS]
i = 0
length = len(dv_names) * len(iv_names)
for dv in dv_names:
    for iv in iv_names:
        # Use intermediate results if available.  A cached row is trusted only
        # when it has exactly the expected columns, so rows saved by an older
        # version with a different column layout are recomputed.
        cached_row = None
        if args.intermediate:
            cached_row = INTERMEDIATE_RESULTS.get((iv, dv))
        if cached_row is not None and set(cached_row) == set(headers):
            out_row = cached_row
        else:
            out_row = _build_row(iv, dv)
            # Save intermediate results
            if args.intermediate:
                INTERMEDIATE_RESULTS[(iv, dv)] = out_row
                write_intermediate()
        output.append(out_row)
        i += 1
        # --verbose is documented as "Display progress messages"; honour it
        # rather than printing unconditionally.
        if args.verbose:
            print('Progress: {}/{}'.format(i, length))
################################################################################
# Output
with open('analysis_rows.csv', 'w') as output_file:
    csv_output = csv.DictWriter(output_file, headers)
    csv_output.writeheader()
    csv_output.writerows(output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment