Skip to content

Instantly share code, notes, and snippets.

@sxlijin
Created December 7, 2016 09:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sxlijin/b2a14305e06c901492f1c6f23c9063e5 to your computer and use it in GitHub Desktop.
Save sxlijin/b2a14305e06c901492f1c6f23c9063e5 to your computer and use it in GitHub Desktop.
Parse scraped VOICE data into CSVs for analysis.
#!/usr/bin/env python3
from glob import glob as ls
import statistics
import itertools as it
import re
def expand(field):
    """Expand a tally of survey answers into a flat stream of scores.

    ``field`` is a sequence of ``(answer, count)`` pairs. The first five
    entries are scored 1 through 5 by position; any later entries (such as
    a trailing "no response" tally) are ignored. Each score is yielded
    ``count`` times, so the result can be fed straight to the
    ``statistics`` functions.

    Returns an ``itertools.chain`` iterator; it can be consumed only once.
    """
    # enumerate(..., start=1) turns the answer position into its 1-5 score.
    scored = enumerate(field[:5], start=1)
    # repeat() yields each score lazily instead of building one throwaway
    # list per answer and unpacking them all as positional arguments.
    return it.chain.from_iterable(
        it.repeat(score, freq) for score, (_, freq) in scored
    )
def median(field):
    """Return the median of the scores that ``expand(field)`` produces."""
    scores = expand(field)
    return statistics.median(scores)
def mean(field):
    """Return the arithmetic mean of the scores that ``expand(field)`` produces."""
    scores = expand(field)
    return statistics.mean(scores)
def pvar(field):
    """Return the population variance of the scores that ``expand(field)`` produces."""
    scores = expand(field)
    return statistics.pvariance(scores)
def _load_fields(path):
    """Parse one scraped survey file into per-question answer tallies.

    Every line of the file describes one survey question as colon-separated
    ``answer,count`` items. The result has one entry per line, formatted as

      [ (answer1, #), (answer2, #), ..., (answer5, #), (no response, #) ]

    with each count converted to ``int``.

    Raises ``ValueError`` if an item has no comma or a non-integer count.
    """
    fields = []
    # Plain 'r' replaces the old 'rU' mode, which Python 3.11 rejects; text
    # mode already normalizes newlines. The with-block closes the handle.
    with open(path, 'r') as handle:
        for line in handle:
            tallies = []
            for item in line.strip().split(':'):
                # Split on the last comma so answer text may contain commas.
                choice, freq = item.rsplit(',', 1)
                tallies.append((choice, int(freq)))
            fields.append(tallies)
    return fields


def _class_level(path):
    """Return the first decimal digit appearing in ``path`` as an int.

    Raises ``ValueError`` when the path contains no digit at all.
    """
    match = re.search('[0-9]', path)
    if match is None:
        raise ValueError('no class level digit found in %r' % path)
    return int(match.group())


def main():
    """Print one ``class_level,mean_challenge`` CSV row per file in downloads/."""
    for path in ls('downloads/*'):
        fields = _load_fields(path)

        # Question index within fields:
        #  0: effectiveness of communication
        #  1: helpfulness of instructor outside class
        #  2: effectiveness of instructor at stimulating interest
        #  3: overall rating of instructor
        #  4: grading standards of course
        #  5: requirements for course
        #  6: how much was learned in the course
        #  7: how effective course was at intellectual challenge
        #  8: overall rating of course
        #  9: reason students took the course
        # 10: interest in subject prior to course
        # 11: hours spent on course per week
        #
        # WARNING: fields[x][5] always corresponds to the # that did not respond
        #
        # Only the metric that is actually printed is computed here. Other
        # columns can be derived the same way, e.g. mean(fields[8]) or
        # pvar(fields[8]) for the overall course rating, or the share of
        # respondents who picked one of the first three reasons in fields[9].
        wt_avg_challs = mean(fields[7])
        class_level = _class_level(path)

        print('%f,%f' % (class_level, wt_avg_challs))
# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment