Skip to content

Instantly share code, notes, and snippets.

@toinetoine
Created October 1, 2015 20:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save toinetoine/c785e42831f2e24131b8 to your computer and use it in GitHub Desktop.
Save toinetoine/c785e42831f2e24131b8 to your computer and use it in GitHub Desktop.
import math
from math import *
# utility for calculating the likelihood of a value lying at least as many
# standard deviations away from the mean as the given value does
def phi(val, mean, standard_deviation):
    """Return the two-tailed normal probability of a deviation as large as val's.

    The distance between ``val`` and ``mean`` is expressed in standard
    deviations and the combined area of both tails of the normal
    distribution beyond that distance is returned.

    Args:
        val: The observed value.
        mean: The mean of the distribution.
        standard_deviation: The standard deviation of the distribution.

    Returns:
        A float: 1.0 when ``val`` equals ``mean``, shrinking towards 0.0
        as ``val`` moves away from ``mean``.
    """
    deviation = abs(mean - val)
    if standard_deviation == 0:
        # Degenerate distribution with no spread: the only possible value
        # is the mean itself, so avoid dividing by zero and return the limit.
        return 1.0 if deviation == 0 else 0.0
    # Convert both operands before dividing so integer inputs never truncate.
    num_sds = float(deviation) / float(standard_deviation)
    # Lower tail plus upper tail of the normal CDF collapses to
    # 1 - erf(x) = erfc(x), where x = num_sds / sqrt(2).
    return math.erfc(num_sds / math.sqrt(2.0))
# calculate, for each criterion, the mean absolute difference between the
# two graders' scores across all of the essays
def calculate_means(essays):
    """Return the mean absolute grader disagreement for each criterion.

    Args:
        essays: A sequence of essays. Each essay is a pair
            ``[first_grader_scores, second_grader_scores]`` of equally long
            score sequences, one score per criterion.

    Returns:
        A list of floats, one per criterion, holding the mean of
        ``abs(first - second)`` over all essays. The list has as many
        entries as the essays have criteria, rather than a fixed five.
        An empty ``essays`` yields an empty list.
    """
    if not essays:
        # No essays means no criteria to report (and nothing to divide by).
        return []

    # Size the accumulator from the data instead of assuming five criteria.
    num_criteria = max(len(essay[0]) for essay in essays)
    difference_sums = [0] * num_criteria

    for essay in essays:
        for criteria_i, first_score in enumerate(essay[0]):
            difference_sums[criteria_i] += abs(first_score - essay[1][criteria_i])

    essay_count = float(len(essays))
    return [float(total) / essay_count for total in difference_sums]
# calculate the standard deviation over all of the essays
def calculate_standard_deviations(criteria_means, essays):
    """Return the standard deviation of grader disagreement per criterion.

    For every criterion the absolute difference between the two graders'
    scores is compared with that criterion's mean difference; the
    population standard deviation (dividing by the number of essays) of
    those differences is returned.

    Args:
        criteria_means: Mean absolute difference per criterion, indexed
            like the score sequences inside ``essays``.
        essays: A sequence of essays. Each essay is a pair
            ``[first_grader_scores, second_grader_scores]``.

    Returns:
        A list of floats with one standard deviation per entry of
        ``criteria_means``. With no essays every deviation is 0.0.
    """
    if not essays:
        # No observations: report zero spread instead of dividing by zero.
        return [0.0] * len(criteria_means)

    squared_deviation_sums = [0.0] * len(criteria_means)
    for essay in essays:
        for criteria_i, first_score in enumerate(essay[0]):
            ratings_difference = abs(first_score - essay[1][criteria_i])
            squared_deviation_sums[criteria_i] += (
                float(ratings_difference) - criteria_means[criteria_i]
            ) ** 2

    essay_count = float(len(essays))
    # The mean squared deviation is the variance; take the square root so
    # the result really is a standard deviation.
    return [math.sqrt(total / essay_count) for total in squared_deviation_sums]
# calculate agreement scores for each of the essays
def calculate_agreement_scores(means, standard_deviations, essays):
    """Score each essay by how typical its grader disagreements are.

    For every criterion of every essay, the absolute difference between the
    two graders' scores is passed to ``phi`` together with that criterion's
    mean and standard deviation. The per-criterion likelihoods of an essay
    are added up and the total is rounded to three decimals.

    Args:
        means: Mean grader difference per criterion.
        standard_deviations: Spread of the grader difference per criterion.
        essays: A sequence of ``[first_grader_scores, second_grader_scores]``
            pairs.

    Returns:
        A list with one rounded agreement score per essay, in input order.
    """
    scores = []
    for essay in essays:
        first_ratings, second_ratings = essay[0], essay[1]
        per_criterion = []
        for idx in range(len(first_ratings)):
            gap = abs(first_ratings[idx] - second_ratings[idx])
            criterion_mean = means[idx]
            criterion_sd = standard_deviations[idx]
            likelihood = phi(gap, criterion_mean, criterion_sd)
            per_criterion.append(likelihood)
            # Diagnostic trace of every per-criterion computation.
            print(
                "diff: " + str(gap)
                + ", mean: " + str(criterion_mean)
                + ", sd: " + str(criterion_sd)
                + ", likelihood:" + str(likelihood)
            )
        # Start from 0.0 so the total is a float even with no criteria.
        scores.append(round(sum(per_criterion, 0.0), 3))
    return scores
# Sample data: one entry per essay. Each essay holds two lists of 5 scores,
# one list per grader, where position i in both lists is the score that
# grader gave for criterion i.
essays = [
[[3, 4, 4, 4, 3], [4, 4, 3, 3, 3]],
[[5, 4, 4, 4, 5], [5, 4, 5, 3, 5]],
[[3, 4, 4, 4, 3], [4, 5, 3, 3, 4]],
[[2, 4, 1, 4, 3], [4, 4, 3, 4, 3]],
[[1, 1, 1, 2, 1], [1, 2, 2, 2, 1]],
[[4, 4, 2, 4, 3], [3, 4, 1, 4, 3]],
[[1, 1, 2, 1, 1], [2, 1, 2, 2, 1]]
]
# Per-criterion mean of the absolute difference between the two graders.
criteria_means = calculate_means(essays)
# Per-criterion spread of those differences around the means above.
criteria_standard_devs = calculate_standard_deviations(criteria_means, essays)
# One agreement score per essay, in the same order as `essays`.
agreement_scores = calculate_agreement_scores(criteria_means, criteria_standard_devs, essays)
# Print the scores from highest to lowest; `agreement_scores` itself keeps
# the original essay order because sorted() returns a new list.
print(sorted(agreement_scores, reverse=True))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment