Skip to content

Instantly share code, notes, and snippets.

@nasifimtiazohi
Created April 18, 2019 18:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nasifimtiazohi/1b2f261847ebd555dd4106f37c18df52 to your computer and use it in GitHub Desktop.
Save nasifimtiazohi/1b2f261847ebd555dd4106f37c18df52 to your computer and use it in GitHub Desktop.
Measure the (weighted) Cohen's kappa agreement rate between two raters' ratings
import os
import openpyxl
''' Let's assume there is an Excel sheet
where two columns hold the ratings from two raters;
this script measures the agreement rate between them. '''
#In this sample, the ratings are for sentiment
#possible ratings are: positive, negative, neutral
# Load the spreadsheet and tally how often each (rater 1, rating, rater 2
# rating) pair occurs.  `bucket` maps a (rating_1, rating_2) tuple to the
# number of rows carrying that pair.
#
# NOTE(review): `start` and `end` are not defined in this snippet, and the
# 'columnn_rater1' / 'columnn_rater2' strings look like placeholders for the
# real column letters -- set them before running.
wb = openpyxl.load_workbook('test.xlsx')
# `Workbook.active` is the supported accessor; `get_active_sheet()` is the
# deprecated spelling of the same thing.
ws = wb.active
bucket = {}
for i in range(start, end):  # start row to end row in excel sheet
    a = ws['columnn_rater1' + str(i)].value
    if a is None:
        # The first empty rater-1 cell ends the scan.
        break
    b = ws['columnn_rater2' + str(i)].value
    bucket[(a, b)] = bucket.get((a, b), 0) + 1
# Weighted observed disagreement for Cohen's kappa.
#
# The weights are specific to sentiment ratings: a disagreement that involves
# "neutral" counts once, any other disagreement (e.g. positive vs negative)
# counts twice, and agreeing pairs contribute nothing.
# For an unweighted kappa every disagreeing pair would simply add its count.
observation_sum = 0
for pair, count in bucket.items():
    if pair[0] == pair[1]:
        continue  # agreement: weight 0
    if "neutral" in pair:
        observation_sum += count
    else:
        observation_sum += count * 2
# Basic tallies for Cohen's kappa:
#   total     -- number of rated rows (sum of all pair counts)
#   agreement -- number of rows where both raters gave the same rating
#   values    -- every distinct rating seen, in first-seen order
total = sum(bucket.values())
agreement = sum(count for pair, count in bucket.items() if pair[0] == pair[1])
values = []
for pair in bucket:
    for rating in pair:
        if rating not in values:
            values.append(rating)
# Chance-expected frequency of every (rater 1 rating, rater 2 rating) cell:
#     expected[(a, b)] = (rows where rater 1 said a)
#                        * (rows where rater 2 said b) / total
#
# Marginal totals are accumulated once.  The original added the whole
# `bucket` dict to the column total instead of `bucket[j]`, which raises
# TypeError.
row_totals = {}  # rater 1 rating -> number of rows
col_totals = {}  # rater 2 rating -> number of rows
for (rating_1, rating_2), count in bucket.items():
    row_totals[rating_1] = row_totals.get(rating_1, 0) + count
    col_totals[rating_2] = col_totals.get(rating_2, 0) + count

# Cover every combination of ratings, not only the pairs that were actually
# observed: a cell with zero observations can still have a non-zero expected
# frequency, and leaving it out understates the expected disagreement.
expected_freq = {}
for rating_1, row_total in row_totals.items():
    for rating_2, col_total in col_totals.items():
        expected_freq[(rating_1, rating_2)] = (col_total * row_total) / total
# Weighted chance-expected disagreement, using the same weights as the
# observed side: 0 for agreement, 1 when "neutral" is involved, 2 otherwise.
expectation_sum = 0
for pair, freq in expected_freq.items():
    if pair[0] == pair[1]:
        continue  # agreement: weight 0
    if "neutral" in pair:
        expectation_sum += freq
    else:
        expectation_sum += freq * 2

# kappa = 1 - observed disagreement / expected disagreement.
# With no expected disagreement (no data, or each rater used a single
# category) the ratio is 0/0 and kappa is undefined; report NaN instead of
# crashing with ZeroDivisionError.
if expectation_sum:
    weighted_k = 1 - (observation_sum / expectation_sum)
else:
    weighted_k = float("nan")
print(weighted_k)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment