nasifimtiazohi/weighted_Kappa.py

## weighted_Kappa.py
import os
import openpyxl

''' Let's assume there is an Excel sheet
where two columns hold the ratings from two raters;
this script measures the agreement rate between them.'''

#In this sample, the ratings are for sentiment
#possible ratings are: positive, negative, neutral

# Label that marks the midpoint of the sentiment scale; a disagreement that
# involves it counts half as much as a positive-vs-negative disagreement.
NEUTRAL = "neutral"


def disagreement_weight(a, b):
    """Return the disagreement weight for one (rater1, rater2) rating pair.

    The weight is 0 when both ratings are equal, 1 when exactly one side of
    the disagreement is "neutral" (neutral vs. polar), and 2 for any other
    disagreement (i.e. positive vs. negative). For an unweighted kappa this
    would simply be 1 for every disagreement.
    """
    if a == b:
        return 0
    if NEUTRAL in (a, b):
        return 1
    return 2


def read_rating_pairs(ws, rater1_column, rater2_column, first_row, last_row):
    """Yield (rater1, rater2) cell values for rows first_row..last_row-1.

    ``ws`` is indexed with ``column + str(row)`` and each result's ``.value``
    is read. Iteration stops early at the first row whose rater-1 cell is
    empty (``None``).
    """
    for row in range(first_row, last_row):
        a = ws[rater1_column + str(row)].value
        if a is None:
            break
        yield a, ws[rater2_column + str(row)].value


def count_rating_pairs(pairs):
    """Return a dict mapping each (rater1, rater2) pair to its frequency."""
    bucket = {}
    for pair in pairs:
        bucket[pair] = bucket.get(pair, 0) + 1
    return bucket


def weighted_kappa(bucket):
    """Compute weighted Cohen's kappa from a table of rating-pair counts.

    Args:
        bucket: dict mapping (rater1_rating, rater2_rating) -> count.

    Returns:
        1 - (observed weighted disagreement / expected weighted
        disagreement), using the weights from ``disagreement_weight``.

    Raises:
        ValueError: if ``bucket`` holds no observations, or if the expected
            disagreement is zero (both raters used one and the same single
            category), in which case kappa is undefined.
    """
    total = sum(bucket.values())
    if total == 0:
        raise ValueError("no rating pairs to compare")

    # Marginal totals: how often each rater used each category.
    row_totals = {}
    col_totals = {}
    for (a, b), count in bucket.items():
        row_totals[a] = row_totals.get(a, 0) + count
        col_totals[b] = col_totals.get(b, 0) + count

    observation_sum = sum(
        disagreement_weight(a, b) * count for (a, b), count in bucket.items()
    )

    # The chance-expected disagreement must cover EVERY category pair, not
    # only the pairs that were actually observed: a pair that never occurred
    # still has a non-zero expected frequency whenever both of its marginals
    # are non-zero.
    expectation_sum = sum(
        disagreement_weight(a, b) * row_totals[a] * col_totals[b] / total
        for a in row_totals
        for b in col_totals
    )
    if expectation_sum == 0:
        raise ValueError(
            "weighted kappa is undefined: expected disagreement is zero"
        )
    return 1 - observation_sum / expectation_sum


if __name__ == "__main__":
    wb = openpyxl.load_workbook('test.xlsx')
    ws = wb.active  # replaces the deprecated wb.get_active_sheet()

    # NOTE(review): the two column names and the start/end row bounds are
    # unfilled template placeholders carried over unchanged. `start` and
    # `end` must be defined, and the column names replaced with the sheet's
    # real column identifiers, before this script can run.
    bucket = count_rating_pairs(
        read_rating_pairs(ws, 'columnn_rater1', 'columnn_rater2', start, end)
    )

    weighted_k = weighted_kappa(bucket)
    print(weighted_k)