Skip to content

Instantly share code, notes, and snippets.

@yawnston
Created October 9, 2018 21:11
Show Gist options
  • Save yawnston/e68e6dfce94024d871c7e4834dafe31f to your computer and use it in GitHub Desktop.
Save yawnston/e68e6dfce94024d871c7e4834dafe31f to your computer and use it in GitHub Desktop.
library(data.table)
# Load the two annotators' label files. Both are ';'-separated with no header
# row; only the first two fields (item id, assigned class) are kept.
# cry-A.csv is read with a third column, named "null", which is dropped
# immediately after reading.
cry.A = read.table("cry-A.csv", header = FALSE, sep = ";",
                   col.names = c("id", "class", "null"))
cry.A$null = NULL
cry.A = data.table(cry.A)
cry.B = fread("cry-B.csv", header = FALSE, sep = ";", select = c(1, 2),
              col.names = c("id", "class"))
# Key both tables on the item id so that X[Y] below is a keyed join on id.
setkey(cry.A, "id")
setkey(cry.B, "id")
# One row per row of cry.B with cry.A's class attached by id; ids that are
# missing from cry.A get NA for A's class (data.table's default nomatch).
cry.AB = cry.A[cry.B]
setnames(cry.AB, c("id", "A", "B"))
# Agreement (contingency) table: rows = annotator A's class, columns = B's.
# Columns are taken by name so the result does not depend on column positions
# or on how a numeric `j` is interpreted by the installed data.table version.
# table() leaves out pairs in which either label is NA.
agr = table(A = cry.AB$A, B = cry.AB$B)
# Exercise 1
# Cohen's kappa for annotators A and B. Every quantity is derived from the
# agreement table `agr`, so observed and chance agreement are computed over
# exactly the same set of paired items.
samples = sum(agr)
rowTot = rowSums(agr)  # per-class item counts for the row annotator
colTot = colSums(agr)  # per-class item counts for the column annotator
# Match classes by label, not by position: a class used by only one annotator
# makes `agr` non-square, in which case diag(agr) and an element-wise product
# of the marginals would pair up unrelated classes. Such a class can never be
# agreed on, so it contributes nothing to either po or pe.
shared = intersect(names(rowTot), names(colTot))
# po: observed proportion of items on which both annotators agree
po = sum(diag(agr[shared, shared, drop = FALSE])) / samples
# pe: proportion of agreement expected by chance given the two marginals
pe = sum(rowTot[shared] * colTot[shared] / samples^2)
# ckap is Cohen's Kappa (NaN when samples == 0 or pe == 1)
ckap = (po - pe) / (1 - pe)
# Exercise 2
# Load two more label files in the same ';'-separated, header-less format and
# pair them by item id.
# NOTE(review): GS is presumably the gold standard and F1 a system's output;
# confirm against the exercise description.
cry.GS = fread("cry-GS.csv", header = FALSE, sep = ";", select = c(1, 2),
               col.names = c("id", "class"))
setkey(cry.GS, "id")
cry.F1 = fread("cry-F1.csv", header = FALSE, sep = ";", select = c(1, 2),
               col.names = c("id", "class"))
setkey(cry.F1, "id")
# One row per row of cry.F1 with the GS class attached by id (NA if absent).
cry.GF = cry.GS[cry.F1]
setnames(cry.GF, c("id", "GS", "F1"))
# Contingency table, transposed so that rows = F1 class, columns = GS class.
agr2 = t(table(GS = cry.GF$GS, F1 = cry.GF$F1))
# Share of paired items whose two labels are equal. Counting matches directly
# (instead of summing diag(agr2)) stays correct when one side never uses some
# class and the table is therefore not square. Pairs with a missing label are
# left out, exactly as table() leaves them out of agr2.
isCorrect = cry.GF$GS == cry.GF$F1
classAcc = sum(isCorrect, na.rm = TRUE) / sum(!is.na(isCorrect))
# Express each element of `x` as a percentage of the total of `x`, rounded to
# one decimal place. Names and dimensions of `x` carry over to the result.
normalize = function(x) {
  total = sum(x)
  share = x / total
  round(share * 100, digits = 1)
}
# Run normalize() over every column of agr2, so each column is rewritten as
# percentages of that column's total, then show the resulting matrix.
probErr = apply(X = agr2, MARGIN = 2, FUN = normalize)
print(probErr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment