Last active
March 17, 2017 09:20
-
-
Save markushuff/11db004e483b5678b2496aca91c14f83 to your computer and use it in GitHub Desktop.
How to calculate sensitivity d_a ("d sub a")? Step-by-step example from the Macmillan and Creelman book.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Calculation of d_a ("d sub a"). A step-by-step tutorial based on the book:
# Macmillan, N. A., & Creelman, C. D. (2004). Detection theory: A user's guide.
# Psychology Press.
#
# pp. 57ff

library(ggplot2)  # ROC and z-ROC plots below
library(plyr)     # loaded by the original script; kept for compatibility
# Create the rating data set: counts of "old"/"new" responses per confidence
# level, split by word frequency (low/high), 100 trials per condition.
frequency <- c(rep("low", 5), rep("high", 5))
old <- c(61, 15, 15, 5, 4, 37, 25, 18, 11, 9)
new <- c(2, 8, 35, 23, 30, 4, 18, 28, 21, 29)
# 1 = "sure old", 2 = "maybe old", 3 = "uncertain", 4 = "maybe new", 5 = "sure new"
answer <- c(rep(1:5, 2))
roc_test <- data.frame(frequency, old, new, answer)

# Convert the raw counts to proportions (hit and false-alarm rates).
roc_test$p_hit <- roc_test$old / 100
roc_test$p_fa <- roc_test$new / 100

# Create cumulative proportions.
# IMPORTANT: The data has to be sorted such that the "sure old" answer comes
# first. In this example (1,2,3,4,5). If the answer variable is different,
# e.g. 6 for "sure old", then the data frame has to be sorted accordingly
# (decreasing). Otherwise, the ROC curves look weird.
roc_test$p_cum_hit <- NA
roc_test$p_cum_fa <- NA
# One pass per condition instead of four near-identical assignment lines.
for (freq in c("low", "high")) {
  idx <- roc_test$frequency == freq
  roc_test$p_cum_hit[idx] <- cumsum(roc_test$p_hit[idx])
  roc_test$p_cum_fa[idx] <- cumsum(roc_test$p_fa[idx])
}

# Inspect the data.
roc_test

# Exclude the last step of the cumulative function.
# Note: if this step is not executed, the results will look different as
# compared to the book example.
roc_test <- roc_test[roc_test$answer != 5, ]
# Plot the cumulative ROC (hit rate vs. false-alarm rate) per frequency
# condition, with the chance diagonal for reference.
ggplot(roc_test, aes(p_cum_fa, p_cum_hit, color = frequency)) +
  geom_point() +
  geom_line() +
  geom_abline() +
  coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
  theme_bw(base_size = 16)

# Plot the z-transformed data (z-ROC) with a fitted straight line per
# condition.
ggplot(roc_test, aes(qnorm(p_cum_fa), qnorm(p_cum_hit), color = frequency)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_abline() +
  coord_cartesian(xlim = c(-2, 2), ylim = c(-2, 2)) +
  theme_bw(base_size = 16)
# Deviance (-2 * sum of log-likelihoods) of a linear model on the
# z-transformed ROC points, for minimization with optim().
#
# par: numeric vector of length 3 — c(slope, intercept, error sd).
# x:   response values (z-transformed cumulative hit rates). Defaults to the
#      global `x_data` set up by the fitting loop below, so existing
#      optim(fn = lm.loss_x_y, ...) calls keep working unchanged.
# y:   predictor values (z-transformed cumulative false-alarm rates).
#      Defaults to the global `y_data`.
#
# Returns the deviance, or a very large penalty (1e7) when the candidate
# error sd is not strictly positive. This tells the optimization procedure to
# stay away from invalid (either statistically or psychologically) parameter
# values.
lm.loss_x_y <- function(par, x = x_data, y = y_data) {
  a.par <- par[1]      # the current slope
  b.par <- par[2]      # the current intercept
  err.sigma <- par[3]  # the current error standard deviation

  # NOTE(review): the original only handled err.sigma < 0 and err.sigma > 0,
  # so err.sigma == 0 left `deviance` undefined and the function errored.
  # <= 0 closes that gap (sd must be strictly positive for dnorm anyway).
  if (err.sigma <= 0) {
    return(10000000)
  }

  # Likelihood of each data point under the model x ~ Normal(a*y + b, sigma).
  # Here is where you specify the model and how you calculate likelihoods.
  likelihoods <- dnorm(x, mean = y * a.par + b.par, sd = err.sigma)
  # Deviance: -2 times the sum of the per-point log-likelihoods.
  -2 * sum(log(likelihoods))
}
# Fit slope and vertical intercept (d_2_prime) for each frequency condition
# by maximum likelihood: optim() minimizes the deviance from lm.loss_x_y.
# Results are collected in a list and bound once, instead of growing a data
# frame with rbind() inside the loop (and instead of the dummy-row-then-
# delete initialization of the original).
fit_results <- list()
for (freq in unique(roc_test$frequency)) {
  # lm.loss_x_y reads these globals: z-transformed cumulative hit and
  # false-alarm rates of the current condition.
  x_data <- qnorm(roc_test[roc_test$frequency == freq, ]$p_cum_hit)
  y_data <- qnorm(roc_test[roc_test$frequency == freq, ]$p_cum_fa)
  # Start values: slope 0, intercept 0, error sd 10.
  parameter.fits_d2_prime <- optim(par = c(0, 0, 10),
                                   fn = lm.loss_x_y,
                                   hessian = TRUE)
  fit_results[[freq]] <- data.frame(frequency = freq,
                                    slope = parameter.fits_d2_prime$par[1],
                                    d_2_prime = parameter.fits_d2_prime$par[2])
}
dat_roc_stat <- do.call(rbind, fit_results)
rownames(dat_roc_stat) <- NULL  # match the original's default row names
# Calculate d_a from the z-ROC slope (s) and vertical intercept (d_2_prime),
# per Macmillan & Creelman, Formula 3.4:
#   d_a = sqrt(2 / (1 + s^2)) * d_2_prime
dat_roc_stat$d_a <- sqrt(2 / (1 + dat_roc_stat$slope^2)) * dat_roc_stat$d_2_prime
# The results are similar to those in the book chapter.
dat_roc_stat
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment