Skip to content

Instantly share code, notes, and snippets.

@markushuff
Last active March 17, 2017 09:20
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markushuff/11db004e483b5678b2496aca91c14f83 to your computer and use it in GitHub Desktop.
Save markushuff/11db004e483b5678b2496aca91c14f83 to your computer and use it in GitHub Desktop.
How to calculate sensitivity d_a ("d sub a")? Step-by-step example from the Macmillan and Creelman book.
# Calculation of d_a ("d sub a"). A step-by-step tutorial based on the book:
# Macmillan, N. A., & Creelman, C. D. (2004). Detection theory: A user’s guide.
# Psychology Press.
#
# pp. 57ff
library(ggplot2)
library(plyr)
# Build the raw response-frequency data set.
# Counts per condition sum to 100 responses.
frequency <- rep(c("low", "high"), each = 5)
old <- c(61, 15, 15, 5, 4, 37, 25, 18, 11, 9)
new <- c(2, 8, 35, 23, 30, 4, 18, 28, 21, 29)
# Rating scale: 1 = "sure old", 2 = "maybe old", 3 = "uncertain",
# 4 = "maybe new", 5 = "sure new"
answer <- rep(1:5, times = 2)
roc_test <- data.frame(frequency, old, new, answer)
# Convert raw counts to proportions (out of 100 responses per condition)
roc_test <- transform(roc_test, p_hit = old / 100, p_fa = new / 100)
# Cumulative proportions within each frequency condition.
# IMPORTANT: rows must be sorted so that the "sure old" answer comes first
# within each condition — here (1,2,3,4,5). If the coding differs
# (e.g. 6 = "sure old"), then the data frame has to be sorted accordingly
# (decreasing) first; otherwise the ROC curves will look weird.
roc_test$p_cum_hit <- ave(roc_test$p_hit, roc_test$frequency, FUN = cumsum)
roc_test$p_cum_fa <- ave(roc_test$p_fa, roc_test$frequency, FUN = cumsum)
# Show the cumulative data
roc_test
# Drop the final step of the cumulative function (the (1, 1) point).
# Note: without this step the results differ from the book example.
roc_test <- subset(roc_test, answer != 5)
# ROC curves in probability space, one curve per frequency condition
ggplot(data = roc_test,
       mapping = aes(x = p_cum_fa, y = p_cum_hit, colour = frequency)) +
  geom_point() +
  geom_line() +
  geom_abline() +
  coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
  theme_bw(base_size = 16)
# ROC curves in z-space (z-transformed cumulative proportions)
ggplot(data = roc_test,
       mapping = aes(x = qnorm(p_cum_fa), y = qnorm(p_cum_hit),
                     colour = frequency)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  geom_abline() +
  coord_cartesian(xlim = c(-2, 2), ylim = c(-2, 2)) +
  theme_bw(base_size = 16)
# Maximum-likelihood loss for the z-ROC regression line
#   z(hit) = slope * z(fa) + intercept
# where the vertical intercept is d_2_prime. Minimized via optim() to get
# the slope and intercept estimates.
#
# par: numeric vector c(slope, intercept, error standard deviation).
# x:   z-transformed cumulative hit rates (defaults to global x_data,
#      matching the original script's use of globals).
# y:   z-transformed cumulative false-alarm rates (defaults to global y_data).
#
# Returns the deviance (-2 times the sum of log-likelihoods), or a very
# large penalty value when the error SD is not strictly positive, which
# tells the optimization procedure to stay away from invalid parameters.
lm.loss_x_y <- function(par, x = x_data, y = y_data) {
  a.par <- par[1]      # current slope
  b.par <- par[2]      # current intercept
  err.sigma <- par[3]  # current error standard deviation
  # BUG FIX: the original used two separate `if (err.sigma < 0)` and
  # `if (err.sigma > 0)` tests, leaving `deviance` undefined (an error)
  # when err.sigma == 0. A single <= 0 guard covers both invalid cases.
  if (err.sigma <= 0) {
    return(10000000)
  }
  # Likelihood of each data point under the regression model
  likelihoods <- dnorm(x, mean = y * a.par + b.par, sd = err.sigma)
  # Deviance: -2 times the sum of log-likelihoods
  -2 * sum(log(likelihoods))
}
# Fit slope and intercept (d_2_prime) separately for each word-frequency
# condition and collect the estimates into one data frame.
# NOTE: lm.loss_x_y reads x_data / y_data from the global environment, so
# they must be (re)assigned before each optim() call.
fit_list <- list()
for (freq in unique(roc_test$frequency)) {
  # z-transformed cumulative hit (x) and false-alarm (y) rates
  x_data <- qnorm(roc_test[roc_test$frequency == freq, ]$p_cum_hit)
  y_data <- qnorm(roc_test[roc_test$frequency == freq, ]$p_cum_fa)
  # Minimize the deviance; start at slope 0, intercept 0, error SD 10.
  # `hessian = TRUE` spelled out (T is a reassignable variable, not a keyword).
  parameter.fits_d2_prime <- optim(par = c(0, 0, 10),
                                   fn = lm.loss_x_y,
                                   hessian = TRUE)
  fit_list[[freq]] <- data.frame(frequency = freq,
                                 slope = parameter.fits_d2_prime$par[1],
                                 d_2_prime = parameter.fits_d2_prime$par[2])
}
# Bind once at the end instead of growing a data frame inside the loop;
# unname() keeps the default 1, 2, ... row names of the original rbind.
dat_roc_stat <- do.call(rbind, unname(fit_list))
# d_a, Macmillan & Creelman Formula 3.4:
#   d_a = sqrt(2 / (1 + slope^2)) * d_2_prime
dat_roc_stat$d_a <- with(dat_roc_stat,
                         sqrt(2 / (1 + slope^2)) * d_2_prime)
# The results are similar to those in the book chapter
dat_roc_stat
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment