Code from tmatta/lsasim#40
library(lsasim)
n_items <- 30
with printed statements wrapper in print()
.
response_gen2 <- function (subject, item, theta, a_par = NULL, b_par, c_par = NULL,
d_par = NULL, item_no = NULL, ogive = "Logistic") {
if (length(subject) != length(item))
stop("subject and item vectors must be equal length.",
call. = FALSE)
if (is.null(a_par))
a_par <- rep(1, length(unique(item)))
if (is.null(c_par))
c_par <- rep(0, length(unique(item)))
if (ogive == "Logistic")
DD <- 1
if (ogive == "Normal")
DD <- 1.7
if (is.null(item_no))
item_no <- seq(length(unique(item)))
if (is.null(d_par))
b_pars <- split(b_par, seq(length(b_par)))
if (!is.null(d_par)) {
d_mat <- do.call("cbind", d_par)
b_pars <- list()
for (i in 1:length(b_par)) {
if (sum(abs(d_mat[i, ])) != 0) {
b_list <- list()
for (j in 1:length(d_mat[i, ])) b_list[[j]] <- b_par[i] +
d_mat[i, j]
b_pars[[i]] <- unlist(b_list)
}
if (sum(abs(d_mat[i, ])) == 0)
b_pars[[i]] <- b_par[i]
}
}
names(b_pars) <- item_no
y <- numeric(length(subject))
for (n in 1:length(subject)) {
y[n] <- irt_gen2(theta = theta[subject[n]], a_par = a_par[which(item_no ==
item[n])], b_par = b_pars[[which(item_no == item[n])]],
c_par = c_par[which(item_no == item[n])], D = DD)
}
df_l <- data.frame(item = item, subject = subject, response = y)
df_w <- reshape(df_l, timevar = "item", idvar = "subject",
direction = "wide")
df_item_old <- colnames(df_w)[2:length(df_w)]
df_item_num <- gsub("[^[:digit:]]", "", df_item_old)
df_item_new <- ifelse(nchar(df_item_num) == 1, paste0("i00",
df_item_num), ifelse(nchar(df_item_num) == 2, paste0("i0",
df_item_num), paste0("i", df_item_num)))
colnames(df_w)[2:length(df_w)] <- df_item_new
df_w <- df_w[, order(names(df_w))]
rownames(df_w) <- NULL
return(y = df_w)
}
irt_gen2 <- function (theta, a_par = 1, b_par, c_par = 0, D = 1)
{
response_pr <- c_par + (1 - c_par) * (exp(a_par*(theta - b_par)) / (1 + exp(a_par*(theta - b_par))))
y <- rbinom(1, size = 1, prob = response_pr)
return(y)
}
set.seed(345)
#this is a normal example
subject <- 1:100
theta <- rnorm(100,0,1)
student_dt <- data.frame(subject, theta)
item <- as.integer(c(1:n_items))
a <- runif(n_items, 0.5, 1.5)
b <- runif(n_items, -3, 3)
c <- runif(n_items, 0, .15)
item_par <- data.frame(item, a, b, c)
resp_matrix <- response_gen(subject = sort(rep(subject, 30)), item = rep(item,100), theta = theta, a_par = a, b_par = b, c_par = c)
#item means make sense and everything looks fine
print(colMeans(resp_matrix[item]))
## i001 i002 i003 i004 i005 i006 i007 i008 i009 i010 i011 i012 i013 i014 i015 i016 i017 i018 i019 i020 i021 i022 i023 i024 i025 i026 i027 i028 i029
## 0.37 0.58 0.50 0.55 0.91 0.58 0.11 0.21 0.62 0.41 0.15 0.84 0.37 0.25 0.21 0.11 0.74 0.07 0.64 0.45 0.79 0.36 0.75 0.34 0.31 0.66 0.23 0.45 0.79
## i030
## 0.24
print(lm(colMeans(resp_matrix[item]) ~ b))
##
## Call:
## lm(formula = colMeans(resp_matrix[item]) ~ b)
##
## Coefficients:
## (Intercept) b
## 0.4738 -0.1376
#Let's make it very extreme, student mean theta is -20, I don't expect them to solve anything. But guessing parameters are super high. So, regardles if their theta 80%-90% of them should solve these items.
subject <- 1:100
theta <- rnorm(100, -20 ,1)
student_dt <- data.frame(subject, theta)
item <- as.integer(c(1:n_items))
a <- runif(n_items, 0.5, 1.5)
b <- runif(n_items, -3, 3)
#chance is super high
c <- runif(n_items, 0.8, .9)
item_par <- data.frame(item, a, b, c)
resp_matrix <- response_gen(subject = sort(rep(subject, 30)), item = rep(item,100), theta = theta, a_par = a, b_par = b, c_par = c)
print(colMeans(resp_matrix[item])) #look at the results, they are around 50%, what happened to .8-.9 guessing parameter
## i001 i002 i003 i004 i005 i006 i007 i008 i009 i010 i011 i012 i013 i014 i015 i016 i017 i018 i019 i020 i021 i022 i023 i024 i025 i026 i027 i028 i029
## 0.42 0.41 0.51 0.48 0.50 0.52 0.49 0.48 0.49 0.50 0.44 0.46 0.51 0.46 0.43 0.45 0.40 0.42 0.39 0.46 0.42 0.42 0.44 0.54 0.49 0.45 0.52 0.65 0.42
## i030
## 0.52
print(lm(colMeans(resp_matrix[item]) ~ b))
##
## Call:
## lm(formula = colMeans(resp_matrix[item]) ~ b)
##
## Coefficients:
## (Intercept) b
## 0.469921 -0.002693
#let's check after the fix - but it only works for dichotomous variables
resp_matrix2 <- response_gen2(subject = sort(rep(subject, 30)), item = rep(item,100), theta = theta, a_par = a, b_par = b, c_par = c)
print(colMeans(resp_matrix2[item])) #look at the results, they are around 50%, what happened to .8-.9 guessing parameter
## i001 i002 i003 i004 i005 i006 i007 i008 i009 i010 i011 i012 i013 i014 i015 i016 i017 i018 i019 i020 i021 i022 i023 i024 i025 i026 i027 i028 i029
## 0.87 0.90 0.91 0.88 0.95 0.79 0.87 0.88 0.89 0.85 0.91 0.86 0.86 0.92 0.93 0.86 0.84 0.81 0.82 0.83 0.88 0.85 0.85 0.87 0.92 0.84 0.84 0.88 0.79
## i030
## 0.91
print(lm(colMeans(resp_matrix2[item]) ~ b))
##
## Call:
## lm(formula = colMeans(resp_matrix2[item]) ~ b)
##
## Coefficients:
## (Intercept) b
## 0.86944 -0.00818