wleoncio/issue40-sinan.md

## issue40-sinan.md

      
    Raw
  

              issue40-sinan.md
            
          
    Code from tmatta/lsasim#40

Assumptions that make code work

library(lsasim)
n_items <- 30
Original code

with printed statements wrapper in print().
response_gen2 <- function (subject, item, theta, a_par = NULL, b_par, c_par = NULL,
                           d_par = NULL, item_no = NULL, ogive = "Logistic") {
  if (length(subject) != length(item))
    stop("subject and item vectors must be equal length.",
         call. = FALSE)
  if (is.null(a_par))
    a_par <- rep(1, length(unique(item)))
  if (is.null(c_par))
    c_par <- rep(0, length(unique(item)))
  if (ogive == "Logistic")
    DD <- 1
  if (ogive == "Normal")
    DD <- 1.7
  if (is.null(item_no))
    item_no <- seq(length(unique(item)))
  if (is.null(d_par))
    b_pars <- split(b_par, seq(length(b_par)))
  if (!is.null(d_par)) {
    d_mat <- do.call("cbind", d_par)
    b_pars <- list()
    for (i in 1:length(b_par)) {
      if (sum(abs(d_mat[i, ])) != 0) {
        b_list <- list()
        for (j in 1:length(d_mat[i, ])) b_list[[j]] <- b_par[i] +
            d_mat[i, j]
        b_pars[[i]] <- unlist(b_list)
      }
      if (sum(abs(d_mat[i, ])) == 0)
        b_pars[[i]] <- b_par[i]
    }
  }
  names(b_pars) <- item_no

  y <- numeric(length(subject))
  for (n in 1:length(subject)) {
    y[n] <- irt_gen2(theta = theta[subject[n]], a_par = a_par[which(item_no ==
                                                                      item[n])], b_par = b_pars[[which(item_no == item[n])]],
                     c_par = c_par[which(item_no == item[n])], D = DD)
  }
  df_l <- data.frame(item = item, subject = subject, response = y)
  df_w <- reshape(df_l, timevar = "item", idvar = "subject",
                  direction = "wide")
  df_item_old <- colnames(df_w)[2:length(df_w)]
  df_item_num <- gsub("[^[:digit:]]", "", df_item_old)
  df_item_new <- ifelse(nchar(df_item_num) == 1, paste0("i00",
                                                        df_item_num), ifelse(nchar(df_item_num) == 2, paste0("i0",
                                                                                                             df_item_num), paste0("i", df_item_num)))
  colnames(df_w)[2:length(df_w)] <- df_item_new
  df_w <- df_w[, order(names(df_w))]
  rownames(df_w) <- NULL
  return(y = df_w)
}


irt_gen2 <- function (theta, a_par = 1, b_par, c_par = 0, D = 1)
{
  response_pr <- c_par + (1 - c_par) * (exp(a_par*(theta - b_par)) / (1 + exp(a_par*(theta - b_par))))
  y <- rbinom(1, size = 1, prob = response_pr)
  return(y)
}

set.seed(345)

#this is a normal example
subject <- 1:100
theta <- rnorm(100,0,1)
student_dt <- data.frame(subject, theta)

item <- as.integer(c(1:n_items))
a <- runif(n_items, 0.5, 1.5)
b <- runif(n_items, -3, 3)
c <- runif(n_items, 0, .15)
item_par <- data.frame(item, a, b, c)

resp_matrix <- response_gen(subject = sort(rep(subject, 30)), item = rep(item,100), theta = theta, a_par = a, b_par = b, c_par = c)

#item means make sense and everything looks fine
print(colMeans(resp_matrix[item]))
## i001 i002 i003 i004 i005 i006 i007 i008 i009 i010 i011 i012 i013 i014 i015 i016 i017 i018 i019 i020 i021 i022 i023 i024 i025 i026 i027 i028 i029 
## 0.37 0.58 0.50 0.55 0.91 0.58 0.11 0.21 0.62 0.41 0.15 0.84 0.37 0.25 0.21 0.11 0.74 0.07 0.64 0.45 0.79 0.36 0.75 0.34 0.31 0.66 0.23 0.45 0.79 
## i030 
## 0.24

print(lm(colMeans(resp_matrix[item]) ~ b))
## 
## Call:
## lm(formula = colMeans(resp_matrix[item]) ~ b)
## 
## Coefficients:
## (Intercept)            b  
##      0.4738      -0.1376

#Let's make it very extreme, student mean theta is -20, I don't expect them to solve anything. But guessing parameters are super high. So, regardles if their theta 80%-90% of them should solve these items.
subject <- 1:100
theta <- rnorm(100, -20 ,1)
student_dt <- data.frame(subject, theta)


item <- as.integer(c(1:n_items))
a <- runif(n_items, 0.5, 1.5)
b <- runif(n_items, -3, 3)

#chance is super high
c <- runif(n_items, 0.8, .9)
item_par <- data.frame(item, a, b, c)

resp_matrix <- response_gen(subject = sort(rep(subject, 30)), item = rep(item,100), theta = theta, a_par = a, b_par = b, c_par = c)

print(colMeans(resp_matrix[item])) #look at the results, they are around 50%, what happened to .8-.9 guessing parameter
## i001 i002 i003 i004 i005 i006 i007 i008 i009 i010 i011 i012 i013 i014 i015 i016 i017 i018 i019 i020 i021 i022 i023 i024 i025 i026 i027 i028 i029 
## 0.42 0.41 0.51 0.48 0.50 0.52 0.49 0.48 0.49 0.50 0.44 0.46 0.51 0.46 0.43 0.45 0.40 0.42 0.39 0.46 0.42 0.42 0.44 0.54 0.49 0.45 0.52 0.65 0.42 
## i030 
## 0.52

print(lm(colMeans(resp_matrix[item]) ~ b))
## 
## Call:
## lm(formula = colMeans(resp_matrix[item]) ~ b)
## 
## Coefficients:
## (Intercept)            b  
##    0.469921    -0.002693

#let's check after the fix - but it only works for dichotomous variables
resp_matrix2 <- response_gen2(subject = sort(rep(subject, 30)), item = rep(item,100), theta = theta, a_par = a, b_par = b, c_par = c)
print(colMeans(resp_matrix2[item])) #look at the results, they are around 50%, what happened to .8-.9 guessing parameter
## i001 i002 i003 i004 i005 i006 i007 i008 i009 i010 i011 i012 i013 i014 i015 i016 i017 i018 i019 i020 i021 i022 i023 i024 i025 i026 i027 i028 i029 
## 0.87 0.90 0.91 0.88 0.95 0.79 0.87 0.88 0.89 0.85 0.91 0.86 0.86 0.92 0.93 0.86 0.84 0.81 0.82 0.83 0.88 0.85 0.85 0.87 0.92 0.84 0.84 0.88 0.79 
## i030 
## 0.91

print(lm(colMeans(resp_matrix2[item]) ~ b))
## 
## Call:
## lm(formula = colMeans(resp_matrix2[item]) ~ b)
## 
## Coefficients:
## (Intercept)            b  
##     0.86944     -0.00818