Created
October 1, 2014 18:05
-
-
Save joaovissoci/333bd0aa8ce2813ad5c6 to your computer and use it in GitHub Desktop.
irt_itemfit_mirt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
itemfit <- function(x, Zh = TRUE, X2 = FALSE, S_X2 = TRUE, group.size = 150, mincell = 1, S_X2.tables = FALSE, | |
empirical.plot = NULL, empirical.CI = 0, method = 'EAP', Theta = NULL, | |
impute = 0, ...){ | |
fn <- function(collect, obj, Theta, ...){ | |
tmpdat <- imputeMissing(obj, Theta) | |
tmpmod <- mirt(tmpdat, obj@nfact, pars = vals) | |
tmpmod@pars <- obj@pars | |
return(itemfit(tmpmod, Theta=Theta, ...)) | |
} | |
if(is(x, 'MixedClass')) | |
stop('mixedmirt objects not supported') | |
discrete <- FALSE | |
if(is(x, 'DiscreteClass')){ | |
class(x) <- 'MultipleGroupClass' | |
discrete <- TRUE | |
} | |
if(any(is.na(x@Data$data)) && !is(x, 'MultipleGroupClass')){ | |
if(impute == 0 || is.null(Theta)) | |
stop('Fit statistics cannot be computed when there are missing data. Pass suitable | |
Theta and impute arguments to compute statistics following multiple data | |
inputations') | |
collect <- vector('list', impute) | |
vals <- mod2values(x) | |
vals$est <- FALSE | |
collect <- myLapply(collect, fn, obj=x, Theta=Theta, vals=vals, | |
Zh=Zh, X2=X2, group.size=group.size, mincell=mincell, | |
S_X2.tables=S_X2.tables, empirical.plot=empirical.plot, | |
empirical.CI=empirical.CI, method=method, impute=0, | |
discrete=discrete, ...) | |
ave <- SD <- collect[[1L]] | |
pick1 <- 1:nrow(ave) | |
pick2 <- sapply(ave, is.numeric) | |
ave[pick1, pick2] <- SD[pick1, pick2] <- 0 | |
for(i in 1L:impute) | |
ave[pick1, pick2] <- ave[pick1, pick2] + collect[[i]][pick1, pick2] | |
ave[pick1, pick2] <- ave[pick1, pick2]/impute | |
for(i in 1L:impute) | |
SD[pick1, pick2] <- (ave[pick1, pick2] - collect[[i]][pick1, pick2])^2 | |
SD[pick1, pick2] <- sqrt(SD[pick1, pick2]/impute) | |
SD$item <- paste0('SD_', SD$item) | |
SD <- rbind(NA, SD) | |
return(rbind(ave, SD)) | |
} | |
if(is(x, 'MultipleGroupClass')){ | |
ret <- vector('list', length(x@pars)) | |
if(is.null(Theta)) | |
Theta <- fscores(x, method=method, scores.only=TRUE, full.scores=TRUE, ...) | |
for(g in 1L:length(x@pars)){ | |
tmpTheta <- Theta[x@Data$groupNames[g] == x@Data$group, , drop=FALSE] | |
tmp <- x@pars[[g]] | |
tmp@Data <- x@Data | |
tmp@Data$fulldata[[1L]] <- x@Data$fulldata[[g]] | |
ret[[g]] <- itemfit(tmp, Zh=Zh, X2=X2, group.size=group.size, mincell=mincell, | |
S_X2.tables=S_X2.tables, empirical.plot=empirical.plot, | |
Theta=tmpTheta, empirical.CI=empirical.CI, method=method, | |
impute=impute, discrete=discrete, ...) | |
} | |
names(ret) <- x@Data$groupNames | |
return(ret) | |
} | |
dots <- list(...) | |
discrete <- dots$discrete | |
discrete <- ifelse(is.null(discrete), FALSE, discrete) | |
if(S_X2.tables || discrete) Zh <- X2 <- FALSE | |
ret <- data.frame(item=colnames(x@Data$data)) | |
J <- ncol(x@Data$data) | |
itemloc <- x@itemloc | |
pars <- x@pars | |
if(Zh || X2){ | |
if(is.null(Theta)) | |
Theta <- fscores(x, verbose=FALSE, full.scores=TRUE, | |
scores.only=TRUE, method=method, ...) | |
prodlist <- attr(pars, 'prodlist') | |
nfact <- x@nfact + length(prodlist) | |
fulldata <- x@Data$fulldata[[1L]] | |
if(method %in% c('ML', 'WLE')){ | |
for(i in 1L:ncol(Theta)){ | |
tmp <- Theta[,i] | |
tmp[tmp %in% c(-Inf, Inf)] <- NA | |
Theta[Theta[,i] == Inf, i] <- max(tmp, na.rm=TRUE) + .1 | |
Theta[Theta[,i] == -Inf, i] <- min(tmp, na.rm=TRUE) - .1 | |
} | |
} | |
N <- nrow(Theta) | |
itemtrace <- matrix(0, ncol=ncol(fulldata), nrow=N) | |
for (i in 1L:J) | |
itemtrace[ ,itemloc[i]:(itemloc[i+1L] - 1L)] <- ProbTrace(x=pars[[i]], Theta=Theta) | |
LL <- itemtrace * fulldata | |
LL[LL < .Machine$double.eps] <- 1 | |
Lmatrix <- matrix(log(LL[as.logical(fulldata)]), N, J) | |
mu <- sigma2 <- rep(0, J) | |
log_itemtrace <- log(itemtrace) | |
for(item in 1L:J){ | |
P <- itemtrace[ ,itemloc[item]:(itemloc[item+1L]-1L)] | |
log_P <- log_itemtrace[ ,itemloc[item]:(itemloc[item+1L]-1L)] | |
mu[item] <- sum(P * log_P) | |
for(i in 1L:ncol(P)) | |
for(j in 1L:ncol(P)) | |
if(i != j) | |
sigma2[item] <- sigma2[item] + sum(P[,i] * P[,j] * | |
log_P[,i] * log(P[,i]/P[,j])) | |
} | |
ret$Zh <- (colSums(Lmatrix) - mu) / sqrt(sigma2) | |
#if all Rasch models, infit and outfit | |
if(all(x@itemtype %in% c('Rasch', 'rsm', 'gpcm'))){ | |
oneslopes <- rep(FALSE, length(x@itemtype)) | |
for(i in 1L:length(x@itemtype)) | |
oneslopes[i] <- closeEnough(x@pars[[i]]@par[1L], 1-1e-10, 1+1e-10) | |
if(all(oneslopes)){ | |
attr(x, 'inoutfitreturn') <- TRUE | |
pf <- personfit(x, method=method, Theta=Theta) | |
z2 <- pf$resid^2 / pf$W | |
outfit <- colSums(z2) / N | |
q.outfit <- sqrt(colSums((pf$C / pf$W^2) / N^2) - 1 / N) | |
q.outfit[q.outfit > 1.4142] <- 1.4142 | |
z.outfit <- (outfit^(1/3) - 1) * (3/q.outfit) + (q.outfit/3) | |
infit <- colSums(pf$W * z2) / colSums(pf$W) | |
q.infit <- sqrt(colSums(pf$C - pf$W^2) / colSums(pf$W)^2) | |
q.infit[q.infit > 1.4142] <- 1.4142 | |
z.infit <- (infit^(1/3) - 1) * (3/q.infit) + (q.infit/3) | |
ret$outfit <- outfit | |
ret$z.outfit <- z.outfit | |
ret$infit <- infit | |
ret$z.infit <- z.infit | |
} | |
} | |
} | |
if((X2 || !is.null(empirical.plot)) && nfact == 1L){ | |
ord <- order(Theta[,1L]) | |
fulldata <- fulldata[ord,] | |
Theta <- Theta[ord, , drop = FALSE] | |
den <- dnorm(Theta, 0, .5) | |
den <- den / sum(den) | |
cumTheta <- cumsum(den) | |
Groups <- rep(20, length(ord)) | |
ngroups <- ceiling(nrow(fulldata) / group.size) | |
weight <- 1/ngroups | |
for(i in 1L:20L) | |
Groups[round(cumTheta,2) >= weight*(i-1) & round(cumTheta,2) < weight*i] <- i | |
n.uniqueGroups <- length(unique(Groups)) | |
X2 <- df <- RMSEA <- rep(0, J) | |
if(!is.null(empirical.plot)){ | |
if(nfact > 1L) stop('Cannot make empirical plot for multidimensional models') | |
theta <- seq(-4,4, length.out=40) | |
ThetaFull <- thetaComb(theta, nfact) | |
if(!is.numeric(empirical.plot)){ | |
inames <- colnames(x@Data$data) | |
ind <- 1L:length(inames) | |
empirical.plot <- ind[inames == empirical.plot] | |
} | |
empirical.plot_P <- ProbTrace(pars[[empirical.plot]], ThetaFull) | |
empirical.plot_points <- matrix(NA, length(unique(Groups)), x@K[empirical.plot] + 2L) | |
} | |
for (i in 1L:J){ | |
if(!is.null(empirical.plot) && i != empirical.plot) next | |
for(j in unique(Groups)){ | |
dat <- fulldata[Groups == j, itemloc[i]:(itemloc[i+1] - 1), drop = FALSE] | |
r <- colSums(dat) | |
N <- nrow(dat) | |
mtheta <- matrix(mean(Theta[Groups == j,]), nrow=1) | |
if(!is.null(empirical.plot)){ | |
tmp <- r/N | |
empirical.plot_points[j, ] <- c(mtheta, N, tmp) | |
} | |
P <- ProbTrace(x=pars[[i]], Theta=mtheta) | |
if(any(N * P < 2)){ | |
df[i] <- df[i] - 1 | |
next | |
} | |
X2[i] <- X2[i] + sum((r - N*P)^2 / N*P) | |
} | |
df[i] <- df[i] + n.uniqueGroups*(length(r) - 1) - sum(pars[[i]]@est) | |
} | |
X2[X2 == 0] <- NA | |
if(!is.null(empirical.plot)){ | |
K <- x@K[empirical.plot] | |
EPCI.lower <- EPCI.upper <- NULL | |
if(K == 2 && empirical.CI != 0){ | |
p.L <- function(x, alpha) if (x[1] == 0) 0 else qbeta(alpha, x[1], x[2] - x[1] + 1) | |
p.U <- function(x, alpha) if (x[1] == x[2]) 1 else | |
qbeta(1 - alpha, x[1] + 1, x[2] - x[1]) | |
N <- empirical.plot_points[,2] | |
O <- empirical.plot_points[,ncol(empirical.plot_points)] * N | |
EPCI.lower <- apply(cbind(O, N), 1, p.L, (1-empirical.CI)/2) | |
EPCI.upper <- apply(cbind(O, N), 1, p.U, (1-empirical.CI)/2) | |
} | |
empirical.plot_points <- empirical.plot_points[,-2] | |
colnames(empirical.plot_points) <- c('theta', paste0('p.', 1:K)) | |
while(nrow(empirical.plot_points) < nrow(empirical.plot_P)) | |
empirical.plot_points <- rbind(empirical.plot_points, | |
rep(NA, length(empirical.plot_points[1,]))) | |
plt.1 <- data.frame(id = 1:nrow(ThetaFull), Theta=ThetaFull, P=empirical.plot_P) | |
plt.1 <- reshape(plt.1, varying = 3:ncol(plt.1), direction = 'long', timevar = 'cat') | |
plt.2 <- data.frame(id = 1:nrow(empirical.plot_points), empirical.plot_points) | |
plt.2 <- reshape(plt.2, varying = 3:ncol(plt.2), direction = 'long', timevar = 'cat') | |
plt <- cbind(plt.1, plt.2) | |
if(K == 2) plt <- plt[plt$cat != 1, ] | |
return(xyplot(P ~ Theta, plt, group = cat, | |
main = paste('Empirical plot for item', empirical.plot), | |
ylim = c(-0.1,1.1), xlab = expression(theta), ylab=expression(P(theta)), | |
auto.key=ifelse(K==2, FALSE, TRUE), EPCI.lower=EPCI.lower, | |
EPCI.upper=EPCI.upper, | |
panel = function(x, y, groups, subscripts, EPCI.lower, EPCI.upper, ...){ | |
panel.xyplot(x=x, y=y, groups=groups, type='l', | |
subscripts=subscripts, ...) | |
panel.points(cbind(plt$theta, plt$p), col=groups, pch=groups, ...) | |
if(!is.null(EPCI.lower)){ | |
theta <- na.omit(plt$theta) | |
for(i in 1:length(theta)) | |
panel.lines(c(theta[i], theta[i]), c(EPCI.lower[i], | |
EPCI.upper[i]), | |
lty = 2, col = 'red') | |
} | |
})) | |
} | |
ret$X2 <- X2 | |
ret$df <- df | |
ret$p.X2 <- round(1 - pchisq(X2, df), 4) | |
} | |
if(S_X2){ | |
dat <- x@Data$data | |
adj <- x@Data$mins | |
if(any(adj > 0)) | |
message('Data adjusted so that the lowest category score for every item is 0') | |
dat <- t(t(dat) - adj) | |
S_X2 <- df.S_X2 <- numeric(J) | |
O <- makeObstables(dat, x@K) | |
Nk <- rowSums(O[[1L]]) | |
dots <- list(...) | |
QMC <- ifelse(is.null(dots$QMC), FALSE, dots$QMC) | |
quadpts <- dots$quadpts | |
if(is.null(quadpts) && QMC) quadpts <- 2000L | |
if(is.null(quadpts)) quadpts <- select_quadpts(x@nfact) | |
theta_lim <- dots$theta_lim | |
if(is.null(theta_lim)) theta_lim <- c(-6,6) | |
gp <- ExtractGroupPars(pars[[length(pars)]]) | |
E <- EAPsum(x, S_X2 = TRUE, gp = gp, CUSTOM.IND=x@CUSTOM.IND, | |
quadpts=quadpts, theta_lim=theta_lim, discrete=discrete, QMC=QMC) | |
for(i in 1L:J) | |
E[[i]] <- E[[i]] * Nk | |
coll <- collapseCells(O, E, mincell=mincell) | |
if(S_X2.tables) return(list(O.org=O, E.org=E, O=coll$O, E=coll$E)) | |
O <- coll$O | |
E <- coll$E | |
for(i in 1L:J){ | |
if (is.null(dim(O[[i]]))) next | |
S_X2[i] <- sum((O[[i]] - E[[i]])^2 / E[[i]], na.rm = TRUE) | |
df.S_X2[i] <- sum(!is.na(E[[i]])) - nrow(E[[i]]) - sum(pars[[i]]@est) | |
} | |
S_X2[df.S_X2 <= 0] <- NaN | |
ret$S_X2 <- S_X2 | |
ret$df.S_X2 <- df.S_X2 | |
ret$p.S_X2 <- round(1 - pchisq(S_X2, df.S_X2), 4) | |
} | |
return(ret) | |
} | |
##Aplications | |
#' #make some data | |
#' set.seed(1234) | |
#' a <- matrix(rlnorm(20, meanlog=0, sdlog = .1),ncol=1) | |
#' d <- matrix(rnorm(20),ncol=1) | |
#' items <- rep('dich', 20) | |
#' data <- simdata(a,d, 2000, items) | |
#' | |
#' x <- mirt(data, 1) | |
#' raschfit <- mirt(data, 1, itemtype='Rasch') | |
#' fit <- itemfit(x) | |
#' fit | |
#' | |
#' itemfit(x, empirical.plot = 1) #empirical item plot | |
#' itemfit(x, empirical.plot = 1, empirical.CI = .99) #empirical item plot with 99% CI's | |
#' #method='ML' agrees better with eRm package | |
#' itemfit(raschfit, method = 'ML') #infit and outfit stats | |
#' #same as above, but inputting ML estimates instead | |
#' Theta <- fscores(raschfit, method = 'ML', full.scores=TRUE, scores.only=TRUE) | |
#' itemfit(raschfit, Theta=Theta) | |
#' | |
#' #similar example to Kang and Chen 2007 | |
#' a <- matrix(c(.8,.4,.7, .8, .4, .7, 1, 1, 1, 1)) | |
#' d <- matrix(rep(c(2.0,0.0,-1,-1.5),10), ncol=4, byrow=TRUE) | |
#' dat <- simdata(a,d,2000, itemtype = rep('graded', 10)) - 1 | |
#' head(dat) | |
#' | |
#' mod <- mirt(dat, 1) | |
#' itemfit(mod) | |
#' | |
#' mod2 <- mirt(dat, 1, 'Rasch') | |
#' itemfit(mod2) | |
#' | |
#' #massive list of tables | |
#' tables <- itemfit(mod, S_X2.tables = TRUE) | |
#' | |
#' #observed and expected total score patterns for item 1 (post collapsing) | |
#' tables$O[[1]] | |
#' tables$E[[1]] | |
#' | |
#' # fit stats with missing data (run in parallel using all cores) | |
#' data[sample(1:prod(dim(data)), 500)] <- NA | |
#' raschfit <- mirt(data, 1, itemtype='Rasch') | |
#' | |
#' mirtCluster() | |
#' Theta <- fscores(raschfit, method = 'ML', full.scores=TRUE) | |
#' itemfit(raschfit, impute = 10, Theta=Theta) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment