Skip to content

Instantly share code, notes, and snippets.

@BERENZ
Created June 21, 2024 07:19
Show Gist options
  • Save BERENZ/b4455270415350053b743b68fd50c082 to your computer and use it in GitHub Desktop.
Save BERENZ/b4455270415350053b743b68fd50c082 to your computer and use it in GitHub Desktop.
An example code for the misclassification and misclassGLM
library("misclassGLM")
library("data.table")
library("simex")
## population definition
set.seed(2024)
N <- 100000
pop_data <- data.table(age = sample(c("Young", "Middle-aged", "Old"), size = N, c(0.40, 0.35, 0.25), replace = T),
ethnic = sample(c("Native", "Nonnative"), size = N, prob = c(0.85, 0.15), replace = T))
pop_data[ethnic == "Native" & age == "Young", internet := rbinom(.N, 1, 0.90)]
pop_data[ethnic == "Native" & age == "Middle-aged", internet := rbinom(.N, 1, 0.70)]
pop_data[ethnic == "Native" & age == "Old", internet := rbinom(.N, 1, 0.50)]
pop_data[ethnic == "Nonnative" & age == "Young", internet := rbinom(.N, 1, 0.25)]
pop_data[ethnic == "Nonnative" & age == "Middle-aged", internet := rbinom(.N, 1, 0.15)]
pop_data[ethnic == "Nonnative" & age == "Old", internet := rbinom(.N, 1, 0.10)]
pop_data[age == "Young", NEP := rbinom(.N, 1, 0.05)]
pop_data[age == "Middle-aged", NEP := rbinom(.N, 1, 0.40)]
pop_data[age == "Old", NEP := rbinom(.N, 1, 0.60)]
pop_data[age == "Young" & internet == 1, NIP := rbinom(.N, 1, 0.80)]
pop_data[age == "Middle-aged" & internet == 1, NIP := rbinom(.N, 1, 0.40)]
pop_data[age == "Old" & internet == 1, NIP := rbinom(.N, 1, 0.20)]
pop_data[internet == 0, NIP := rbinom(.N, 1, 0.10)]
pop_data[, age := factor(age, levels = c("Young", "Middle-aged", "Old"))]
pop_data[, ethnic := factor(ethnic, levels = c("Native", "Nonnative"))]
pop_data[, pi_A := exp(-2 + 2*internet) / (1 + exp(-2 + 2*internet))]
gamma_ethnic <- matrix(c(0.7, 0.3, 0.05, 0.95), ncol = 2)
gamma_age <- matrix(c(0.85, 0.10, 0.05,
0.20, 0.70, 0.10,
0.05, 0.15, 0.80), ncol = 3)
colnames(gamma_age) <- levels(pop_data$age)
colnames(gamma_ethnic) <- levels(pop_data$ethnic)
dd <- simex::misclass(pop_data[, .(age, ethnic)],
list(age = gamma_age, ethnic = gamma_ethnic),
k = 1)
pop_data[, age_m := dd$age]
pop_data[, ethnic_m := dd$ethnic]
## estimation of probabilities for age
Pmodel <- nnet::multinom(age ~ age_m, data = pop_data)
P <- predict(Pmodel, newdata = pop_data, type = "probs")
colnames(P) <- c("Young", "Middle-aged", "Old")
est <- misclassGLM(Y = pop_data$NEP,,
X = as.matrix(as.numeric(pop_data$ethnic == "Native")),
setM = matrix(c(0, 1, 2), nrow = 3),
P = P)
summary(est)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment