Skip to content

Instantly share code, notes, and snippets.

@wjakethompson
Created January 13, 2019 20:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wjakethompson/631edbea51faed459559e049bbe1eefa to your computer and use it in GitHub Desktop.
Save wjakethompson/631edbea51faed459559e049bbe1eefa to your computer and use it in GitHub Desktop.
Submission for 2016 EM:IP Cover Showcase
# Note: Code and graphic have been updated since original submission
### User input -----------------------------------------------------------------
num_stu <- 20000
num_testlet <- 6
num_item <- 10
set.seed(3567)
### Packages -------------------------------------------------------------------
needed_packages <- c("tidyverse", "glue", "tidyselect", "colorblindr",
"hrbrthemes", "ggforce")
load_packages <- function(x) {
if (!(x %in% installed.packages())) {
install.packages(x)
}
suppressPackageStartupMessages(require(x, character.only = TRUE))
}
vapply(needed_packages, load_packages, logical(1))
### Simulate test --------------------------------------------------------------
items <- crossing(
level = seq_len(5),
testlet = seq_len(num_testlet),
item = seq_len(num_item)
) %>%
filter(!(testlet == 1 & level == 5)) %>%
mutate(
nonmaster_prob = runif(n = nrow(.), min = 0.1, max = 0.4),
master_prob = runif(n = nrow(.), min = 0.6, max = 0.9)
)
profiles <- list(
c(0, 0, 0, 0, 0),
c(1, 0, 0, 0, 0),
c(1, 1, 0, 0, 0),
c(1, 1, 1, 0, 0),
c(1, 1, 1, 1, 0),
c(1, 1, 1, 1, 1)
)
profile_probs <- c(0.15, 0.35, 0.25, 0.12, 0.09, 0.04)
stu_profiles <- map(seq_len(num_stu), function(x) {
map_dbl(seq_len(num_testlet), function(x) {
sample(profiles, size = 1) %>%
flatten_dbl() %>% sum()
})
})
max_int <- function(x, m) {
as.integer(min(x, m))
}
min_int <- function(x, m) {
as.integer(max(x, m))
}
stu_bands <- map_dbl(.x = stu_profiles, .f = function(x, bands) {
poss <- (round(mean(x), 0) - 1):(round(mean(x), 0) + 1)
poss <- vapply(poss, max_int, integer(1), m = max(bands))
poss <- vapply(poss, min_int, integer(1), m = min(bands))
sample(poss, size = 1)
},
bands = filter(items, testlet == 1) %>% pull(level) %>% unique())
only_if <- function(condition) {
function(func) {
if (condition) {
func
} else {
function(., ...) .
}
}
}
sim_data <- map2_dfr(stu_profiles, stu_bands, function(profile, band,
items) {
stu_data <- list_along(unique(items$testlet))
stu_levels <- list_along(unique(items$testlet)) %>%
set_names(nm = glue("rank_{seq_len(length(.))}"))
cur_level <- as.integer(band)
for (i in seq_along(stu_levels)) {
stu_levels[[i]] <- cur_level
master <- profile[i] >= cur_level
cur_testlet <- items %>%
filter(testlet == i, level == cur_level) %>%
mutate(rnd = runif(n = nrow(.), min = 0, max = 1)) %>%
only_if(master)(mutate)(score =
case_when(rnd <= master_prob ~ 1, TRUE ~ 0)) %>%
only_if(!master)(mutate)(score =
case_when(rnd <= nonmaster_prob ~ 1, TRUE ~ 0)) %>%
select(level, testlet, item, score)
stu_data[[i]] <- cur_testlet
pct_cor <- mean(cur_testlet$score)
if (pct_cor < 0.35) {
cur_level <- cur_level - 1
} else if (pct_cor >= 0.8) {
cur_level <- cur_level + 1
}
cur_level <- vapply(cur_level, max_int, integer(1), max(items$level))
cur_level <- vapply(cur_level, min_int, integer(1), min(items$level))
}
bind_cols(
data_frame(
stu_data = list(bind_rows(stu_data)),
path = flatten_int(stu_levels) %>% paste(collapse = "")
),
as_data_frame(stu_levels)
)
}, items = items)
rm(list = setdiff(ls(), c("sim_data", "num_item", "num_stu", "num_testlet")))
save(list = ls(), file = "~/Desktop/emip-2018-data.rda")
### Line plot ------------------------------------------------------------------
load("~/Desktop/emip-2018-data.rda")
pattern_count <- sim_data %>%
filter(str_detect(path, "^2")) %>%
group_by(path, rank_6) %>%
summarize(n = n()) %>%
group_by(rank_6) %>%
top_n(10, wt = n) %>%
ungroup() %>%
select(path, n)
line_data <- pmap_dfr(pattern_count, function(path, n) {
pattern <- str_split(path, "") %>% flatten_chr() %>% as.integer()
pat_data <- data_frame(testlet = seq_along(pattern), level = pattern,
final_level = pattern[length(pattern)])
ret_data <- data_frame(
pat_data = list(pat_data),
line_id = glue("{path}_{seq_len(n)}")
) %>%
unnest()
}) %>%
mutate(final_level = factor(final_level, levels = 1:5,
labels = c("Initial\nPrecursor", "Distal\nPrecursor", "Proximal\nPrecursor",
"Target", "Successor")))
point_data <- sim_data %>%
rowid_to_column(var = "stu_id") %>%
filter(str_detect(path, "^2")) %>%
select(-stu_data, -path) %>%
gather(key = rank, value = level, -stu_id) %>%
mutate(rank = str_replace(rank, "rank_", "") %>% as.integer()) %>%
group_by(rank, level) %>%
summarize(num_stu = n())
line_plot <- ggplot() +
geom_line(data = line_data, aes(x = testlet, y = level, group = line_id,
color = factor(final_level)), stat = "smooth", method = "loess",
alpha = 0.009, size = 1) +
geom_point(aes(x = point_data$rank, y = point_data$level,
size = point_data$num_stu), color = "black", alpha = 0.4) +
scale_x_continuous(lim = c(1, num_testlet), breaks = seq_len(num_testlet)) +
scale_y_continuous(lim = c(0, 6), breaks = seq_len(5),
labels = c("Initial\nPrecursor", "Distal\nPrecursor", "Proximal\nPrecursor",
"Target", "Successor")) +
scale_size_area(limits = c(0, num_stu), max_size = 25,
breaks = c(100, 500, seq(1000, 4000, 1000))) +
scale_color_OkabeIto() +
coord_cartesian(ylim = c(1, 5), xlim = c(1, num_testlet)) +
labs(x = "Testlet Number", y = "Linkage Level", size = "Number of\nStudents",
color = "Ending\nLinkage Level") +
theme_ipsum_ps() +
theme(legend.position = "bottom", legend.box = "vertical",
panel.grid.minor = element_blank()) +
guides(
color = guide_legend(nrow = 1, order = 1, override.aes = list(alpha = 1)),
size = guide_legend(nrow = 1, order = 2)
)
ggsave("line-plot.png", plot = line_plot, width = 12, height = 8, units = "in",
dpi = "retina")
@wjakethompson
Copy link
Author

line-plot

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment