Last active
March 7, 2024 12:07
-
-
Save matt-dray/f3063dd2f786053633191d6d8b7295e5 to your computer and use it in GitHub Desktop.
Assign abstracts to sifters as equally as possible so that each abstract is reviewed exactly n times
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# As above, but checks for exact name and affiliation matches between | |
# sifters and abstracts. | |
# | |
# It's possible certain abstracts might not get assigned, especially given combos | |
# of sifters writing abstracts, abstracts from sifters' affiliations, assignment | |
# capping and sifter capping. Should probably report any abstracts that have | |
# <assignment_cap assignments. As a precaution, have built in a max_iterations arg | |
# in case of infinite looping, but I don't think that will come into play. | |
.resample <- function(x, ...) x[sample.int(length(x), ...)] # see ?sample | |
.assign_abstracts <- function( | |
abstracts, # c("First Last" = "Org") | |
sifters, # c("First Last" = "Org") | |
assignment_cap = 2, | |
sifter_caps = NULL, # c("First Last" = 5) | |
max_iterations = 1000 # just in case? | |
) { | |
n_abstracts <- length(abstracts) | |
seq_abstracts <- seq_len(n_abstracts) | |
sifter_assignments <- setNames(vector("list", length(sifters)), names(sifters)) | |
assignment_counts <- rep(0, n_abstracts) | |
iter <- 0 | |
repeat { | |
for (i in names(sifter_assignments)) { | |
# 1. Find the pool of abstracts available to this sifter (if any) | |
# a. Check if sifter cap has been met | |
sifter_has_cap <- i %in% names(sifter_caps) | |
if (sifter_has_cap) { | |
sifter_cap <- sifter_caps[[i]] | |
sifter_assignment_count <- length(sifter_assignments[[i]]) | |
} | |
if (sifter_has_cap && sifter_assignment_count == sifter_cap) next | |
# b. Add abstracts to pool if they have <n assignments | |
abstracts_under_min_count <- which(assignment_counts < assignment_cap) | |
if (length(abstracts_under_min_count) == 0) next | |
# c. Remove abstracts that are already assigned to this sifter | |
already_assigned_to_sifter <- sifter_assignments[[i]] | |
abstracts_available <- abstracts_under_min_count[!abstracts_under_min_count %in% already_assigned_to_sifter] | |
if (length(abstracts_available) == 0) next | |
# d. Remove abstracts by the named sifter | |
abstracts_by_sifter <- which(i == names(abstracts[abstracts_available])) | |
if (length(abstracts_by_sifter) > 0) { | |
abstracts_available <- abstracts_available[-abstracts_by_sifter] | |
} | |
if (length(abstracts_available) == 0) next | |
# e. Remove abstracts with the same affiliation as the sifter | |
sifter_affiliation <- unname(sifters[i]) | |
abstracts_by_same_affiliation <- which(sifter_affiliation == unname(abstracts[abstracts_available])) | |
if (length(abstracts_by_same_affiliation) > 0) { | |
abstracts_available <- abstracts_available[-abstracts_by_same_affiliation] | |
} | |
if (length(abstracts_available) == 0) next | |
# 2. Select from pool randomly and assign to sifter | |
abstract_selected <- .resample(abstracts_available, 1) | |
sifter_assignments[[i]] <- c(sifter_assignments[[i]], abstract_selected) | |
# 3. Increment count for sampled abstract | |
assignment_counts[abstract_selected] <- assignment_counts[abstract_selected] + 1 | |
if (all(assignment_counts == assignment_cap)) break | |
} | |
# Reorder so sifter with fewest assignments gets next assignment | |
sifter_assignments <- sifter_assignments[order(lengths(sifter_assignments))] | |
iter <- iter + 1 | |
if (all(assignment_counts == assignment_cap)) break | |
if (iter == max_iterations) { | |
cat("max_iterations reached") | |
break | |
} | |
} | |
sifter_assignments <- lapply(sifter_assignments, sort) | |
sifter_assignments[order(names(sifter_assignments))] | |
} | |
# Prepare demo sifter and abstract sets | |
set.seed(1) | |
n_sifters <- 3 | |
sifter_set <- setNames(charlatan::ch_company(n_sifters), charlatan::ch_name(n_sifters)) | |
sifter_names <- names(sifter_set) | |
sifter_companies <- unname(sifter_set) | |
abstract_name_pool <- c(sifter_names, charlatan::ch_name(10)) | |
abstract_company_pool <- c(rep(sifter_companies, 2), charlatan::ch_company(7)) | |
abstract_set <- setNames(abstract_company_pool, abstract_name_pool) | |
abstract_set <- sample(abstract_set, length(abstract_set)) | |
.assign_abstracts( | |
abstracts = abstract_set, | |
sifters = sifter_set, | |
assignment_cap = 2, | |
sifter_caps = c("Channing Glover" = 3) | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.resample <- function(x, ...) x[sample.int(length(x), ...)] # see ?sample | |
.assign_sifters <- function( | |
n_abstracts, | |
sifters, | |
sifter_caps, | |
n_assignments | |
) { | |
seq_abstracts <- seq_len(n_abstracts) | |
sifter_assignments <- setNames(vector("list", length(sifters)), sifters) | |
assignment_counts <- rep(0, n_abstracts) | |
repeat_iter <- 0 | |
repeat { | |
for (i in names(sifter_assignments)) { | |
# 1. Find the pool of abstracts available to this sifter (if any) | |
# a. Check if sifter cap has been met | |
# b. Add abstracts to pool if they have <n assignments | |
# c. Remove abstracts that are already assigned to this sifter | |
sifter_cap <- sifter_caps[[i]] | |
sifter_assignment_count <- length(sifter_assignments[[i]]) | |
if (!is.null(sifter_cap) && sifter_assignment_count == sifter_cap) next | |
abstracts_under_min_count <- which(assignment_counts < n_assignments) | |
if (length(abstracts_under_min_count) == 0) next | |
already_assigned_to_sifter <- sifter_assignments[[i]] | |
abstracts_available <- abstracts_under_min_count[!abstracts_under_min_count %in% already_assigned_to_sifter] | |
if (length(abstracts_available) == 0) next | |
# 2. Select from pool randomly and assign to sifter | |
abstract_selected <- .resample(abstracts_available, 1) | |
sifter_assignments[[i]] <- c(sifter_assignments[[i]], abstract_selected) | |
# 3. Increment count for sampled abstract | |
assignment_counts[abstract_selected] <- assignment_counts[abstract_selected] + 1 | |
if (all(assignment_counts == n_assignments)) break | |
} | |
# Reorder so sifter with fewest assignments gets next assignment | |
sifter_assignments <- sifter_assignments[order(lengths(sifter_assignments))] | |
repeat_iter <- repeat_iter + 1 | |
if (all(assignment_counts == n_assignments)) break | |
} | |
sifter_assignments <- lapply(sifter_assignments, sort) | |
sifter_assignments[order(names(sifter_assignments))] | |
} | |
reps <- 3 | |
x <- .assign_sifters( | |
n_abstracts = 223, | |
sifters = LETTERS[1:5], | |
sifter_caps = list(A = 10, B = 20), | |
reps | |
) | |
lengths(x) | |
# A B C D E | |
# 10 20 139 139 138 | |
unique(table(unlist(x))) == reps | |
# [1] TRUE |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment