Skip to content

Instantly share code, notes, and snippets.

@matt-dray
Last active March 7, 2024 12:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matt-dray/f3063dd2f786053633191d6d8b7295e5 to your computer and use it in GitHub Desktop.
Save matt-dray/f3063dd2f786053633191d6d8b7295e5 to your computer and use it in GitHub Desktop.
Assign abstracts to sifters as equally as possible so that each abstract is reviewed exactly n times
# As above, but checks for exact name and affiliation matches between
# sifters and abstracts.
#
# It's possible certain abstracts might not get assigned, especially given combos
# of sifters writing abstracts, abstracts from sifters' affiliations, assignment
# capping and sifter capping. Should probably report any abstracts that have
# <assignment_cap assignments. As a precaution, have built in a max_iterations arg
# in case of infinite looping, but I don't think that will come into play.
.resample <- function(x, ...) x[sample.int(length(x), ...)] # see ?sample
.assign_abstracts <- function(
abstracts, # c("First Last" = "Org")
sifters, # c("First Last" = "Org")
assignment_cap = 2,
sifter_caps = NULL, # c("First Last" = 5)
max_iterations = 1000 # just in case?
) {
n_abstracts <- length(abstracts)
seq_abstracts <- seq_len(n_abstracts)
sifter_assignments <- setNames(vector("list", length(sifters)), names(sifters))
assignment_counts <- rep(0, n_abstracts)
iter <- 0
repeat {
for (i in names(sifter_assignments)) {
# 1. Find the pool of abstracts available to this sifter (if any)
# a. Check if sifter cap has been met
sifter_has_cap <- i %in% names(sifter_caps)
if (sifter_has_cap) {
sifter_cap <- sifter_caps[[i]]
sifter_assignment_count <- length(sifter_assignments[[i]])
}
if (sifter_has_cap && sifter_assignment_count == sifter_cap) next
# b. Add abstracts to pool if they have <n assignments
abstracts_under_min_count <- which(assignment_counts < assignment_cap)
if (length(abstracts_under_min_count) == 0) next
# c. Remove abstracts that are already assigned to this sifter
already_assigned_to_sifter <- sifter_assignments[[i]]
abstracts_available <- abstracts_under_min_count[!abstracts_under_min_count %in% already_assigned_to_sifter]
if (length(abstracts_available) == 0) next
# d. Remove abstracts by the named sifter
abstracts_by_sifter <- which(i == names(abstracts[abstracts_available]))
if (length(abstracts_by_sifter) > 0) {
abstracts_available <- abstracts_available[-abstracts_by_sifter]
}
if (length(abstracts_available) == 0) next
# e. Remove abstracts with the same affiliation as the sifter
sifter_affiliation <- unname(sifters[i])
abstracts_by_same_affiliation <- which(sifter_affiliation == unname(abstracts[abstracts_available]))
if (length(abstracts_by_same_affiliation) > 0) {
abstracts_available <- abstracts_available[-abstracts_by_same_affiliation]
}
if (length(abstracts_available) == 0) next
# 2. Select from pool randomly and assign to sifter
abstract_selected <- .resample(abstracts_available, 1)
sifter_assignments[[i]] <- c(sifter_assignments[[i]], abstract_selected)
# 3. Increment count for sampled abstract
assignment_counts[abstract_selected] <- assignment_counts[abstract_selected] + 1
if (all(assignment_counts == assignment_cap)) break
}
# Reorder so sifter with fewest assignments gets next assignment
sifter_assignments <- sifter_assignments[order(lengths(sifter_assignments))]
iter <- iter + 1
if (all(assignment_counts == assignment_cap)) break
if (iter == max_iterations) {
cat("max_iterations reached")
break
}
}
sifter_assignments <- lapply(sifter_assignments, sort)
sifter_assignments[order(names(sifter_assignments))]
}
# Prepare demo sifter and abstract sets
set.seed(1)
n_sifters <- 3
sifter_set <- setNames(charlatan::ch_company(n_sifters), charlatan::ch_name(n_sifters))
sifter_names <- names(sifter_set)
sifter_companies <- unname(sifter_set)
abstract_name_pool <- c(sifter_names, charlatan::ch_name(10))
abstract_company_pool <- c(rep(sifter_companies, 2), charlatan::ch_company(7))
abstract_set <- setNames(abstract_company_pool, abstract_name_pool)
abstract_set <- sample(abstract_set, length(abstract_set))
.assign_abstracts(
abstracts = abstract_set,
sifters = sifter_set,
assignment_cap = 2,
sifter_caps = c("Channing Glover" = 3)
)
.resample <- function(x, ...) x[sample.int(length(x), ...)] # see ?sample
.assign_sifters <- function(
n_abstracts,
sifters,
sifter_caps,
n_assignments
) {
seq_abstracts <- seq_len(n_abstracts)
sifter_assignments <- setNames(vector("list", length(sifters)), sifters)
assignment_counts <- rep(0, n_abstracts)
repeat_iter <- 0
repeat {
for (i in names(sifter_assignments)) {
# 1. Find the pool of abstracts available to this sifter (if any)
# a. Check if sifter cap has been met
# b. Add abstracts to pool if they have <n assignments
# c. Remove abstracts that are already assigned to this sifter
sifter_cap <- sifter_caps[[i]]
sifter_assignment_count <- length(sifter_assignments[[i]])
if (!is.null(sifter_cap) && sifter_assignment_count == sifter_cap) next
abstracts_under_min_count <- which(assignment_counts < n_assignments)
if (length(abstracts_under_min_count) == 0) next
already_assigned_to_sifter <- sifter_assignments[[i]]
abstracts_available <- abstracts_under_min_count[!abstracts_under_min_count %in% already_assigned_to_sifter]
if (length(abstracts_available) == 0) next
# 2. Select from pool randomly and assign to sifter
abstract_selected <- .resample(abstracts_available, 1)
sifter_assignments[[i]] <- c(sifter_assignments[[i]], abstract_selected)
# 3. Increment count for sampled abstract
assignment_counts[abstract_selected] <- assignment_counts[abstract_selected] + 1
if (all(assignment_counts == n_assignments)) break
}
# Reorder so sifter with fewest assignments gets next assignment
sifter_assignments <- sifter_assignments[order(lengths(sifter_assignments))]
repeat_iter <- repeat_iter + 1
if (all(assignment_counts == n_assignments)) break
}
sifter_assignments <- lapply(sifter_assignments, sort)
sifter_assignments[order(names(sifter_assignments))]
}
reps <- 3
x <- .assign_sifters(
n_abstracts = 223,
sifters = LETTERS[1:5],
sifter_caps = list(A = 10, B = 20),
reps
)
lengths(x)
# A B C D E
# 10 20 139 139 138
unique(table(unlist(x))) == reps
# [1] TRUE
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment