Skip to content

Instantly share code, notes, and snippets.

@MilesMcBain
Last active August 18, 2021 05:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MilesMcBain/95d3f1e5dcf1d20890e6e3eacf73d3f0 to your computer and use it in GitHub Desktop.
Save MilesMcBain/95d3f1e5dcf1d20890e6e3eacf73d3f0 to your computer and use it in GitHub Desktop.
vectorised base R simulation
library(tidyverse)
do_sim <- function() {
n_locations <- 1000
n_stations <- 10
n_sources <- n_stations + n_locations
n_incidents <- 20000
n_replicates <- 100
probabilities <- seq(0.7, 1, by = 0.05)
names(probabilities) <- formatC(probabilities, format = "f", digits = 2)
# some pretend table
stations <- tibble(index = seq_len(n_locations + n_stations), name = "foo")
# a big matrix of travel_times
travel_times <- matrix(
runif(n_incidents * (n_locations + n_stations), 0, 40 * 60) |> as.integer(),
nrow = n_incidents,
ncol = n_stations + n_locations
) |> t()
# rows = stations
# cols = incidents
# we need matrix of travel times for each scenario
# assuming the matrix times run stations, notional locations
station_travel_times <- travel_times[seq(n_stations), ]
notional_station_travel_times <- travel_times[n_stations + seq(n_locations), ]
fastest_n <- function(col, n = 2) {
sorted <- .Internal(qsort(col, TRUE))
matrix(
c(
sorted$x[seq(n)],
sorted$ix[seq(n)]
),
byrow = FALSE,
nrow = n,
ncol = 2
)
}
scenario_travel_times <-
vapply(
asplit(notional_station_travel_times, 1),
function(a_row) {
scenario_travel_time_mat <- rbind(
station_travel_times,
a_row
)
},
array(rep(0, (n_stations + 1) * n_incidents), dim = c(n_stations + 1, n_incidents))
)
dim(scenario_travel_times) <- c(n_stations + 1, n_incidents * n_locations)
fastest_scenario_station_to_incidents <-
vapply(asplit(scenario_travel_times, 2), fastest_n, matrix(rep(0, 2 * 2), nrow = 2, ncol = 2))
# row = resulst (1st or 2nd)
# col1 = times
# col2 = indexes
# dimension 3 = incident, scenario
fastest_station_travel_times_to_incidents <- fastest_scenario_station_to_incidents[, 1, ]
dim(fastest_station_travel_times_to_incidents) <- c(2, n_incidents, n_locations)
fastest_station_travel_indexes_to_incidents_unfixed <- fastest_scenario_station_to_incidents[, 2, ]
dim(fastest_station_travel_indexes_to_incidents_unfixed) <- c(2, n_incidents, n_locations)
# need to correct indexes so they map to original sources
fastest_station_travel_indexes_to_incidents <-
vapply(
seq(n_locations),
function(x) {
layer <- fastest_station_travel_indexes_to_incidents_unfixed[, , x]
layer[layer == n_stations + 1] <- x + n_stations
layer
},
array(rep(0, 2 * n_incidents), dim = c(2, n_incidents))
)
results <- apply(
fastest_scenario_travel_times_to_incidents,
3,
function(travel_time_mat) {
replicate_incidents <- sample(seq(n_incidents), n_incidents * n_replicates * length(probabilities), replace = TRUE)
dim(replicate_incidents) <- c(n_incidents, n_replicates, length(probabilities))
replicate_probabilities <- vapply(
probabilities,
function(x) rbinom(n_incidents * n_replicates, 1, 1 - x),
# we want 0 to be fastest
matrix(rep(0L, n_incidents, n_replicates), nrow = n_incidents, ncol = n_replicates)
)
# dim 1 = incident
# dim 2 = replicate
# dim 3 = probability
time_indexes <- replicate_probabilities + 1
# We had 0s and 1s now we have 1s and 2s which index into our fastest_times
replicate_response_times <-
travel_time_mat[cbind(
as.vector(time_indexes),
as.vector(replicate_incidents)
)]
dim(replicate_response_times) <- c(n_incidents, n_replicates, length(probabilities))
replicate_groups <- asplit(replicate_response_times, 2)
summarise_replicate_group_matrix <- function(replicate_group_matrix) {
t(apply(replicate_group_matrix, 2, quantile, probs = c(0.5, 0.9)))
}
scenario_percentiles <- lapply(replicate_groups, summarise_replicate_group_matrix)
rectangle <- as.data.frame(do.call(rbind, scenario_percentiles))
rectangle$probability <- rep(probabilities, n_replicates)
rectangle$replicate <- rep(seq(n_replicates), each = length(probabilities))
rectangle
},
simplify = FALSE
)
results
}
bench::mark(
do_sim(),
max_iterations = 1
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment