Skip to content

Instantly share code, notes, and snippets.

@rweyant
Created February 5, 2018 18:58
Show Gist options
  • Save rweyant/a76586f9ceb813b9c94d2658f078a433 to your computer and use it in GitHub Desktop.
Save rweyant/a76586f9ceb813b9c94d2658f078a433 to your computer and use it in GitHub Desktop.
library(dplyr)
library(purrr)
library(fuzzyjoin)
library(microbenchmark)
library(magrittr)
#### setup ####
n_obs <- 1000
n_filters <- 1000
tbl <- data_frame(id = rnorm(n_obs, 100, 100))
interval_tbl <- data_frame(lower = rnorm(n_filters, 100, 100),
upper = lower + 1)
#### functions ####
# purrr
in_any_interval <- function(x, interval_tbl) {
interval_tbl %>%
select(lower, upper) %>%
pmap(~ between(x, ..1, ..2)) %>%
pmap_lgl(any)
}
# minor change
how_many_intervals <- function(x, interval_tbl) {
interval_tbl %>%
select(lower, upper) %>%
pmap(~ between(x, ..1, ..2)) %>%
pmap_int(sum)
}
# bit weirder
which_intervals <- function(x, interval_tbl) {
interval_tbl %>%
select(lower, upper) %>%
pmap(~ between(x, ..1, ..2)) %>%
pmap(lift_vd(which))
}
# apply
named_range <- function( lower, upper, data) {
data > lower & data < upper
}
all_ranges <- function( x, ranges_df){
apply(mapply(FUN=named_range, ranges_df$lower, ranges_df$upper, MoreArgs=list(data=x)), 1, any)
}
apply_result <- lapply(tbl, FUN=all_ranges, interval_tbl)$id
purrr_result <- tbl %>% mutate(which_intervals = which_intervals(id, interval_tbl),
in_any_interval = map_lgl(which_intervals, ~ length(.x) > 0)) %>%
extract2('in_any_interval')
list(apply_result, purrr_result) %>%
reduce(all_equal)
### benchmark ###
microbenchmark(
apply = lapply(tbl, FUN=all_ranges, interval_tbl),
purrr = tbl %>%
mutate(which_intervals = which_intervals(id, interval_tbl),
how_many_intervals = map_int(which_intervals, length)),
# fuzzy_join = tbl %>% fuzzy_join(interval_tbl, by = c("id" = "lower", "id" = "upper"), match_fun = list(`>=`, `<`)),
times = 5
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment