Skip to content

Instantly share code, notes, and snippets.

@JosepER
Created September 9, 2021 07:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JosepER/70b71c873e8f92779af37db34e387e97 to your computer and use it in GitHub Desktop.
Save JosepER/70b71c873e8f92779af37db34e387e97 to your computer and use it in GitHub Desktop.
Benchmarking Weighted Atkinson functions
library(tidyverse)
library(magrittr)
library(bench)
julia_weighted_atkinson_function <- JuliaCall::julia_eval("function weighted_atkinson(v, w, ϵ::Real, skipmissing::Bool)
if skipmissing
w = w[findall(!ismissing, v)]
v = v[findall(!ismissing, v)]
end
v = v/Statistics.mean(v)
w = w/sum(w)
if ϵ == 1
w = w[v .!= 0]
v = v[v .!= 0]
return 1 - (prod(exp.(w.*log.(v)))/sum(v .* w/sum(w)) )
elseif ϵ < 1
return 1-(sum(((v/sum(v.*w/sum(w))).^(1-ϵ)).*w/sum(w))).^(1/(1-ϵ))
else
w = w[v .!= 0]
v = v[v .!= 0]
return 1-(sum(((v/sum(v.*w/sum(w))).^(1-ϵ)).*w/sum(w))).^(1/(1-ϵ))
end
end ")
weighted_atkinson_jl <- function(x, weights, epsilon, na.rm = FALSE){
JuliaCall::julia_call("weighted_atkinson", x, weights, epsilon, na.rm)
}
weighted_atkinson <- function(x, weights, epsilon, na.rm = FALSE){
if(na.rm){
index_nas <- is.na(x)
x <- x[!index_nas]
weights <- weights[!index_nas]
}
if(epsilon >= 1){ # remove 0s if epsilon >= 1
index_0s <- x == 0
x <- x[!index_0s]
weights <- weights[!index_0s]
}
x <- x/mean(x)
weights <- weights/sum(weights)
return(
dplyr::if_else(epsilon==1,
true = 1 - (prod(exp(weights*log(x)))/sum(x*weights/sum(weights))),
false = 1 - (sum(((x/sum(x*weights/sum(weights)))^(1 - epsilon))*weights/sum(weights)))^(1/(1-epsilon)))
)
}
results <- bench::press(
rows = c(20000, 100000, 500000, 1200000, 5000000, 20000000),
epsilon = c(0.8, 1, 1.2),
{
dat <- tibble::tibble(x = runif(rows, 1, 5000),
wt = runif(rows, 0, 3))
bench::mark(
min_iterations = 15,
R = weighted_atkinson(x=dat$x,
weights=dat$wt,
epsilon = epsilon,
na.rm = TRUE),
julia = weighted_atkinson_jl(x=dat$x,
weights=dat$wt,
epsilon = epsilon,
na.rm = TRUE),
check = FALSE
)
}
)
results %>%
mutate(software = as.character(expression), median_time = (median)) %>%
select(software, rows, epsilon, median_time) %>%
pivot_wider(id_cols = c("rows", "epsilon"), names_from = "software", values_from = "median_time") %>%
mutate(ratio = as.numeric(R)/as.numeric(julia))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment