Skip to content

Instantly share code, notes, and snippets.

@nutterb
Last active January 6, 2017 18:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nutterb/f05cc0e725ed389292116eb661f68bb5 to your computer and use it in GitHub Desktop.
Save nutterb/f05cc0e725ed389292116eb661f68bb5 to your computer and use it in GitHub Desktop.
An example of a case where a `for` loop isn't any slower than `lapply`. In fact, it is consistently faster than a typical `lapply`, though I can get very close to the execution time of the `for` loop if I use `<<-` in the `lapply`. Take a look at http://stackoverflow.com/questions/41471757/update-pairs-of-columns-based-on-pattern-in-their-names#…
# Adding lapply versions to http://stackoverflow.com/a/41511889/1017276
library(magrittr)
library(data.table)
library(microbenchmark)
set.seed(pi)
nc = 1e3
nr = 1e2
df_m0 = sample(c(1:10, NA_integer_), nc*nr, replace = TRUE) %>% matrix(nr, nc) %>% data.frame
df_r = sample(c(1:10), nc*nr, replace = TRUE) %>% matrix(nr, nc) %>% data.frame
microbenchmark(times = 10,
for_vec = {
df_m <- df_m0
for (col in 1:nc){
w <- which(is.na(df_m[[col]]))
df_m[[col]][w] <- df_r[[col]][w]
}
}, lapply_vec = {
df_m <- df_m0
lapply(seq_along(df_m),
function(i){
w <- which(is.na(df_m[[i]]))
df_m[[i]][w] <<- df_r[[i]][w]
})
}, for_df = {
df_m <- df_m0
for (col in 1:nc){
w <- which(is.na(df_m[[col]]))
df_m[w, col] <- df_r[w, col]
}
}, lapply_df = {
df_m <- df_m0
lapply(seq_along(df_m),
function(i){
w <- which(is.na(df_m[[i]]))
df_m[w, i] <<- df_r[w, i]
})
}, mat = { # in lmo's answer
df_m <- df_m0
bah = is.na(df_m)
df_m[bah] = df_r[bah]
}, set = {
df_m <- copy(df_m0)
for (col in 1:nc){
w = which(is.na(df_m[[col]]))
set(df_m, i = w, j = col, v = df_r[w, col])
}
}
)
library(microbenchmark)
library(reshape2)
col_1 <- c(1,2,NA,4,5)
temp_col_1 <-c(12,2,2,3,4)
col_2 <- c(1,23,423,NA,23)
temp_col_2 <-c(1,2,23,4,5)
df_orig <- data.frame(col_1,temp_col_1,col_2, temp_col_2)
df_orig <- df_orig[sample(1:nrow(df_orig), 10000, replace = TRUE), ]
microbenchmark(
for_loop =
{
df_test <- df_orig
temp_cols <- names(df_test)[grepl("^temp", names(df_test))]
cols <- sub("^temp_", "", temp_cols)
for (i in seq_along(temp_cols)){
row_to_replace <- which(is.na(df_test[[cols[i]]]))
df_test[[cols[i]]][row_to_replace] <- df_test[[temp_cols[i]]][row_to_replace]
}
},
lapply =
{
df_test <- df_orig
temp_cols <- names(df_test)[grepl("^temp", names(df_test))]
df_test[sub("^temp", "", temp_cols)] <-
lapply(temp_cols,
function(tc){
cols <- sub("^temp_", "", tc)
row_to_replace <- which(is.na(df_test[[cols]]))
df_test[[cols]][row_to_replace] <- df_test[[tc]][row_to_replace]
df_test[[cols]]
})
},
lapply_double = {
df_test <- df_orig
lapply(names(df_orig)[grepl("^temp_", names(df_orig))],
function(tc){
col <- sub("^temp_", "", tc)
row_to_replace <- which(is.na(df_test[[col]]))
df_test[[col]][row_to_replace] <<- df_test[[tc]][row_to_replace]
})
},
mapply_double = {
df_test <- df_orig
temp_cols <- names(df_test)[grepl("^temp", names(df_test))]
cols <- sub("^temp_", "", temp_cols)
mapply(
function(c, tc){
row_to_replace <- which(is.na(df_test[[c]]))
df_test[[c]][row_to_replace] <<- df_test[[tc]][row_to_replace]
},
c = cols,
tc = temp_cols
)
},
times = 100
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment