Skip to content

Instantly share code, notes, and snippets.

@johnburnmurdoch
Created March 1, 2018 19:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnburnmurdoch/7374a41579b0fd56c2c8100e0aadb33f to your computer and use it in GitHub Desktop.
Save johnburnmurdoch/7374a41579b0fd56c2c8100e0aadb33f to your computer and use it in GitHub Desktop.
Vectorising for loops to save time in R
needs(tidyverse, magrittr, jsonlite, scales)
#' Time two row-wise labelling loops on a random data frame
#'
#' Simulates `rows` observations in four numeric columns, then times two
#' `for` loops that label every row according to whether its four values sum
#' to more than 4:
#'
#' * "Appending": each label is written straight into a fifth column of the
#'   data frame, one cell at a time.
#' * "Vectorised": each label is written into a preallocated character
#'   vector, which is attached to the data frame once after the loop.
#'
#' @param rows Number of rows to simulate. Zero is allowed and simply times
#'   two empty loops.
#' @return A data frame with one row per component reported by
#'   `system.time()` and three columns: `type` (component name),
#'   `Appending` and `Vectorised` (the timings of the two loops).
loopTimer <- function(rows) {
  col1 <- runif(rows, 0, 2)
  col2 <- rnorm(rows, 0, 2)
  col3 <- rpois(rows, 3)
  col4 <- rchisq(rows, 2)
  df <- data.frame(col1, col2, col3, col4)

  # Strategy 1: assign into the data frame cell by cell.
  # seq_len() (rather than 1:nrow(df)) keeps the loop empty when rows == 0.
  r <- system.time({
    for (i in seq_len(nrow(df))) {
      row_sum <- df[i, "col1"] + df[i, "col2"] + df[i, "col3"] + df[i, "col4"]
      if (row_sum > 4) {
        df[i, 5] <- "greater_than_4"
      } else {
        df[i, 5] <- "lesser_than_4"
      }
    }
  })

  # Strategy 2: fill a preallocated vector, then attach it in a single step.
  output <- character(nrow(df))
  v <- system.time({
    for (i in seq_len(nrow(df))) {
      row_sum <- df[i, "col1"] + df[i, "col2"] + df[i, "col3"] + df[i, "col4"]
      if (row_sum > 4) {
        output[i] <- "greater_than_4"
      } else {
        output[i] <- "lesser_than_4"
      }
    }
    # The attach step belongs inside the timed region so both strategies are
    # measured up to the point where the data frame holds the labels.
    df$output <- output
  })

  # Column names are kept as-is; the plotting code keys on them.
  data.frame(
    type = names(r),
    Appending = as.numeric(r),
    Vectorised = as.numeric(v)
  )
}
# Data-frame sizes to benchmark: 1^5, 2^5, ..., 10^5 rows.
rowNums <- c(1:10)^5

# Preallocate one slot per benchmark run instead of growing the list.
timerList <- vector("list", length(rowNums))
for (i in seq_along(rowNums)) {
  n <- rowNums[i]
  # Tag each timing table with the row count it was measured on.
  timerList[[i]] <- loopTimer(n) %>% mutate(nrow = n)
}
# Plot elapsed seconds against row count, one line per timing strategy.
timerList %>%
  bind_rows() %>%
  # Keep only the "elapsed" component of each system.time() result.
  filter(type == "elapsed") %>%
  # Stack columns 2:3 (Appending, Vectorised) into method/seconds pairs.
  gather(method, seconds, 2:3) %>%
  ggplot(
    aes(
      nrow, seconds,
      col = method,
      # The legend is hidden, so label each line at its right-most point only.
      label = ifelse(nrow == max(nrow), paste0(method, " "), "")
    )
  ) +
  theme_minimal(base_family = "Avenir") +
  theme(
    legend.position = "none",
    axis.line = element_line(color = "black"),
    panel.grid.minor = element_blank()
  ) +
  geom_line() +
  geom_text(hjust = 1, vjust = 0) +
  # Spell out `labels` in full rather than relying on partial matching.
  scale_x_continuous(labels = comma) +
  scale_color_manual(
    values = c("Appending" = "#0083eb", "Vectorised" = "#ff2b4f")
  ) +
  labs(
    title = "How to save time when processing data in for loops",
    x = "Rows in data frame",
    y = "Seconds elapsed"
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment