Skip to content

Instantly share code, notes, and snippets.

@stephenturner
Last active July 28, 2017 19:56
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save stephenturner/44385b81ba767096c04e to your computer and use it in GitHub Desktop.
Save stephenturner/44385b81ba767096c04e to your computer and use it in GitHub Desktop.
Code for the Getting Genetics Done (GGD) blog post on the microbenchmark package
# see blog post here:
# http://gettinggeneticsdone.blogspot.com/2015/01/microbenchmark-package-r-compare-runtime-r-expressions.html
#
# Compares the runtime of four ways of computing the mean arrival delay per
# carrier on the nycflights13 `flights` data: base aggregate(), sqldf,
# data.table, and dplyr.

# Dependencies ----
# Attached once each, in the order the script first loaded them, so masking
# between packages is unchanged.
library(dplyr)
library(nycflights13)
library(sqldf)
library(data.table)
library(microbenchmark)
library(ggplot2)

# Data ----
flights

# One-off runs ----
# Each approach is evaluated once at top level so its result is printed.

# base
aggregate(flights$arr_delay, by = list(flights$carrier), mean, na.rm = TRUE)

# sqldf
sqldf("SELECT carrier, avg(arr_delay) FROM flights GROUP BY carrier")

# data.table (operates on a data.table built from `flights`)
flightsDT <- data.table(flights)
flightsDT[, mean(arr_delay, na.rm = TRUE), carrier]

# dplyr
flights %>%
  group_by(carrier) %>%
  summarize(mean(arr_delay, na.rm = TRUE))

# Benchmark ----
# Time the same four expressions; the argument names label the results and
# `times` sets how many times each expression is evaluated.
mbm <- microbenchmark(
  base = aggregate(
    flights$arr_delay,
    by = list(flights$carrier), mean, na.rm = TRUE
  ),
  sqldf = sqldf("SELECT carrier, avg(arr_delay) FROM flights GROUP BY carrier"),
  datatable = flightsDT[, mean(arr_delay, na.rm = TRUE), carrier],
  dplyr = flights %>%
    group_by(carrier) %>%
    summarize(mean(arr_delay, na.rm = TRUE)),
  times = 50
)
mbm

# Plot ----
# Plot the benchmark timings with ggplot2's autoplot().
autoplot(mbm)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment