Skip to content

Instantly share code, notes, and snippets.

@r2evans
Created August 15, 2020 02:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save r2evans/1ea2a1de11fc1f0aa391f9977766ccec to your computer and use it in GitHub Desktop.
Save r2evans/1ea2a1de11fc1f0aa391f9977766ccec to your computer and use it in GitHub Desktop.
Comparison of DBI/odbc, DBI/RODBCDBI, and RODBC db access for R
## ------------------------------------------------------------------
## SETUP
# docker run -d -p "31433:1433" --name testss \
# -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=Mysecretpassword1" \
# -e "MSSQL_PID=Developer" --cap-add SYS_PTRACE \
# microsoft/mssql-server-linux
# ... and add 'testss' as a DSN in the system- or user- odbc configuration
## ------------------------------------------------------------------
## CONNECTIONS
odbcdbi <- DBI::dbConnect(odbc::odbc(), dsn="testss", uid="SA", pwd="Mysecretpassword1")
rodbcdbi <- DBI::dbConnect(RODBCDBI::ODBC(), dsn="testss", uid="SA", pwd="Mysecretpassword1", case = "nochange")
rodbc <- RODBC::odbcConnect(dsn="testss", uid="SA", pwd="Mysecretpassword1")
## ------------------------------------------------------------------
## SAMPLE DATA
data("flights", package = "nycflights13")
head(flights, 10)
# # A tibble: 10 x 19
# year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
# <int> <int> <int> <int> <int> <dbl> <int> <int>
# 1 2013 1 1 517 515 2 830 819
# 2 2013 1 1 533 529 4 850 830
# 3 2013 1 1 542 540 2 923 850
# 4 2013 1 1 544 545 -1 1004 1022
# 5 2013 1 1 554 600 -6 812 837
# 6 2013 1 1 554 558 -4 740 728
# 7 2013 1 1 555 600 -5 913 854
# 8 2013 1 1 557 600 -3 709 723
# 9 2013 1 1 557 600 -3 838 846
# 10 2013 1 1 558 600 -2 753 745
# # ... with 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
# # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
# # hour <dbl>, minute <dbl>, time_hour <dttm>
## ------------------------------------------------------------------
## WRITE TEST
dat <- head(flights, 10)[,-19]
microbenchmark::microbenchmark(
odbcdbi = DBI::dbWriteTable(odbcdbi, "flights", dat),
rodbcdbi = DBI::dbWriteTable(rodbcdbi, "flights", dat),
rodbc = RODBC::sqlSave(rodbc, dat, "flights"),
setup = DBI::dbExecute(odbcdbi, "drop table flights"),
times = 10
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# odbcdbi 42.5866 48.1068 63.02283 52.3726 54.9726 175.0545 10
# rodbcdbi 120.9206 124.8994 147.82120 128.5527 133.4413 261.1105 10
# rodbc 117.3514 124.5346 128.05489 126.8475 131.7800 138.7100 10
dat <- head(flights, 10000)[,-19]
microbenchmark::microbenchmark(
odbcdbi = DBI::dbWriteTable(odbcdbi, "flights", dat),
rodbcdbi = DBI::dbWriteTable(rodbcdbi, "flights", dat),
rodbc = RODBC::sqlSave(rodbc, dat, "flights"),
setup = DBI::dbExecute(odbcdbi, "drop table flights"),
times = 10
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# odbcdbi 311.3064 346.2952 370.1984 360.6649 392.0773 463.7257 10
# rodbcdbi 76299.2259 77817.4400 82619.1613 81581.8538 87358.8572 89985.6128 10
# rodbc 74530.2949 76333.3798 79932.4808 81056.4030 82712.8733 85201.5292 10
## ------------------------------------------------------------------
## WHOLE-TABLE READ
microbenchmark::microbenchmark(
odbcdbi = DBI::dbReadTable(odbcdbi, "flights"),
rodbcdbi = DBI::dbReadTable(rodbcdbi, "flights"),
rodbc = RODBC::sqlFetch(rodbc, "flights"),
times = 10
)
# Unit: seconds
# expr min lq mean median uq max neval
# odbcdbi 1.888136 2.242207 2.378295 2.380542 2.537712 3.037383 10
# rodbcdbi 18.302842 20.572316 22.148530 22.180591 23.939045 25.751739 10
# rodbc 19.968433 21.002547 22.787167 21.709641 24.553324 28.107682 10
## ------------------------------------------------------------------
## QUERY (whole-table) READ
microbenchmark::microbenchmark(
odbcdbi = DBI::dbGetQuery(odbcdbi, "select * from flights"),
rodbcdbi = DBI::dbGetQuery(rodbcdbi, "select * from flights"),
rodbc = RODBC::sqlQuery(rodbc, "select * from flights"),
times = 10
)
# Unit: seconds
# expr min lq mean median uq max neval
# odbcdbi 1.445874 1.836158 1.884076 1.931463 1.944022 2.057212 10
# rodbcdbi 17.304687 18.970654 20.042980 19.982043 21.247385 22.101437 10
# rodbc 19.015758 19.243007 20.319214 19.957215 21.721255 22.245261 10
## ------------------------------------------------------------------
## QUERY (partial) READ
microbenchmark::microbenchmark(
odbcdbi = DBI::dbGetQuery(odbcdbi, "select top 1000 * from flights"),
rodbcdbi = DBI::dbGetQuery(rodbcdbi, "select top 1000 * from flights"),
rodbc = RODBC::sqlQuery(rodbc, "select top 1000 * from flights"),
times = 10
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# odbcdbi 10.5405 11.2504 11.75502 11.78765 12.0187 13.1082 10
# rodbcdbi 36.1839 36.7193 37.96366 37.46475 38.9476 41.6548 10
# rodbc 37.4643 37.7944 38.42617 37.94985 38.5589 41.2796 10
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment