Skip to content

Instantly share code, notes, and snippets.

@AdamSpannbauer
Created June 11, 2018 19:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AdamSpannbauer/a3097414d013e6cb0a6b920a9490b33a to your computer and use it in GitHub Desktop.
Save AdamSpannbauer/a3097414d013e6cb0a6b920a9490b33a to your computer and use it in GitHub Desktop.
An R6 class for chunking large SQL queries (written for use with SQL Server). This is a translation of https://gist.github.com/AdamSpannbauer/b04c1f6243ce07a5d2e0c9eb78502a55
library(R6)
# R6 class that pages through a large SQL query in fixed-size chunks by
# appending SQL Server OFFSET/FETCH paging clauses to the user's query.
# NOTE(review): the base query must end in an ORDER BY for OFFSET/FETCH to be
# valid and deterministic — TODO confirm callers always supply one.
QueryChunker <- R6Class(
  "QueryChunker",
  public = list(
    query = NULL,             # user's base query, without paging clauses
    chunk_query = NULL,       # glue template: query + OFFSET/FETCH placeholders
    connection = NULL,        # DBI connection used to execute each chunk
    post_process_func = NULL, # optional function applied to each chunk's result
    chunk_size = 1000L,       # rows fetched per call to fetch_chunk()
    offset_size = 0L,         # rows already consumed (paging cursor)

    # Create a chunker.
    #
    # @param query Single SQL query string (SQL Server dialect); should end in
    #   an ORDER BY clause and must not already contain OFFSET/FETCH.
    # @param connection A DBI connection (e.g. from DBI::dbConnect()).
    # @param post_process_func Optional function(data.frame) -> data.frame
    #   applied to every fetched chunk.
    # @param chunk_size Positive integer number of rows per chunk.
    initialize = function(query,
                          connection,
                          post_process_func = NULL,
                          chunk_size = 1000L) {
      # Validate early so broken paging SQL is never generated.
      stopifnot(is.character(query), length(query) == 1L)
      chunk_size <- as.integer(chunk_size)
      stopifnot(length(chunk_size) == 1L, !is.na(chunk_size), chunk_size > 0L)

      self$query <- query
      self$connection <- connection
      self$post_process_func <- post_process_func
      self$chunk_size <- chunk_size
      # Append chunking SQL; the {self$...} placeholders are filled by
      # glue::glue() at fetch time so the offset is always current.
      self$chunk_query <- paste0(
        query, "\n",
        "OFFSET {self$offset_size} ROWS\n",
        "FETCH NEXT {self$chunk_size} ROWS ONLY"
      )
    },

    # Execute the next chunk and advance the paging offset.
    #
    # @param debug_print If TRUE, print the generated SQL before running it.
    # @return The chunk's result (data.frame, possibly transformed by
    #   post_process_func). An empty result means the query is exhausted.
    fetch_chunk = function(debug_print = FALSE) {
      # Fill in the current offset and chunk size.
      query_i <- glue::glue(self$chunk_query)

      if (debug_print) {
        cat("\nQUERY:\n-------------\n", query_i, "\n-------------\n\n",
            sep = "")
      }

      # Execute the query and store the results.
      query_result <- DBI::dbGetQuery(
        conn = self$connection,
        statement = query_i
      )

      # Apply the post-processing function if one was provided.
      if (is.function(self$post_process_func)) {
        query_result <- self$post_process_func(query_result)
      }

      # Advance the cursor only after a successful fetch, so a failed query
      # can be retried without skipping rows.
      self$offset_size <- self$offset_size + self$chunk_size
      query_result
    },

    # Reset paging so the next fetch_chunk() starts from the first row.
    reset = function() {
      self$offset_size <- 0L
      invisible(self)
    }
  )
)
# Demo: page through my_table 100 rows at a time with QueryChunker.

# Open an ODBC connection via DBI.
conn_str <- "my connection string"
db_conn <- DBI::dbConnect(
  drv = odbc::odbc(),
  .connection_string = conn_str
)

# Paged queries need a deterministic ORDER BY.
base_query <- "
SELECT *
FROM my_table
ORDER BY my_column"

# Build a chunker that pulls 100 rows per call.
chunker <- QueryChunker$new(
  base_query,
  connection = db_conn,
  chunk_size = 100L
)

chunker$fetch_chunk() # fetch rows 1-100
chunker$fetch_chunk() # fetch rows 101-200
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment