Skip to content

Instantly share code, notes, and snippets.

@magic-lantern
Last active October 5, 2021 22:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save magic-lantern/eb5cddd879261f423fafa14dadf1d156 to your computer and use it in GitHub Desktop.
Save magic-lantern/eb5cddd879261f423fafa14dadf1d156 to your computer and use it in GitHub Desktop.
bigrquery_example.R
library(bigrquery)
library(magrittr)
library(dplyr)
library(tidyr)
project <- "gbqproject"
dataset_writeable <- "gbqdataset"
dataset_readonly <- "gbqdataset_source"
# test query to prompt for GBQ Credentials
# RStudio can't just run the whole file at once
# bq_project_query(project, "select 'connect to gbq test'")
# example of how to see what tables are available
# library(DBI)
#
# con <- dbConnect(
# bigrquery::bigquery(),
# project = project,
# dataset = dataset_writeable,
# bigint = "numeric"
# )
# con
# dbListTables(con)
# load common definitions and libraries that are used in multiple scripts
source("r/base.R")
# test query to prompt for GBQ Credentials
# RStudio can't just run the whole file at once
# bq_project_query(project, "select 'connect to gbq test'")
sql_stmt <- paste0("
WITH
diagnosis AS (
SELECT *
FROM `gbqproject.gbqdataset.table4_diagnoses_*` AS I
WHERE
1=1
AND I. _TABLE_SUFFIX = (SELECT MAX(REPLACE(table_id, 'table4_diagnoses_', ''))
FROM `gbqproject.gbqdataset.__TABLES_SUMMARY__`
WHERE table_id LIKE 'table4_diagnoses_%') )
SELECT distinct arb_person_id, icd_type, icd_code
FROM diagnosis
WHERE icd_type != 'SNOMED CT'
ORDER BY arb_person_id
")
gbq_cursor <- bq_project_query(project, sql_stmt)
result <- bq_table_download(gbq_cursor,
page_size = 120000,
bigint = "numeric")
colnames(result) <- tolower(colnames(result))
# Due to either bugs in BigQuery's API or bigrquery, sometimes not all results are returned
# found that at least for up to about 20 million rows, adjusting page size can make it work
if (nrow(result[result$arb_person_id == 0, ]) > 0) stop("Invalid data returned from GBQ")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment