Last active
October 5, 2021 22:59
-
-
Save magic-lantern/eb5cddd879261f423fafa14dadf1d156 to your computer and use it in GitHub Desktop.
bigrquery_example.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(bigrquery) | |
library(magrittr) | |
library(dplyr) | |
library(tidyr) | |
project <- "gbqproject" | |
dataset_writeable <- "gbqdataset" | |
dataset_readonly <- "gbqdataset_source" | |
# test query to prompt for GBQ Credentials | |
# RStudio can't just run the whole file at once | |
# bq_project_query(project, "select 'connect to gbq test'") | |
# example of how to see what tables are available | |
# library(DBI) | |
# | |
# con <- dbConnect( | |
# bigrquery::bigquery(), | |
# project = project, | |
# dataset = dataset_writeable, | |
# bigint = "numeric" | |
# ) | |
# con | |
# dbListTables(con) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load common definitions and libraries that are used in multiple scripts | |
source("r/base.R") | |
# test query to prompt for GBQ Credentials | |
# RStudio can't just run the whole file at once | |
# bq_project_query(project, "select 'connect to gbq test'") | |
sql_stmt <- paste0(" | |
WITH | |
diagnosis AS ( | |
SELECT * | |
FROM `gbqproject.gbqdataset.table4_diagnoses_*` AS I | |
WHERE | |
1=1 | |
AND I. _TABLE_SUFFIX = (SELECT MAX(REPLACE(table_id, 'table4_diagnoses_', '')) | |
FROM `gbqproject.gbqdataset.__TABLES_SUMMARY__` | |
WHERE table_id LIKE 'table4_diagnoses_%') ) | |
SELECT distinct arb_person_id, icd_type, icd_code | |
FROM diagnosis | |
WHERE icd_type != 'SNOMED CT' | |
ORDER BY arb_person_id | |
") | |
gbq_cursor <- bq_project_query(project, sql_stmt) | |
result <- bq_table_download(gbq_cursor, | |
page_size = 120000, | |
bigint = "numeric") | |
colnames(result) <- tolower(colnames(result)) | |
# Due to either bugs in BigQuery's API or bigrquery, sometimes not all results are returned | |
# found that at least for up to about 20 million rows, adjusting page size can make it work | |
if (nrow(result[result$arb_person_id == 0, ]) > 0) stop("Invalid data returned from GBQ") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment