Last active
June 26, 2018 09:56
-
-
Save nacnudus/05f5e6cc6f984a3017b78f159325c240 to your computer and use it in GitHub Desktop.
Various ways to download a GOV.UK register and load it into a database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load all registers into various databases | |
library(tidyverse) | |
library(DBI) | |
library(registr) # https://github.com/nacnudus/registr | |
library(stringr) | |
library(getPass) | |
library(here) | |
beta <- rr_registers() | |
alpha <- rr_registers(phase = "alpha") %>% compact() | |
really_alpha <- keep(alpha, !(names(alpha) %in% names(beta))) | |
discovery <- rr_registers(phase = "discovery") %>% compact() | |
really_discovery <- keep(alpha, !(names(discovery) %in% names(beta)), !(names(discovery) %in% names(alpha))) | |
registers <- | |
c(beta, really_alpha, really_discovery) %>% | |
map(pluck, "data") | |
decardinalise <- function(.data) { | |
mutate_if(.data, is.list, ~ map_chr(.x, ~ paste0(.x, collapse = ";"))) | |
} | |
# Generic function to write a register to a database | |
register2db <- function(x, name) { | |
message("Writing register to database: ", name, "\n") | |
dbWriteTable(con, name, decardinalise(x), overwrite = TRUE) | |
} | |
# SQLite | |
# Create a new database | |
sqlite_path <- file.path(here(), "registers.sqlite3") | |
con <- dbConnect(RSQLite::SQLite(), sqlite_path) | |
# Put each register into the database | |
imap(registers, register2db) | |
# Check that all the registers are now in the database | |
dbListTables(con) | |
setdiff(names(registers), dbListTables(con)) | |
# Close the database | |
dbDisconnect(con) | |
# PostgreSQL | |
con <- dbConnect(RPostgreSQL::PostgreSQL(), dbname = "registers") | |
# Put each register into the database | |
walk2(registers, names(registers), register2db) | |
# Check that all the registers are now in the database | |
dbListTables(con) | |
setdiff(names(registers), dbListTables(con)) | |
# Close the database | |
dbDisconnect(con) | |
# MySQL (using RMySQL, being phased out in favour of RMariaDB) | |
password <- getPass() | |
con <- dbConnect(RMySQL::MySQL(), dbname = "registers", password = password) | |
# Put each register into the database | |
names(registers) <- str_replace_all(names(registers), "-", "_") # MySQL/MariaDB has a problem with hyphenated names | |
walk2(registers, names(registers), register2db) | |
# Check that all the registers are now in the database | |
dbListTables(con) | |
setdiff(names(registers), dbListTables(con)) | |
# Close the database | |
dbDisconnect(con) | |
# MariaDB/MySQL (using RMariaDB, not yet on CRAN as of 2017-09-01 | |
# devtools::install_github("rstats-db/RMariaDB") | |
password <- getPass() | |
con <- dbConnect(RMariaDB::MariaDB(), dbname = "registers", password = password) | |
# Put each register into the database | |
names(registers) <- str_replace_all(names(registers), "-", "_") # MySQL/MariaDB has a problem with hyphenated names | |
walk2(registers, names(registers), register2db) | |
# Check that all the registers are now in the database | |
dbListTables(con) | |
setdiff(names(registers), dbListTables(con)) | |
# Close the database | |
dbDisconnect(con) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Updated for the latest version of RegistersClientR.