Skip to content

Instantly share code, notes, and snippets.

@patperu
Created February 24, 2020 08:57
Show Gist options
  • Save patperu/64fb0b19f4982896790619d8b44d13d0 to your computer and use it in GitHub Desktop.
Save patperu/64fb0b19f4982896790619d8b44d13d0 to your computer and use it in GitHub Desktop.
R Import Destatis Gemeindeverzeichnis
# https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/_inhalt.html
library(tidyverse)
library(pdftools)
#' Import a Destatis GV100AD municipality directory file
#'
#' Extracts the record layout from the PDF data-set description and uses it to
#' split the fixed-width data file into one table per record type ("Satzart").
#'
#' @param data_file Path to the fixed-width GV100AD data file.
#' @param data_dict_file Path to the PDF describing the record layout.
#' @return A list with two elements: `gv_satzart`, an unnamed list of six
#'   tibbles (record types 10, 20, ..., 60; every column is read as
#'   character), and `dictionary`, the layout table extracted from the PDF
#'   (columns `Satzart`, `X1`, `X2`, ...).
read_gv100ad <- function(data_file, data_dict_file) {
  # readr only treats character input as literal data when it contains a
  # newline; readr >= 2.0 interprets a plain vector of lines as a set of file
  # paths. Collapsing the lines into one newline-terminated string is
  # recognised as literal data by old and new readr versions alike, and also
  # covers the case of a single matching line.
  as_literal <- function(lines) {
    paste0(paste(lines, collapse = "\n"), "\n")
  }

  #################################################################
  # Data Dictionary
  #################################################################
  # pdf_text() yields one string per PDF page.
  data_dic <- pdftools::pdf_text(data_dict_file)

  # Parse the field-definition rows (lines starting with " EF") of page `x`.
  read_data_dic <- function(df, x) {
    ef_lines <- strsplit(df[x], "\r?\n")[[1]] %>%
      str_subset("^ EF")
    if (length(ef_lines) == 0) {
      # Page without field definitions: contribute no rows to the dictionary.
      return(tibble::tibble())
    }
    read_table(as_literal(ef_lines), col_names = FALSE)
  }

  dict <- purrr::map_dfr(
    seq_along(data_dic),
    read_data_dic,
    .id = "Satzart",
    df = data_dic
  ) %>%
    # The page index ("1", "2", ...) is turned into the record-type code
    # ("10", "20", ...). NOTE(review): this assumes page i of the PDF
    # describes record type i * 10 -- confirm against the PDF in use.
    mutate(Satzart = paste0(Satzart, 0))

  #################################################################
  # Data Import
  #################################################################
  # Read the data file once instead of once per record type.
  # NOTE(review): the lines are read with the default locale and only decoded
  # as latin1 by read_fwf() below, as in the original implementation.
  records <- read_lines(data_file)

  # Parse all records of one record type using the matching dictionary rows.
  import_gv <- function(df, dict, satzart) {
    # useBytes = TRUE: the pattern is pure ASCII and the lines have not been
    # decoded yet, so byte-wise matching avoids invalid-string warnings/errors.
    df <- df[grep(paste0("^", satzart), df, useBytes = TRUE)]

    # Keep the layout rows of this record type; rows whose field name contains
    # "U" are dropped (presumably sub-fields overlapping their parent field --
    # TODO confirm).
    dict <- dict %>%
      dplyr::filter(Satzart == satzart, !str_detect(X1, "U"))

    if (length(df) == 0) {
      # No records of this type: return a zero-row table with the expected
      # character columns rather than handing empty input to read_fwf().
      empty <- rep(list(character()), nrow(dict))
      names(empty) <- dict$X1
      return(tibble::as_tibble(empty, .name_repair = "minimal"))
    }

    # X5 supplies the field widths and X1 the column names.
    read_fwf(
      as_literal(df),
      fwf_widths(dict$X5, col_names = dict$X1),
      col_types = cols(.default = "c"),
      locale = locale(date_names = "de", encoding = "latin1")
    )
  }

  gv_satzart <- map(
    seq(10, 60, by = 10),
    import_gv,
    df = records,
    dict = dict
  )

  list(gv_satzart = gv_satzart, dictionary = dict)
}
# Example run: both input files are expected in the working directory.
fin <- read_gv100ad(
  data_file = "GV100AD_311218.ASC",
  data_dict_file = "Datensatzbeschreibung_GV100AD.pdf"
)
# Show the imported tables and the dictionary.
fin
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment