Skip to content

Instantly share code, notes, and snippets.

@uribo
Last active July 8, 2019 06:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save uribo/5c67ef24dcaf17402175b0d474cd8cb2 to your computer and use it in GitHub Desktop.
Save uribo/5c67ef24dcaf17402175b0d474cd8cb2 to your computer and use it in GitHub Desktop.
zipangu: 国土数値情報データをR上で扱いやすい形式にパースする
build_req_url <- function(api = c("getKSJSummary", "getKSJURL"), ...) {
query <- NULL
rlang::arg_match(api)
req_url <-
glue::glue(
"http://nlftp.mlit.go.jp/ksj/api/{version}/index.php/app/{api}.xml?appId={app_id}&lang={lang}&dataformat={data_format}",
version = "1.0b",
app_id = "ksjapibeta1",
lang = "J",
data_format = 1 # JPGIS2.1
)
req_url <-
httr::parse_url(req_url)
if (api == "getKSJURL")
req_url$query <- c(req_url$query, purrr::map_at(list(...),
c("prefCode", "meshCode", "metroArea", "fiscalyear"),
paste,
collapse = ","))
req_url
}
parse_ksj_xml <- function(x) {
if (x[[1]][[1]][[1]][[1]] != 0)
rlang::abort("error")
cat(cli::col_cyan("Hit"),
cli::style_bold(x[[1]]$NUMBER[[1]]),
cli::col_cyan("records.\n"))
tibble::tibble(item = purrr::pluck(purrr::pluck(x, 1), 4)) %>%
tidyr::unnest_wider(item) %>%
dplyr::mutate_all(unlist)
}
request_to_ksj <- function(x) {
httr::build_url(x) %>%
httr::GET() %>%
httr::content(encoding = "UTF-8") %>%
xml2::as_list() %>%
parse_ksj_xml()
}
#' A31 浸水想定区域データ
collect_a31 <- function(path) {
d <-
sf::st_read(path,
stringsAsFactors = FALSE,
crs = 4612,
as_tibble = TRUE) %>%
lwgeom::st_make_valid() %>%
sf::st_transform(crs = 4326) %>%
dplyr::mutate_at(dplyr::vars(A31_001, A31_002), as.character) %>%
dplyr::mutate(A31_001 = forcats::as_factor(A31_001)) %>%
dplyr::mutate_if(is.character,
stringi::stri_trans_general,
id = "nfkc")
xml_info <-
list.files(gsub(pattern = "(.+)/.+$",
replacement = "\\1",
path),
pattern = "^A31-12_.+.xml",
full.names = TRUE)
if (length(xml_info) == 1) {
xml_info <-
xml_info %>%
xml2::read_xml() %>%
xml2::as_list()
vars <- names(xml_info$Dataset$ExpectedFloodArea)[-1]
vars <- paste0("A31_",
stringr::str_pad(seq_len(length(vars)), width = 3, pad = "0")) %>%
purrr::set_names(vars) %>%
purrr::keep(~ .x %in% names(d))
d %>%
dplyr::rename(!!vars)
} else
d
}
#' W05 河川データ
collect_w05 <- function(path, type = c("stream", "node")) {
rlang::arg_match(type)
d <-
sf::st_read(path,
stringsAsFactors = FALSE,
crs = 4612,
as_tibble = TRUE) %>%
st_transform(crs = 4326)
d
}
ksj_data_url <- function(identifier = identifier, ...) {
build_req_url("getKSJURL", identifier = identifier, ...) %>%
request_to_ksj()
}
ksj_parse_a16 <- function(path) {
d <- sf::st_read(path,
crs = 6668,
as_tibble = TRUE,
stringsAsFactors = FALSE,
int64_as_string = TRUE,
options = "ENCODING=cp932") %>%
sf::st_transform(crs = 4326) %>%
dplyr::mutate(A16_001 = stringr::str_pad(A16_001, width = 7, pad = "0"))
xml_info <-
list.files(gsub(pattern = "(.+)/.+$",
replacement = "\\1",
path),
pattern = paste0("^A16.+",
".xml"),
full.names = TRUE)
if (length(xml_info) == 1) {
xml_info <-
xml_info %>%
xml2::read_xml() %>%
xml2::as_list()
fix_names <-
c(names(xml_info$Dataset$DID)[-2],
"geometry")
d <-
d %>%
purrr::set_names(fix_names)
d[[length(fix_names) - 1]] <-
xml_info$Dataset$DID$censusYear[[1]]$timePosition[[1]]
} else {
yy <- gsub("_.+", "", gsub("A16-", "", basename(path)))
if (rlang::is_true(stringr::str_detect(
yy,
paste0(stringr::str_pad(c(seq(60, 95, by = 5),
seq(0, 15, by = 5)),
width = 2,
pad = "0"),
collapse = "|"))))
d <- d %>%
dplyr::mutate(A16_011 = dplyr::case_when(
yy %in% c(seq(60, 95, by = 5)) ~ paste0("19", yy),
yy %in% c(seq(0, 15, by = 5)) ~ paste0("20", yy)
))
}
d
}
ksj_parse_a23 <- function(path) {
d <-
sf::st_read(path,
stringsAsFactors = FALSE,
as_tibble = TRUE,
crs = 6668,
options = c("ENCODING=CP932")) %>%
sf::st_transform(crs = 4326)
d
}
df_particularSoilTypeCode <-
tibble::tribble(
~particularSoilTypeCode, ~soil,
1, "シラス・ボヤ・コラ・赤ホヤ・花崗岩風化土",
2, "赤ホヤ・シラス・花崗岩風化土",
3, "赤ホヤ",
4, "赤ホヤ・花崗岩風化土",
5, "花崗岩風化土",
6, "ヨナ・赤ホヤ・花崗岩風化土・シラス",
7, "富士マサ",
8, "赤ホヤ・ヨナ")
ksj_parse_a30a5 <- function(path) {
d <-
sf::st_read(path,
stringsAsFactors = FALSE,
crs = 4612,
as_tibble = TRUE) %>%
dplyr::mutate_if(is.character, dplyr::na_if, y = "空白")
xml_info <-
list.files(gsub(pattern = "(.+)/.+$",
replacement = "\\1",
path),
pattern = paste0("^",
gsub("_SedimentDisasterAndSnowslide.+", "", basename(path)),
".xml"),
full.names = TRUE)
if (length(xml_info) == 1) {
# xml_info <-
# xml_info %>%
# xml2::read_xml() %>%
# xml2::as_list()
d <-
d %>%
purrr::set_names(
c(
# names(xml_info$Dataset$SedimentDisasterAndSnowslide$generatePlace),
"prefectureName", "cityName",
"hazardDate", "hazardType",
stringr::str_glue("{x[1]}_{x[2]}",
x = c(stringr::str_split("maxRainfallFor24h", "(?=24h)", simplify = TRUE))),
paste0("maxRainfall", "_h"),
"inclination",
paste0("outflowSediment", "_m3"),
paste0("landslideLength", "_m"),
"meshCode",
# names(xml_info$Dataset$SedimentDisasterAndSnowslide)[-c(1, 9)],
attr(d, "sf_column")
)) %>%
dplyr::mutate(hazardDate = lubridate::as_date(hazardDate)) %>%
dplyr::mutate_at(dplyr::vars(tidyselect::starts_with("maxRainfall"),
outflowSediment_m3,
landslideLength_m),
readr::parse_number) %>%
dplyr::mutate_if(is.character,
stringi::stri_trans_general,
id = "nfkc")
}
d
}
ksj_parse_g04 <- function(path, scale = 1) {
d <-
sf::st_read(path,
as_tibble = TRUE,
crs = 4612,
stringsAsFactors = FALSE) %>%
sf::st_transform(crs = 4326)
xml_info <-
list.files(gsub(pattern = "(.+)/.+$",
replacement = "\\1",
path),
pattern = "^G04-a-11_.+.xml",
full.names = TRUE)
if (length(xml_info) == 1) {
xml_info <-
xml_info %>%
xml2::read_xml() %>%
xml2::as_list()
d <-
d %>%
purrr::set_names(c(unname(unlist(xml_info$Dataset$CompositeValue)),
"geometry")) %>%
purrr::set_names(c("meshcode",
# 整備データがない場合はunknown
paste0(c("mean", "max", "min"),
"_elevation"),
# 海面下=5、その他=0
"minimum_elevation_code",
# 0=方向なし、1=北、2=北東、3=東、4=東南、5=南、6=南西、7=西、8=北西
"max_slope_aspect",
"max_slope_angle",
"min_slope_aspect",
"min_slope_angle",
"mean_slope_aspect",
"geometry")) %>%
dplyr::mutate_at(dplyr::vars(tidyselect::contains("elevation"),
tidyselect::contains("slope")),
.funs = list(~ dplyr::na_if(., "unknown") %>%
as.numeric()))
} else {
d <-
d %>%
dplyr::mutate_at(dplyr::vars(tidyselect::num_range("G04a_", range = seq(2, 10), width = 3)),
.funs = list(~ dplyr::na_if(., "unknown") %>%
as.numeric()))
}
d
}
ksj_parse_l01 <- function(path) {
sf::st_read(path,
as_tibble = TRUE,
stringsAsFactors = FALSE,
crs = 4612) %>%
sf::st_transform(crs = 4326) %>%
dplyr::mutate(
L01_006 = as.numeric(L01_006),
L01_023 = stringi::stri_trans_general(L01_023, id = "nfkc")) %>%
# 欠損値へと変換
dplyr::mutate_if(is.character, .funs = list(~ na_if(., "_"))) %>%
# dplyr::mutate_at(dplyr::vars(
# tidyselect::num_range("L01_", c(seq.int(5, 7)
# #,
# # 24,
# #seq.int(34, 35),
# #seq.int(52, 53)
# ), width = 3),
# as.integer)) %>%
dplyr::mutate_at(dplyr::vars(
tidyselect::num_range("L01_", seq.int(8, 20), width = 3),
tidyselect::num_range("L01_", seq.int(28, 30), width = 3),
L01_054),
.funs = list(~ dplyr::recode(.,
`false` = FALSE,
`true` = TRUE)))
}
set_names_l01 <- function(data, xml_path) {
x <-
xml2::read_xml(xml_path) %>%
xml2::as_list()
data %>%
purrr::set_names(
c(
x$Dataset$LandPrice$representedLandCode$RepresentedLandCode %>% names(),
paste0("previous_", x$Dataset$LandPrice$previousRepresentedLandCode$RepresentedLandCode %>% names()),
names(x$Dataset$LandPrice[3]),
names(x$Dataset$LandPrice[5]),
paste0("attribute_change_", x$Dataset$LandPrice$attributeChange$AttributeChange %>% names()),
names(x$Dataset$LandPrice[7:40]),
"geometry"))
}
ksj_parse_l03 <- function(path, type = "a") {
d <- sf::st_read(path,
as_tibble = TRUE,
crs = 4612,
stringsAsFactors = FALSE,
options = c("ENCODING=CP932")) %>%
sf::st_transform(crs = 4326)
xml_info <-
list.files(gsub(pattern = "(.+)/.+$",
replacement = "\\1",
path),
pattern = "^L03-a.+.xml",
full.names = TRUE)
if (length(xml_info) == 1L) {
xml_info <-
xml_info %>%
xml2::read_xml() %>%
xml2::as_list()
d <- d %>%
purrr::set_names(c("meshcode", "ta", "nouchi", "sinrin", "kouch", "tatemono", "douro", "tetudou",
"other", "kasen", "kaihin", "kaisui", "golf", "geometry"))
}
d
}
ksj_parse_s05 <- function(path) {
d <- sf::st_read(path,
as_tibble = TRUE,
crs = 4612,
stringsAsFactors = FALSE,
options = c("ENCODING=CP932")) %>%
sf::st_transform(crs = 4326)
xml_info <-
list.files(gsub(pattern = "(.+)/.+$",
replacement = "\\1",
path),
pattern = "^S05-c.+.xml",
full.names = TRUE)
if (length(xml_info) == 1L) {
xml_info <-
xml_info %>%
xml2::read_xml() %>%
xml2::as_list()
d <-
d %>%
purrr::set_names(c(names(xml_info$Dataset$CommutersPerStation_TokyoUrbanArea)[-1],
"geometry"))
}
d
}
ksj_parse_s12 <- function(path) {
if (stringr::str_detect(basename(path), ".shp$")) {
d <- sf::st_read(path,
stringsAsFactors = FALSE,
crs = 6668,
as_tibble = TRUE,
options = c("ENCODING=CP932")) %>%
sf::st_transform(crs = 4326)
# xml_info <-
# list.files(gsub(pattern = "(.+)/.+$",
# replacement = "\\1",
# path),
# pattern = "^S12.+.xml",
# full.names = TRUE)
# if (length(xml_info) == 1L) {
# xml_info <-
# xml_info %>%
# xml2::read_xml() %>%
# xml2::as_list()
# }
} else if (stringr::str_detect(basename(path), ".geojson$")) {
d <- sf::st_read(path,
stringsAsFactors = FALSE,
as_tibble = TRUE) %>%
sf::st_transform(crs = 4326)
}
d
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment