# Download the sample JSON document to a temporary file for the examples below.
json_url <- "https://gist.githubusercontent.com/vizowl/f9f2ef6c6221e28b103c66d7afc77985/raw/11b05a5cc921373d56f7d9b13b4f88f32aed3c4f/sample.json"
# tools::file_ext() returns the extension without its leading dot ("json"),
# and tempfile(fileext = ) appends that string verbatim, so the dot is added
# here to get "<name>.json" rather than "<name>json".
temp_file <- tempfile(fileext = paste0(".", tools::file_ext(json_url)))
# mode = "wb" requests a binary write so the downloaded bytes are stored
# unchanged (this matters on Windows, where the default is text mode).
download.file(json_url, temp_file, mode = "wb")
# Load a JSON file with RcppSimdJson::fload() and flatten each element of the
# parsed result with unlist().
#
# Arguments:
#   file_path  Forwarded to RcppSimdJson::fload() as the file to read.
#   query      Forwarded to RcppSimdJson::fload() as `query`
#              (default "Items").
#
# Returns the object produced by fload() with every element replaced by its
# unlist()ed form (names dropped via use.names = FALSE). The replacement is
# assigned through `[]`, so the container itself is kept and only its
# elements are swapped out.
load_and_unlist <- function(file_path, query = "Items") {
  parsed <- RcppSimdJson::fload(file_path, query = query)
  flattened <- lapply(
    parsed,
    function(column) unlist(column, use.names = FALSE)
  )
  parsed[] <- flattened
  parsed
}
# Print the result of load_and_unlist() as a tibble.
tibble::as_tibble(x = load_and_unlist(file_path = temp_file))
#> # A tibble: 1,000 x 9
#> what mobile rating client timestamp sourceIp tag id who
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 HOUSING false 5 258acb41-5dc2-4d74-9213… 1601452859… --- debate-20… 37eff5a8-a9c1-42b4-af4… Judith Co…
#> 2 ECONOMY false 1 4453930e-46df-4694-ae4f… 1601451300… --- debate-20… c4604411-6605-44ff-a17… Jacinda A…
#> 3 HEALTH false 2 e643ce11-6228-4f24-8d27… 1601455190… --- debate-20… 1f3fdfb4-fed0-492e-9d8… Jacinda A…
#> 4 HEALTH false 5 8e5c1606-4c9d-40a3-b7e4… 1601451161… --- debate-20… d565df23-e6da-42dd-846… Judith Co…
#> 5 ECONOMY false 5 8af58b60-1241-43a8-9b15… 1601468327… --- debate-20… 94a0a462-129e-400d-be6… Jacinda A…
#> 6 COVID RESP… false 5 1d5b7748-4251-49f9-a32e… 1601455811… --- debate-20… 86edfca2-16bd-4cc5-951… Judith Co…
#> 7 EDUCATION false 1 d5e79713-7c77-4f38-bece… 1601496188… --- debate-20… aa3fcd3f-b070-448d-9f6… Judith Co…
#> 8 COVID RESP… false 3 744291c7-6a8c-4f9e-9509… 1601475990… --- debate-20… 6e390d17-9b20-4e22-9b4… Judith Co…
#> 9 ECONOMY false 5 34d9a2b2-d230-485d-a3e6… 1601452141… --- debate-20… c2db5910-5831-489f-a0f… Judith Co…
#> 10 HOUSING false 1 b27f66be-93cf-4b4c-8c7a… 1601457655… --- debate-20… 15f9f1e2-c164-49e5-b55… Jacinda A…
#> # … with 990 more rows
# Load a JSON file with RcppSimdJson::fload() and flatten it by applying
# tidyr::unnest() twice over the same set of columns.
#
# Arguments:
#   file_path  Forwarded to RcppSimdJson::fload() as the file to read.
#   query      Forwarded to RcppSimdJson::fload() as `query`
#              (default "Items").
#
# Returns the result of the second tidyr::unnest() call.
load_and_unnest <- function(file_path, query = "Items") {
  parsed <- RcppSimdJson::fload(file_path, query = query)
  # Column names are captured once, before any unnesting, and reused for
  # both passes.
  col_names <- names(parsed)
  first_pass <- tidyr::unnest(parsed, cols = tidyselect::all_of(col_names))
  tidyr::unnest(first_pass, cols = tidyselect::all_of(col_names))
}
# Print the result of the unnest()-based loader for the same file.
load_and_unnest(file_path = temp_file)
#> # A tibble: 1,000 x 9
#> what mobile rating client timestamp sourceIp tag id who
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 HOUSING false 5 258acb41-5dc2-4d74-9213… 1601452859… --- debate-20… 37eff5a8-a9c1-42b4-af4… Judith Co…
#> 2 ECONOMY false 1 4453930e-46df-4694-ae4f… 1601451300… --- debate-20… c4604411-6605-44ff-a17… Jacinda A…
#> 3 HEALTH false 2 e643ce11-6228-4f24-8d27… 1601455190… --- debate-20… 1f3fdfb4-fed0-492e-9d8… Jacinda A…
#> 4 HEALTH false 5 8e5c1606-4c9d-40a3-b7e4… 1601451161… --- debate-20… d565df23-e6da-42dd-846… Judith Co…
#> 5 ECONOMY false 5 8af58b60-1241-43a8-9b15… 1601468327… --- debate-20… 94a0a462-129e-400d-be6… Jacinda A…
#> 6 COVID RESP… false 5 1d5b7748-4251-49f9-a32e… 1601455811… --- debate-20… 86edfca2-16bd-4cc5-951… Judith Co…
#> 7 EDUCATION false 1 d5e79713-7c77-4f38-bece… 1601496188… --- debate-20… aa3fcd3f-b070-448d-9f6… Judith Co…
#> 8 COVID RESP… false 3 744291c7-6a8c-4f9e-9509… 1601475990… --- debate-20… 6e390d17-9b20-4e22-9b4… Judith Co…
#> 9 ECONOMY false 5 34d9a2b2-d230-485d-a3e6… 1601452141… --- debate-20… c2db5910-5831-489f-a0f… Judith Co…
#> 10 HOUSING false 1 b27f66be-93cf-4b4c-8c7a… 1601457655… --- debate-20… 15f9f1e2-c164-49e5-b55… Jacinda A…
#> # … with 990 more rows
# Check that both loaders produce exactly the same data. The output of
# load_and_unnest() is passed through as.data.frame() before the comparison.
identical(
  x = load_and_unlist(file_path = temp_file),
  y = as.data.frame(x = load_and_unnest(file_path = temp_file))
)
#> [1] TRUE
# Compare the run time of the two loaders on the same file, evaluating each
# expression 5 times.
microbenchmark::microbenchmark(
load_and_unlist(temp_file),
load_and_unnest(temp_file),
times = 5
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> load_and_unlist(temp_file) 3.600351 4.209864 4.838697 4.666646 4.792187 6.924436 5
#> load_and_unnest(temp_file) 232.921811 245.663934 280.810010 296.140538 309.914727 319.409038 5
# Created October 1, 2020 02:13
# Save knapply/0348bd92d7750a5ee7136e36c82e6eb4 to your computer and use it in GitHub Desktop.
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment