Skip to content

Instantly share code, notes, and snippets.

@knapply
Created October 1, 2020 02:13
Show Gist options
  • Save knapply/0348bd92d7750a5ee7136e36c82e6eb4 to your computer and use it in GitHub Desktop.
Save knapply/0348bd92d7750a5ee7136e36c82e6eb4 to your computer and use it in GitHub Desktop.
# Location of the sample JSON document used by every example below.
json_url <- "https://gist.githubusercontent.com/vizowl/f9f2ef6c6221e28b103c66d7afc77985/raw/11b05a5cc921373d56f7d9b13b4f88f32aed3c4f/sample.json"

# `tools::file_ext()` returns the extension WITHOUT its leading dot ("json"),
# whereas `tempfile(fileext = )` appends its argument verbatim. Prepend the dot
# so the temporary path ends in ".json" instead of "...json".
temp_file <- tempfile(fileext = paste0(".", tools::file_ext(json_url)))
download.file(json_url, temp_file)

#' Parse a JSON file and flatten every element with `unlist()`
#'
#' Reads `file_path` with `RcppSimdJson::fload()`, then replaces each element
#' (each column, when the parsed result is a data frame) with its unlisted,
#' unnamed contents.
#'
#' @param file_path Path of the JSON file to parse.
#' @param query Forwarded to `RcppSimdJson::fload()` as its `query` argument.
#'   Defaults to `"Items"`.
#' @return The parsed object with every element flattened; its container type
#'   and attributes are whatever `fload()` produced.
load_and_unlist <- function(file_path, query = "Items") {
  parsed <- RcppSimdJson::fload(file_path, query = query)

  flatten_element <- function(element) {
    unlist(element, use.names = FALSE)
  }

  # Assigning through `[]` swaps the contents while keeping the container
  # (class, names, other attributes) that `fload()` returned.
  parsed[] <- lapply(parsed, flatten_element)
  parsed
}

# Show the `unlist()`-based result as a tibble for compact printing.
tibble::as_tibble(x = load_and_unlist(file_path = temp_file))
#> # A tibble: 1,000 x 9
#>    what        mobile rating client                   timestamp   sourceIp tag        id                      who       
#>    <chr>       <chr>  <chr>  <chr>                    <chr>       <chr>    <chr>      <chr>                   <chr>     
#>  1 HOUSING     false  5      258acb41-5dc2-4d74-9213… 1601452859… ---      debate-20… 37eff5a8-a9c1-42b4-af4… Judith Co…
#>  2 ECONOMY     false  1      4453930e-46df-4694-ae4f… 1601451300… ---      debate-20… c4604411-6605-44ff-a17… Jacinda A…
#>  3 HEALTH      false  2      e643ce11-6228-4f24-8d27… 1601455190… ---      debate-20… 1f3fdfb4-fed0-492e-9d8… Jacinda A…
#>  4 HEALTH      false  5      8e5c1606-4c9d-40a3-b7e4… 1601451161… ---      debate-20… d565df23-e6da-42dd-846… Judith Co…
#>  5 ECONOMY     false  5      8af58b60-1241-43a8-9b15… 1601468327… ---      debate-20… 94a0a462-129e-400d-be6… Jacinda A…
#>  6 COVID RESP… false  5      1d5b7748-4251-49f9-a32e… 1601455811… ---      debate-20… 86edfca2-16bd-4cc5-951… Judith Co…
#>  7 EDUCATION   false  1      d5e79713-7c77-4f38-bece… 1601496188… ---      debate-20… aa3fcd3f-b070-448d-9f6… Judith Co…
#>  8 COVID RESP… false  3      744291c7-6a8c-4f9e-9509… 1601475990… ---      debate-20… 6e390d17-9b20-4e22-9b4… Judith Co…
#>  9 ECONOMY     false  5      34d9a2b2-d230-485d-a3e6… 1601452141… ---      debate-20… c2db5910-5831-489f-a0f… Judith Co…
#> 10 HOUSING     false  1      b27f66be-93cf-4b4c-8c7a… 1601457655… ---      debate-20… 15f9f1e2-c164-49e5-b55… Jacinda A…
#> # … with 990 more rows

#' Parse a JSON file and flatten every column with two `tidyr::unnest()` passes
#'
#' Reads `file_path` with `RcppSimdJson::fload()` and then unnests all of the
#' parsed columns twice in a row.
#'
#' @param file_path Path of the JSON file to parse.
#' @param query Forwarded to `RcppSimdJson::fload()` as its `query` argument.
#'   Defaults to `"Items"`.
#' @return The result of the second `tidyr::unnest()` call.
load_and_unnest <- function(file_path, query = "Items") {
  parsed <- RcppSimdJson::fload(file_path, query = query)
  nested_cols <- names(parsed)

  # NOTE(review): two passes are presumably needed because each value in the
  # sample data sits two levels deep - confirm against the JSON.
  first_pass <- tidyr::unnest(parsed, cols = tidyselect::all_of(nested_cols))
  tidyr::unnest(first_pass, cols = tidyselect::all_of(nested_cols))
}

# Show the `tidyr::unnest()`-based result for the same file.
load_and_unnest(file_path = temp_file)
#> # A tibble: 1,000 x 9
#>    what        mobile rating client                   timestamp   sourceIp tag        id                      who       
#>    <chr>       <chr>  <chr>  <chr>                    <chr>       <chr>    <chr>      <chr>                   <chr>     
#>  1 HOUSING     false  5      258acb41-5dc2-4d74-9213… 1601452859… ---      debate-20… 37eff5a8-a9c1-42b4-af4… Judith Co…
#>  2 ECONOMY     false  1      4453930e-46df-4694-ae4f… 1601451300… ---      debate-20… c4604411-6605-44ff-a17… Jacinda A…
#>  3 HEALTH      false  2      e643ce11-6228-4f24-8d27… 1601455190… ---      debate-20… 1f3fdfb4-fed0-492e-9d8… Jacinda A…
#>  4 HEALTH      false  5      8e5c1606-4c9d-40a3-b7e4… 1601451161… ---      debate-20… d565df23-e6da-42dd-846… Judith Co…
#>  5 ECONOMY     false  5      8af58b60-1241-43a8-9b15… 1601468327… ---      debate-20… 94a0a462-129e-400d-be6… Jacinda A…
#>  6 COVID RESP… false  5      1d5b7748-4251-49f9-a32e… 1601455811… ---      debate-20… 86edfca2-16bd-4cc5-951… Judith Co…
#>  7 EDUCATION   false  1      d5e79713-7c77-4f38-bece… 1601496188… ---      debate-20… aa3fcd3f-b070-448d-9f6… Judith Co…
#>  8 COVID RESP… false  3      744291c7-6a8c-4f9e-9509… 1601475990… ---      debate-20… 6e390d17-9b20-4e22-9b4… Judith Co…
#>  9 ECONOMY     false  5      34d9a2b2-d230-485d-a3e6… 1601452141… ---      debate-20… c2db5910-5831-489f-a0f… Judith Co…
#> 10 HOUSING     false  1      b27f66be-93cf-4b4c-8c7a… 1601457655… ---      debate-20… 15f9f1e2-c164-49e5-b55… Jacinda A…
#> # … with 990 more rows


# Check that both loaders produce exactly the same data once the unnest-based
# result is converted to a plain data frame.
identical(
  x = load_and_unlist(file_path = temp_file),
  y = as.data.frame(x = load_and_unnest(file_path = temp_file))
)
#> [1] TRUE


# Time both loaders on the same file, evaluating each expression 5 times.
# The calls are left unnamed and unmodified so that the `expr` labels in the
# printed timings match the call text.
microbenchmark::microbenchmark(
  load_and_unlist(temp_file),
  load_and_unnest(temp_file),
  times = 5
)
#> Unit: milliseconds
#>                        expr        min         lq       mean     median         uq        max neval
#>  load_and_unlist(temp_file)   3.600351   4.209864   4.838697   4.666646   4.792187   6.924436     5
#>  load_and_unnest(temp_file) 232.921811 245.663934 280.810010 296.140538 309.914727 319.409038     5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment