knapply / gaul.R
Last active October 27, 2021 09:17
Global Administrative Unit Layers (GAUL) data -- admin levels 1 and 2 (used in GDELT events table)
# Download the GAUL administrative level 1 data as a KML file into the
# current working directory.
gaul_1_url <- ""
curl::curl_download(url = gaul_1_url, destfile = "gaul_adm1.kml")

# Download the GAUL administrative level 2 data the same way.
gaul_2_url <- ""
curl::curl_download(url = gaul_2_url, destfile = "gaul_adm2.kml")
gaul_1 <- sf::read_sf("gaul_adm1.kml") |>
sf::st_cast("GEOMETRYCOLLECTION") |>
// [[Rcpp::plugins(cpp17)]]
#include <Rcpp.h>
#include <fast_float/fast_float.h>
// [[Rcpp::depends(RcppFastFloat)]]
// [[Rcpp::export]]
---
title: "JSON Benching"
output: github_document
editor_options:
  chunk_output_type: console
---
```{r setup, include=FALSE}
# Echo each chunk's source code in the rendered document by default.
knitr::opts_chunk$set(echo = TRUE)
# Set R's output line width to 110 characters.
options(width = 110)
# Source of the JSON document to download.
json_url <- ""

# `tools::file_ext()` returns the extension WITHOUT its leading dot, while
# `tempfile(fileext = )` appends the suffix verbatim. Restore the dot so the
# temporary file ends in e.g. ".json" rather than "json"; when the URL has no
# extension, no suffix is added at all.
json_ext <- tools::file_ext(json_url)
temp_file <- tempfile(
  fileext = if (nzchar(json_ext)) paste0(".", json_ext) else ""
)

# Fetch the document into the temporary file.
download.file(json_url, temp_file)

load_and_unlist <- function(file_path, query = "Items") {
  init <- RcppSimdJson::fload(file_path, query = query)
  init[] <- lapply(init, unlist, use.names = FALSE)
knapply / get_mcc_mnc.R
Created September 22, 2020 17:50
Scrape Mobile Country Codes (MCCs) and Mobile Network Codes (MNCs)
get_mcc_mnc <- function() {
target_url <- ""
init <- xml2::read_html(target_url)
table_node <- rvest::html_node(init, "table")
out <- rvest::html_table(table_node)
out$MNC[out$MNC == "n/a"] <- NA_character_
out$MNC <- as.integer(out$MNC)
knapply / safety-bench.R
Created August 10, 2020 14:40
std::string_view(reinterpret_cast<const char*>(&(json[0])), std::size(json)) VS std::string(std::cbegin(json), std::cend(json))
##> [1] "890 MiB"
# parser.parse(
# std::string_view(reinterpret_cast<const char*>(&(json[0])), std::size(json))
# );
# Read the whole JSON file into memory as a raw vector.
js_path <- yuge_json_file
raw_js <- readr::read_file_raw(js_path)

# Compress the bytes in memory and write the result to disk. `memCompress()`
# accepts a raw vector directly, so the bytes are passed as-is: a
# `rawToChar()` round trip would copy the entire payload into a single string
# and errors if the data contains an embedded NUL byte.
# NOTE(review): `memCompress()` defaults to type "gzip"; confirm that its
# output is what downstream readers of the ".gz" file expect.
compressed_path <- "~/yuge.json.gz"
writeBin(memCompress(raw_js), compressed_path)
.file_extension <- function(x, dot = TRUE, ignore_zip_ext = FALSE) {
if (ignore_zip_ext) {
base_name <- sub("\\.[bgx]z2?$", "", basename(x))
} else {
base_name <- basename(x)
captures <- regexpr("(?<!^|[.]|/)[.]([^.]+)$", base_name, perl = TRUE)
out <- rep(NA_character_, length(x))
out[captures > 0L] <- substring(base_name[captures > 0L], captures[captures > 0L])
if (dot) out else substring(out, 2L)