Skip to content

Instantly share code, notes, and snippets.

View knapply's full-sized avatar
🕵️

knapply

🕵️
View GitHub Profile
@knapply
knapply / gaul.R
Last active October 27, 2021 09:17
Global Administrative Unit Layers (GAUL) data -- admin levels 1 and 2 (used in GDELT events table)
# GAUL layers g2015_2014_1 and g2015_2014_2, requested from the FAO WFS
# endpoint and saved into the current working directory.
gaul_1_url <- "https://data.apps.fao.org/map/gsrv/gsrv1/gaul/wfs?service=WFS&version=1.1.0&request=GetFeature&typeName=gaul:g2015_2014_1&srsName=EPSG%3A4326&maxFeatures=900000"
gaul_2_url <- "https://data.apps.fao.org/map/gsrv/gsrv1/gaul/wfs?service=WFS&version=1.1.0&request=GetFeature&typeName=gaul:g2015_2014_2&srsName=EPSG%3A4326&maxFeatures=900000"

curl::curl_download(url = gaul_1_url, destfile = "gaul_adm1.kml")
curl::curl_download(url = gaul_2_url, destfile = "gaul_adm2.kml")
gaul_1 <- sf::read_sf("gaul_adm1.kml") |>
sf::st_cast("GEOMETRYCOLLECTION") |>
// [[Rcpp::plugins(cpp17)]]
#include <Rcpp.h>
#include <fast_float/fast_float.h>
// [[Rcpp::depends(RcppFastFloat)]]
// [[Rcpp::export]]
Note: Setting PKG_ROOT variable to the current directory.
Output will be in ./lib and ./build subdirectories.
If it is not, maybe your local singularity configuration does
not allow singularity container to access your current directory;
it may help to try with PKG_ROOT=/tmp or check the configuration.
install.packages("RcppSimdJson", repos='https://cloud.r-project.org')
Installing package into ‘/home/brendan/lib’
(as ‘lib’ is unspecified)
also installing the dependency ‘Rcpp’
---
title: "JSON Benching"
output: github_document
editor_options:
  chunk_output_type: console
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
options(width = 110)

# Sample JSON document used as the benchmark input.
json_url <- "https://gist.githubusercontent.com/vizowl/f9f2ef6c6221e28b103c66d7afc77985/raw/11b05a5cc921373d56f7d9b13b4f88f32aed3c4f/sample.json"

# tools::file_ext() returns the extension without its leading dot, while
# tempfile() appends `fileext` verbatim, so the dot has to be added here to
# get a path ending in ".json" rather than "...json" with no separator.
temp_file <- tempfile(fileext = paste0(".", tools::file_ext(json_url)))
download.file(json_url, temp_file)

load_and_unlist <- function(file_path, query = "Items") {
  init <- RcppSimdJson::fload(file_path, query = query)
  init[] <- lapply(init, unlist, use.names = FALSE)
  init
[{"MCC":289,"MNC":88,"ISO":"ge","Country":"Abkhazia","Country Code":7,"Network":"A-Mobile"},{"MCC":289,"MNC":68,"ISO":"ge","Country":"Abkhazia","Country Code":7,"Network":"A-Mobile"},{"MCC":289,"MNC":67,"ISO":"ge","Country":"Abkhazia","Country Code":7,"Network":"Aquafon"},{"MCC":412,"MNC":88,"ISO":"af","Country":"Afghanistan","Country Code":93,"Network":"Afghan Telecom Corp. (AT)"},{"MCC":412,"MNC":80,"ISO":"af","Country":"Afghanistan","Country Code":93,"Network":"Afghan Telecom Corp. (AT)"},{"MCC":412,"MNC":1,"ISO":"af","Country":"Afghanistan","Country Code":93,"Network":"Afghan Wireless/AWCC"},{"MCC":412,"MNC":40,"ISO":"af","Country":"Afghanistan","Country Code":93,"Network":"Areeba/MTN"},{"MCC":412,"MNC":50,"ISO":"af","Country":"Afghanistan","Country Code":93,"Network":"Etisalat"},{"MCC":412,"MNC":30,"ISO":"af","Country":"Afghanistan","Country Code":93,"Network":"Etisalat"},{"MCC":412,"MNC":20,"ISO":"af","Country":"Afghanistan","Country Code":93,"Network":"Roshan/TDCA"},{"MCC":412,"MNC":3,"ISO":"af","Count
MCC MNC ISO Country Country Code Network
289 88 ge Abkhazia 7 A-Mobile
289 68 ge Abkhazia 7 A-Mobile
289 67 ge Abkhazia 7 Aquafon
412 88 af Afghanistan 93 Afghan Telecom Corp. (AT)
412 80 af Afghanistan 93 Afghan Telecom Corp. (AT)
412 1 af Afghanistan 93 Afghan Wireless/AWCC
412 40 af Afghanistan 93 Areeba/MTN
412 50 af Afghanistan 93 Etisalat
412 30 af Afghanistan 93 Etisalat
@knapply
knapply / get_mcc_mnc.R
Created September 22, 2020 17:50
Scrape MCCs and MNCs
get_mcc_mnc <- function() {
target_url <- "http://mcc-mnc.com/"
init <- xml2::read_html(target_url)
table_node <- rvest::html_node(init, "table")
out <- rvest::html_table(table_node)
out$MNC[out$MNC == "n/a"] <- NA_character_
out$MNC <- as.integer(out$MNC)
@knapply
knapply / safety-bench.R
Created August 10, 2020 14:40
std::string_view(reinterpret_cast<const char*>(&(json[0])), std::size(json)) VS std::string(std::cbegin(json), std::cend(json))
scales::number_bytes(file.size(yuge_json_file))
##> [1] "890 MiB"
# parser.parse(
# std::string_view(reinterpret_cast<const char*>(&(json[0])), std::size(json))
# );
# Read the JSON file as raw bytes and write a compressed copy next to it.
js_path <- yuge_json_file
raw_js <- readr::read_file_raw(js_path)
compressed_path <- "~/yuge.json.gz"
# memCompress() accepts a raw vector directly, so the bytes are passed through
# as-is: no intermediate character copy of the whole payload, and no failure
# from rawToChar() when the data contains embedded NUL bytes.
# NOTE(review): per the R documentation, type = "gzip" emits a zlib (RFC 1950)
# stream rather than a gzip-format file, despite the ".gz" name -- confirm the
# consumer of this file expects that.
writeBin(memCompress(raw_js, type = "gzip"), compressed_path)
.file_extension <- function(x, dot = TRUE, ignore_zip_ext = FALSE) {
if (ignore_zip_ext) {
base_name <- sub("\\.[bgx]z2?$", "", basename(x))
} else {
base_name <- basename(x)
}
captures <- regexpr("(?<!^|[.]|/)[.]([^.]+)$", base_name, perl = TRUE)
out <- rep(NA_character_, length(x))
out[captures > 0L] <- substring(base_name[captures > 0L], captures[captures > 0L])
if (dot) out else substring(out, 2L)