Robin Edwards geotheory

## root-domain.R
# Vectorised workflow to extract the root domains from a list of urls where possible,
# based on identification of the url segment leftwardly adjacent to its public suffix.
# Handling for used domains that are also a registered public suffix.
# Handling also for domains unmatched in the public suffix list (incl. punycode URLs and URLs of top-level
# domains that are not registered as public suffixes, notably .bd, .ck, .er, .fk, .jm, .kh, .mm, .np, .pg, .za).
# Unrecognised domains will be returned as is.

# Download the latest official Public Suffix List and keep only its rule
# lines: `//` comment lines and blank/whitespace-only lines are dropped, and
# the remaining entries are lower-cased.
psl_data <- tolower(grep(
  "^(//|\\s*$)",
  readLines("https://publicsuffix.org/list/public_suffix_list.dat"),
  invert = TRUE,
  value = TRUE
))

## gdelt-headlines.R

suppressPackageStartupMessages({
  require(DT)
  require(magrittr)
})

# Sets the RSTUDIO_PANDOC environment variable to a tools directory inside a
# macOS RStudio.app bundle. The path is hard-coded, so it only resolves on a
# machine where RStudio is installed at this exact location.
Sys.setenv(RSTUDIO_PANDOC = "/Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/x86_64")

# Decode HTML entities in `str`: the text is wrapped in a dummy <x> element,
# parsed as HTML with xml2, and the parsed document's text content is returned.
unescape_html <- function(str) {
  wrapped <- paste0("<x>", str, "</x>")
  parsed <- xml2::read_html(wrapped)
  xml2::xml_text(parsed)
}

## geometric-mean.R
# Geometric mean of a numeric vector, computed as exp(mean(log(x))).
# x: numeric vector. Negative values make log() return NaN (with a warning),
#    and any NA in `x` propagates to the result.
# Returns a single numeric value.
geo_mean = function(x) log(x) |> mean() |> exp()

## ggiraph-alt-text.R

library(ggplot2)
library(ggiraph)

data <- mtcars
data$carname <- row.names(data)

gg_point = ggplot(data = data) +
  geom_point_interactive(aes(x = wt, y = qsec, color = disp,
                             tooltip = carname, data_id = carname)) +

## mercator-web-map-tile-conversion.R

# Conversion between Web Map Tiles and Mercator coordinates

require(tibble)

merc_to_wmt = function(x, y, z){
  m_ext = pi * 6378137 # Mercator extent
  res = 2 ^ (1:14)[z]  # grid size
  x_grid = scales::rescale(x, from = c(-m_ext, m_ext), to = c(0, res)) |> floor()
  y_grid = scales::rescale(y, from = c(m_ext, -m_ext), to = c(0, res)) |> floor()

## tmap-pixel-bug.R
require(tmap)
require(dplyr)
require(stars)
require(shiny)

server = shinyServer(function(input, output, session) {

  tmap_mode("view")

  output$map = renderTmap({

## plotly-map-accuracy.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<style>body{background-color:white;}</style>
<script src="data:application/javascript;base64,KGZ1bmN0aW9uKCkgewogIC8vIElmIHdpbmRvdy5IVE1MV2lkZ2V0cyBpcyBhbHJlYWR5IGRlZmluZWQsIHRoZW4gdXNlIGl0OyBvdGhlcndpc2UgY3JlYXRlIGEKICAvLyBuZXcgb2JqZWN0LiBUaGlzIGFsbG93cyBwcmVjZWRpbmcgY29kZSB0byBzZXQgb3B0aW9ucyB0aGF0IGFmZmVjdCB0aGUKICAvLyBpbml0aWFsaXphdGlvbiBwcm9jZXNzICh0aG91Z2ggbm9uZSBjdXJyZW50bHkgZXhpc3QpLgogIHdpbmRvdy5IVE1MV2lkZ2V0cyA9IHdpbmRvdy5IVE1MV2lkZ2V0cyB8fCB7fTsKCiAgLy8gU2VlIGlmIHdlJ3JlIHJ1bm5pbmcgaW4gYSB2aWV3ZXIgcGFuZS4gSWYgbm90LCB3ZSdyZSBpbiBhIHdlYiBicm93c2VyLgogIHZhciB2aWV3ZXJNb2RlID0gd2luZG93LkhUTUxXaWRnZXRzLnZpZXdlck1vZGUgPQogICAgICAvXGJ2aWV3ZXJfcGFuZT0xXGIvLnRlc3Qod2luZG93LmxvY2F0aW9uKTsKCiAgLy8gU2VlIGlmIHdlJ3JlIHJ1bm5pbmcgaW4gU2hpbnkgbW9kZS4gSWYgbm90LCBpdCdzIGEgc3RhdGljIGRvY3VtZW50LgogIC8vIE5vdGUgdGhhdCBzdGF0aWMgd2lkZ2V0cyBjYW4gYXBwZWFyIGluIGJvdGggU2hpbnkgYW5kIHN0YXRpYyBtb2RlcywgYnV0CiAgLy8gb2J2aW91c2x5LCBTaGlueSB3aWRnZXRzIGNhbiBvbmx5IGFwcGVhci

## test-grid-data.csv

          
            n
            lat
            lon

            
              342
              14.765625
              45.703125

            
              307
              21.796875
              94.921875

            
              265
              37.265625
              127.265625

            
              172
              31.640625
              35.859375

            
              156
              31.640625
              34.453125

            
              151
              35.859375
              37.265625

            
              141
              48.515625
              38.671875

            
              138
              35.859375
              35.859375

            
              138
              18.984375
              -99.140625

## escape-triple-quoted.R
# Convert a string in which some values are wrapped in triple double-quotes
# (""" ... """) and contain unescaped double quotes into a conventionally
# quoted string: each """ delimiter becomes a single ", and every " found
# inside a triple-quoted section is backslash-escaped.
#
# j: a character string (only the first element is processed).
# Returns a length-one character vector.
escape_triple_quoted = function(j){
  # Split on the """ delimiters. regmatches(..., invert = TRUE) is used rather
  # than strsplit() because strsplit() drops a trailing empty piece, which
  # would lose the closing quote whenever `j` ends with """.
  j_split = regmatches(j, gregexpr('"{3}', j), invert = TRUE)[[1]]
  f = seq_along(j_split) %% 2 == 0  # even pieces lie inside triple quotes
  j_split[f] = gsub('"', '\\\\"', j_split[f])
  paste(j_split, collapse = '"')
}

# Usage

## fill-missing-timeseries.R
# identify and fill missing data points in timeseries data with regular-periodicity

require(padr)
#> Loading required package: padr

# Example series sampled every 3 days with two gaps (steps 2 and 4 of the
# 0..6 sequence are absent); `val` is a random permutation of 1:5.
df <- data.frame(
  date = as.Date('2016-04-01') + 3 * c(0, 1, 3, 5, 6),
  val = sample(5)
)
print(df)
#>         date val
#> 1 2016-04-01   3
#> 2 2016-04-04   1
	# Vectorised workflow to extract the root domains from a list of urls where possible,
	# based on identification of the url segment leftwardly adjacent to its public suffix.
	# Handling for used domains that are also a registered public suffix.
	# Handling also for domains unmatched in the public suffix list (incl. punycode URLs and URLs of top-level
	# domains that are not registered as public suffixes, notably .bd, .ck, .er, .fk, .jm, .kh, .mm, .np, .pg, .za).
	# Unrecognised domains will be returned as is.

	# latest official list of public suffixes; `//` comment lines and blank
	# lines are dropped and the remaining entries are lower-cased
	psl_data = readLines("https://publicsuffix.org/list/public_suffix_list.dat")
	psl_data = psl_data[!grepl("^(//|\\s*$)", psl_data)] |> tolower()

	suppressPackageStartupMessages({
	require(DT)
	require(magrittr)
	})

	# Sets the RSTUDIO_PANDOC environment variable to a tools directory inside a
	# macOS RStudio.app bundle. The path is hard-coded, so it only resolves on a
	# machine where RStudio is installed at this exact location.
	Sys.setenv(RSTUDIO_PANDOC = "/Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/x86_64")

	# Decode HTML entities in `str`: the text is wrapped in a dummy <x> element,
	# parsed as HTML with xml2, and the parsed document's text content is returned.
	unescape_html = function(str) paste0("<x>",str,"</x>") |> xml2::read_html() |> xml2::xml_text()

	library(ggplot2)
	library(ggiraph)

	data <- mtcars
	data$carname <- row.names(data)

	gg_point = ggplot(data = data) +
	geom_point_interactive(aes(x = wt, y = qsec, color = disp,
	tooltip = carname, data_id = carname)) +

	# Conversion between Web Map Tiles and Mercator coordinates

	require(tibble)

	merc_to_wmt = function(x, y, z){
	m_ext = pi * 6378137 # Mercator extent
	res = 2 ^ (1:14)[z] # grid size
	x_grid = scales::rescale(x, from = c(-m_ext, m_ext), to = c(0, res)) \|> floor()
	y_grid = scales::rescale(y, from = c(m_ext, -m_ext), to = c(0, res)) \|> floor()
	require(tmap)
	require(dplyr)
	require(stars)
	require(shiny)

	server = shinyServer(function(input, output, session) {

	tmap_mode("view")

	output$map = renderTmap({
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="utf-8" />
	<style>body{background-color:white;}</style>
	<script src="data:application/javascript;base64,KGZ1bmN0aW9uKCkgewogIC8vIElmIHdpbmRvdy5IVE1MV2lkZ2V0cyBpcyBhbHJlYWR5IGRlZmluZWQsIHRoZW4gdXNlIGl0OyBvdGhlcndpc2UgY3JlYXRlIGEKICAvLyBuZXcgb2JqZWN0LiBUaGlzIGFsbG93cyBwcmVjZWRpbmcgY29kZSB0byBzZXQgb3B0aW9ucyB0aGF0IGFmZmVjdCB0aGUKICAvLyBpbml0aWFsaXphdGlvbiBwcm9jZXNzICh0aG91Z2ggbm9uZSBjdXJyZW50bHkgZXhpc3QpLgogIHdpbmRvdy5IVE1MV2lkZ2V0cyA9IHdpbmRvdy5IVE1MV2lkZ2V0cyB8fCB7fTsKCiAgLy8gU2VlIGlmIHdlJ3JlIHJ1bm5pbmcgaW4gYSB2aWV3ZXIgcGFuZS4gSWYgbm90LCB3ZSdyZSBpbiBhIHdlYiBicm93c2VyLgogIHZhciB2aWV3ZXJNb2RlID0gd2luZG93LkhUTUxXaWRnZXRzLnZpZXdlck1vZGUgPQogICAgICAvXGJ2aWV3ZXJfcGFuZT0xXGIvLnRlc3Qod2luZG93LmxvY2F0aW9uKTsKCiAgLy8gU2VlIGlmIHdlJ3JlIHJ1bm5pbmcgaW4gU2hpbnkgbW9kZS4gSWYgbm90LCBpdCdzIGEgc3RhdGljIGRvY3VtZW50LgogIC8vIE5vdGUgdGhhdCBzdGF0aWMgd2lkZ2V0cyBjYW4gYXBwZWFyIGluIGJvdGggU2hpbnkgYW5kIHN0YXRpYyBtb2RlcywgYnV0CiAgLy8gb2J2aW91c2x5LCBTaGlueSB3aWRnZXRzIGNhbiBvbmx5IGFwcGVhci
n	lat	lon
342	14.765625	45.703125
307	21.796875	94.921875
265	37.265625	127.265625
172	31.640625	35.859375
156	31.640625	34.453125
151	35.859375	37.265625
141	48.515625	38.671875
138	35.859375	35.859375
138	18.984375	-99.140625
	# Convert a string in which some values are wrapped in triple double-quotes
	# (""" ... """) and contain unescaped double quotes into a conventionally
	# quoted string: each """ delimiter becomes a single ", and every " found
	# inside a triple-quoted section is backslash-escaped.
	#
	# j: a character string (only the first element is processed).
	# Returns a length-one character vector.
	escape_triple_quoted = function(j){
	  # Split on the """ delimiters. regmatches(..., invert = TRUE) is used rather
	  # than strsplit() because strsplit() drops a trailing empty piece, which
	  # would lose the closing quote whenever `j` ends with """.
	  j_split = regmatches(j, gregexpr('"{3}', j), invert = TRUE)[[1]]
	  f = seq_along(j_split) %% 2 == 0 # even pieces lie inside triple quotes
	  j_split[f] = gsub('"', '\\\\"', j_split[f])
	  paste(j_split, collapse = '"')
	}

	# Usage
	# identify and fill missing data points in timeseries data with regular-periodicity

	require(padr)
	#> Loading required package: padr

	# Example series sampled every 3 days with two gaps; `val` is a random
	# permutation of 1:5. The result is printed and kept in `df`.
	df <- data.frame(date = as.Date('2016-04-01') + 3*c(0,1,3,5,6), val = sample(5)) |>
	  print()
	#> date val
	#> 1 2016-04-01 3
	#> 2 2016-04-04 1