Skip to content

Instantly share code, notes, and snippets.

View lvalnegri's full-sized avatar
📦
working on my R packages

luca lvalnegri

📦
working on my R packages
  • London, UK
View GitHub Profile
@lvalnegri
lvalnegri / app.R
Last active January 24, 2023 20:04
Canadian Postal Codes by Regions using #rstats with a #shiny web app
# Load the packages used by the app through the Rfuns helper (presumably attaches them — confirm).
Rfuns::load_pkgs('data.table', 'leaflet', 'leaflet.extras', 'leafgl', 'shiny', 'sf')
# Read the stored `yp` object from disk with qs, using 6 threads.
yp <- qs::qread('./yp', nthreads = 6)
# One-off clean-up kept for reference: rename two columns to syntactic names, then re-save.
# setnames(yp, c('Place Name', 'FSA-Province'), c('PlaceName', 'FSAP'))
# qs::qsave(yp, './yp', nthreads = 6)
# Read the stored `ypgw` object the same way.
ypgw <- qs::qread('./ypgw', nthreads = 6)
# Same one-off column renaming, applied to `ypgw`.
# ypgw <- ypgw |> dplyr::rename('PlaceName' = 'Place Name', 'FSAP' = 'FSA-Province')
# qs::qsave(ypgw, './ypgw', nthreads = 6)
# One-off step kept for reference: simplify `yrw` row by row with rmapshaper and save it as `yrws`.
# yrw <- qs::qread('./yrw', nthreads = 6)
# yrws <- do.call( 'rbind', lapply( 1:nrow(yrw), \(x) yx <- yrw[x,] |> rmapshaper::ms_simplify()) )
# qs::qsave(yrws, './yrws', nthreads = 6)
# msoa: 7,264 Middle-Layer Super Output Census small areas in England and Wales (polygons)
# points: 1,531,286 active postcodes in England and Wales
library(data.table)
library(sf)
# Reproject the MSOA polygons to EPSG:27700 so they share a CRS with the points below.
msoa <- RcensusUK::MSOA |> st_transform(27700)
# Keep only active postcodes in England and Wales, with their id and coordinates.
points <- RpostcodesUK::postcodes[
  is_active == 1 & CTRY %in% c('ENG', 'WLS'),
  .(PCU, x_lon, y_lat)
]
# Build point geometries from lon/lat (EPSG:4326), then reproject to EPSG:27700.
gpoints <- st_as_sf(points, coords = c('x_lon', 'y_lat'), crs = 4326) |>
  st_transform(27700)
# Tag every postcode with the MSOA of the polygon it falls within.
points[, MSOA := st_join(gpoints, msoa, join = st_within) |> subset(select = MSOA) |> st_drop_geometry()]
# Neighbour list built from the MSOA polygons.
neighs <- spdep::poly2nb(msoa)
# NYC Taxi TLC Trip Record Data
# https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page
# year 2011, 176mln rows, 12GB RAM
# Switch the working directory to ./Rstorage (relative to the current one).
setwd('./Rstorage')
# Thread count for data.table: every detected core but two.
nth <- parallel::detectCores() - 2 # Ryzen 9 5950X 32 cores (=> nth = 30)
data.table::setDTthreads(nth)
# Read the 2011 fst file found under Rfuns::data_path as a data.table.
y <- fst::read_fst(file.path(Rfuns::data_path, 'us', 'nyc_taxi', '2011'), as.data.table = TRUE)
# csv: 11.2GB (zipped: GB)
@lvalnegri
lvalnegri / Rgeobench.R
Created November 8, 2022 20:41
For #rstats users interested in working with the #geoparquet format, I've run a small benchmark comparing various formats for storage and ways of querying data, as well as different hardware. The reference file is an #sf object, the complete Italian Census 2011 small areas, for a total of ~400K polygons with only an id column attached to the geom,…
# geoarrow: https://github.com/paleolimbot/geoarrow, remotes::install_github("paleolimbot/geoarrow")
# sfarrow: https://github.com/wcjochem/sfarrow, install.packages('sfarrow')
# qs: https://github.com/traversc/qs, install.packages("qs")
# Work inside the benchmark folder; the relative paths below resolve against it.
setwd('/home/datamaps/temp/Rgeobench/')
# Reference object, read from its RDS file.
y <- readRDS('./0.rds') # 333.3MB
# Size of the object once loaded in memory.
object.size(y) # 798,256,856 bytes - see Italian Census Tracts @ https://bit.ly/szn_cens
# Same object written as an uncompressed RDS file.
saveRDS(y, './0.rnc', compress = FALSE) # 541.2MB
# Same object written as a parquet file through sfarrow.
sfarrow::st_write_parquet(y, './0.sfa') # 471.8MB
@lvalnegri
lvalnegri / data_read.R
Last active August 22, 2022 10:44
r_benchmark
# Benchmark data: resample the penguins rows (with replacement) up to 10 million rows.
dta <- palmerpenguins::penguins
ids <- sample(nrow(dta), 1e7, replace = TRUE)
# Turn every factor column into character before writing.
dta <- dta[ids,] |> dplyr::mutate(across(where(is.factor), as.character))
# `fileext` gives the temp file its extension; `pattern` would only set the
# name prefix and produce an extension-less file such as ".csv1a2b3c".
tmpf_csv <- tempfile(fileext = '.csv')
data.table::fwrite(dta, tmpf_csv, nThread = 10)
tmpf_parquet <- tempfile(fileext = '.parquet')
arrow::write_parquet(dta, tmpf_parquet)
# qs files carry no conventional extension here, so the default temp name is kept.
tmpf_qs <- tempfile()
qs::qsave(dta, tmpf_qs, nthreads = 10)
@lvalnegri
lvalnegri / app.R
Last active March 17, 2022 13:28
Covid Vaccines Adverse Reactions
# Load the packages through the dmpkg.funs helper (presumably attaches them — confirm).
dmpkg.funs::load_pkgs(c('data.table', 'DT', 'htmltools', 'shiny'))
# Folder holding the vaccine data; `datauk_path` is not defined in this snippet.
dpath <- file.path(datauk_path, 'covid', 'vaccine')
# Read the adverse reactions table from its fst file as a data.table.
dts <- fst::read_fst(file.path(dpath, 'vaccine_adverse_reactions'), as.data.table = TRUE)
# Drop the `rnk` column by reference.
dts[, rnk := NULL]
# Read the content of the companion .date file as a date and format it as day, full month name, year.
up_date <- format(as.Date(readLines(file.path(dpath, 'vaccine_adverse_reactions.date'))), '%d %B %Y')
js_footer <- function(x){
paste0(
'tot_', x, ' = api.column(', x, ', {search:"applied", page:"all"}).data().reduce(function(a, b){return a + b;})
@lvalnegri
lvalnegri / estimates_rail_stations_usage.R
Last active March 7, 2022 01:06
Estimates UK Rail stations usage
################################################################################
# UK * Estimates of rail station usage: passenger entries, exits, interchanges #
################################################################################
# ORR: https://dataportal.orr.gov.uk/statistics/usage/estimates-of-station-usage
# NLC: National Location Code, TLC: 3-letters Code, LTA: London Travelcard Area, NRR: Network Rail Region, SRS: Strategic Route Section
# Load data.table and sf through the dmpkg.funs helper (presumably attaches them — confirm).
dmpkg.funs::load_pkgs('data.table', 'sf')
# Named character vector: numeric codes (as names) mapped to short usage labels.
# NOTE(review): presumably the codes identify fields in the ORR source data — confirm.
vars <- c('101' = 'full', '102' = 'reduced', '103' = 'season', '105' = 'total', '109' = 'interchanges')
@lvalnegri
lvalnegri / preliminary_census_counts_country_birth.R
Last active March 4, 2022 11:54
choropleth maps using R leaflet
#################################################################################
# England And Wales * Preliminary Census 2021 Counts by (some) Country of Birth #
#################################################################################
# ONS data: https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/adhocs/14354ct210001
library(data.table)
library(dplyr)
library(sf)
library(leaflet)
# Total Cases: https://coronavirus.data.gov.uk/ (updated after 4PM at minimum)
# Omicron split: https://www.gov.uk/government/publications/covid-19-omicron-daily-overview (updated at ?)
library(data.table)
library(ggplot2)
# Download the CSV of newCasesBySpecimenDate by region from the coronavirus API,
# keeping only columns 2, 4 and 5 of the response.
yc <- fread(
'https://api.coronavirus.data.gov.uk/v2/data?areaType=region&metric=newCasesBySpecimenDate&format=csv',
select = c(2, 4, 5)
)
# Packages needed by the code below.
pkgs <- c('data.table', 'httr', 'jsonlite')
# Attach each package. `library()` stops with an error when a package is
# missing, whereas `require()` only returns FALSE and lets the script fail
# later; `character.only` is spelled out instead of relying on partial
# argument matching. `invisible()` keeps the lapply() result from printing.
invisible(lapply(pkgs, library, character.only = TRUE))
get_catch_postcodes <- function(x_lon, y_lat, tm = 120){
# <tm> input travel time is in minutes
# output travel time is in seconds
# output travel distance is in metres
appId <- 'INSERT HERE YOUR ID'
apiKey <- 'INSERT HERE YOUR API KEY'