Skip to content

Instantly share code, notes, and snippets.

@arthurgailes
arthurgailes / sum_wide.R
Created April 3, 2024 18:17
duckplyr summarizing wide data
# Load necessary library
pacman::p_load(
stringi, data.table, duckplyr, readr, dplyr, collapse, duckdb, dbplyr, bench,
ggplot2
)
# Function to generate a dataframe chunk
generate_data_chunk <- function(num_rows = 1000, num_cols = 100) {
# Generate numeric columns
numeric_cols <- replicate(n = num_cols/2, expr = runif(num_rows, 1, 10000), simplify = FALSE)
@arthurgailes
arthurgailes / duckplyr_read_csv.R
Created April 3, 2024 16:13
duckplyr csv reading benchmark
# Load necessary library
pacman::p_load(
stringi, data.table, duckplyr, readr, dplyr, collapse, duckdb, dbplyr, bench,
ggplot2
)
# Function to generate a dataframe chunk
generate_data_chunk <- function(num_rows = 1000, num_cols = 100) {
# Generate numeric columns
numeric_cols <- replicate(n = num_cols/2, expr = runif(num_rows, 1, 10000), simplify = FALSE)
@arthurgailes
arthurgailes / fix_census_2022_blocks.R
Created December 21, 2023 16:28
Fix Census 2022 blocks with tigris
#' Fix broken Census Connecticut blocks post-2022
#' See: https://github.com/walkerke/tigris/issues/178
library(tigris)
library(sf)
library(dplyr)
library(readr)
library(testthat)
# load official CT crosswalk
# missing a few block ids, but tracts are a consistent 1:1 merge so use those
@arthurgailes
arthurgailes / r_package_downloads.R
Created August 14, 2023 12:57
Quickly show time series for R package downloads
# quick R script for seeing the trends in downloads for a package
library(cranlogs)
library(ggplot2)
library(collapse)
library(lubridate)
# CRAN package whose download history is fetched
package <- "shiny"
# Get downloads over a fixed date range. Pass the `package` string defined
# above: the bare symbol `shiny` is not a defined object and fails at run time.
x <- cran_downloads(
  packages = package,
  from = "2015-06-01",
  to = "2023-08-14"
)
# Preview the first rows of the result
head(x)
@arthurgailes
arthurgailes / collapse_sf.R
Created July 28, 2023 13:24
R: collapse and sf
# list and unlist an sf object
library(collapse)
library(sf)
library(testthat)
nc <- st_read(system.file("shape/nc.shp", package="sf"))
nc_list <- rsplit(nc, by = seq_len(nrow(nc)))
nc_collapse <- unlist2d(nc_list, idcols = FALSE, recursive = FALSE) |>
@arthurgailes
arthurgailes / postgres_helpers.R
Created June 7, 2023 13:02
R PostgreSQL Helpers
# Get all indexes, then drop and add before and after writing to table
# Speeds up writes
get_index_table <- function(con, tablename, schemaname) {
index_df <- dbGetQuery(con, paste0(
"SELECT indexname, indexdef FROM pg_indexes
WHERE tablename = '", tablename, "' AND schemaname = '", schemaname, "'"))
index_df <- subset(index_df, !grepl("[pf]key", indexname))
index_df$indexname <- paste0(schemaname, ".", index_df$indexname)
@arthurgailes
arthurgailes / furrr_maps.R
Last active April 10, 2023 01:51 — forked from walkerke/purrr_maps.R
comparing purrr to furrr
if(!require(pacman)) install.packages('pacman')
pacman::p_load(tigris, tidycensus, furrr, doFuture, purrr, tictoc, ggplot2)
# Self-named character vector: every US state plus DC, each element named
# after its own value
state_names <- setNames(nm = c(state.name, "District of Columbia"))
# Disable tigris caching to make things equal between runs
options(tigris_use_cache = FALSE)
# purrr:
tictoc::tic()
@arthurgailes
arthurgailes / census_commute.R
Last active September 24, 2022 01:02
Census Commute Shares in TidyCensus
# Create census commute share in Portland
if(!require(pacman)) install.packages('pacman')
pacman::p_load(dplyr, tidycensus)
# https://data.census.gov/cedsci/table?q=means%20transportation&g=1600000US4159000&tid=ACSDT1Y2021.B08006
trans_table <- 'B08006'
# Note: there is no data for 2020, and the 2005 data is corrupt
# (the non-auto counts are off).
years <- c(2006:2019, 2021)
@arthurgailes
arthurgailes / .block
Created May 1, 2020 22:37
dc.js interactive example
license: mit
@arthurgailes
arthurgailes / .block
Last active May 1, 2020 22:37
dc.js interactive example
license: mit