Skip to content

Instantly share code, notes, and snippets.

@benzipperer
benzipperer / weighted_percentiles.R
Created December 13, 2023 14:51
weighted percentiles and reshaping
library(tidyverse)
library(MetricsWeighted)
# here's how to calculate multiple weighted percentiles by year
# and reshape them so data is long in year but wide in percentiles
# below I explain this step by step
# first grab some data
# pull year, orgwgt, and wage for 1979-2022 and keep only rows with wage > 0
cps_data <- filter(
  epiextractr::load_org(1979:2022, year, orgwgt, wage),
  wage > 0
)
@benzipperer
benzipperer / stacked.R
Created November 16, 2023 21:48
stacked regression model tables using modelsummary and kableExtra
library(tidyverse)
library(modelsummary)
library(kableExtra)
# linear fit of mpg on wt, restricted to the mtcars rows where am == 1
model_am1 <- mtcars %>%
  filter(am == 1) %>%
  lm(mpg ~ wt, data = .)
model_am0 = mtcars %>%
filter(am == 0) %>%
@benzipperer
benzipperer / wage_ag051120.do
Last active October 25, 2023 16:50
modified replication package for DGLR 2020, to calculate mean employment of regression sample in Table A1, Women High School or Less
/* BEGIN BZ MODIFICATIONS */
/* this do-file requires the following files from the authors
medicaidelig.dta
UKCPR_National_Welfare_Data_Update_021520.dta
*/
* fetch the state geocodes file from GitHub, overwriting any existing local copy
copy https://github.com/Economic/state_geocodes/raw/master/state_geocodes.dta state_geocodes.dta, replace
* load it, discarding whatever data is currently in memory
use state_geocodes, clear
* rename state_census to stcps
rename state_census stcps
@benzipperer
benzipperer / nursing_homes.R
Last active July 6, 2023 18:04
distribution of facility-level staffing ratios, by Census Region, June 2023
library(tidyverse)
library(ggridges)
# state codes
# read the Census regions/divisions CSV and keep two renamed columns:
# `State Code` as state_abb and Region as region
states_regions <- select(
  read_csv("https://raw.githubusercontent.com/cphalpert/census-regions/master/us%20census%20bureau%20regions%20and%20divisions.csv"),
  state_abb = `State Code`,
  region = Region
)
# CMS provider info
# https://data.cms.gov/provider-data/dataset/4pq5-n9py
raw <- read_csv("NH_ProviderInfo_Jun2023.csv") %>%
@benzipperer
benzipperer / school_shootings.R
Created November 16, 2022 15:57
plot time series of US K-12 shootings
library(tidyverse)
library(lubridate)
library(hrbrthemes)
# K-12 shooting incident data from https://www.chds.us/ssdb/data-map/
readxl::read_excel("SSDB_Raw_Data.xlsx", sheet = "INCIDENT") %>%
mutate(
year = year(ymd(Date)),
month = month(ymd(Date)),
month_date = ym(paste(year, month))
@benzipperer
benzipperer / wgt.R
Last active January 27, 2022 02:05
demonstrate the effects of Census rounding wages on Atlanta Fed Wage Tracker
library(tidyverse)
library(lubridate)
library(haven)
library(assertr)
library(slider)
library(hrbrthemes)
# function to bin hourly wages of hourly workers
bin_hourly <- function(df, new, old) {
df %>%
@benzipperer
benzipperer / lvm_stuff.md
Last active June 14, 2021 20:10
lvm stuff

stuff for LVM

create LVM partition on new physical volume

Find the new physical volume using fdisk -l. Let's say it is /dev/sda.

Then partition it using fdisk /dev/sda. Type n to create a new primary partition, accepting the defaults for the partition number (1) and the sectors. Then type t to change the partition type to 8e (Linux LVM). Finally, type w to write the partition table.

initialize physical volume for LVM

pvcreate /dev/sda1
@benzipperer
benzipperer / master.do
Last active May 7, 2021 16:16
change in monthly CES emp by wage quantile
* start from an empty session and turn off the --more-- output pause
clear all
set more off
* download CES data, if new
* (wget -N uses timestamping: a file is fetched again only when the remote copy is newer)
!wget -N https://download.bls.gov/pub/time.series/ce/ce.data.0.AllCESSeries
!wget -N https://download.bls.gov/pub/time.series/ce/ce.series
!wget -N https://download.bls.gov/pub/time.series/ce/ce.industry
* identify 3-digit industries
* read the tab-delimited ce.industry file, replacing the data in memory
insheet using ce.industry, clear tab
@benzipperer
benzipperer / qcew_utils.R
Created March 3, 2021 20:43
example use of QCEW data with arrow in R
library(tidyverse)
library(arrow)
# utility to download single quarterly file for a year of data
# and partition it into years X aggregation levels
dl_partition_qcew <- function(year) {
url <- paste0("https://data.bls.gov/cew/data/files/",
year,
"/csv/",
year,