Skip to content

Instantly share code, notes, and snippets.

View bschneidr's full-sized avatar

Ben Schneider bschneidr

View GitHub Profile
@bschneidr
bschneidr / sample-groups-with-dplyr-and-tidyr.R
Created May 23, 2024 17:59
Sampling groups with 'dplyr' + 'tidyr'
library(dplyr)
library(tidyr)
library(survey)
# Load the 'api' example data from the 'survey' package;
# the 'apipop' data frame has one row per school
data("api", package = "survey")
# Draw a sample of school districts
library(dplyr)
library(broom)
library(tidyr)
# Build a toy data frame: two columns, each holding ten draws
# (with replacement) from the integers 1 through 5
my_data <- data.frame(
  x = sample(1:5, size = 10, replace = TRUE),
  y = sample(1:5, size = 10, replace = TRUE)
)
# Variable names from which every pair is to be generated
var_list <- names(my_data)
@bschneidr
bschneidr / alt-design-syntax.R
Created December 11, 2022 14:51
Alternative survey design syntax
# Sketch of a stage-by-stage syntax for describing a survey design.
# NOTE(review): describe_design(), srs_stage(), pps_stage(), and
# nonresponse_stage() are not defined in this snippet -- presumably a
# proposed interface rather than an existing API; confirm before running.
describe_design(
# First stage: units identified by "PSU_ID", with method "SRS",
# stratified by "PSU_STRATUM_ID"
srs_stage(id = "PSU_ID", method = "SRS",
stratum = "PSU_STRATUM_ID"),
# Nonresponse step flagged by the "PSU_RESPONDENT" indicator
nonresponse_stage(response_indicator = "PSU_RESPONDENT"),
# Next stage: units identified by "SSU_ID", with method "PPS",
# stratified by "SSU_STRATUM_ID"
pps_stage(id = "SSU_ID", method = "PPS",
stratum = "SSU_STRATUM_ID"),
# Nonresponse step flagged by the "SSU_RESPONDENT" indicator
nonresponse_stage(response_indicator = "SSU_RESPONDENT")
)
@bschneidr
bschneidr / sample-variance-as-quadratic-form.R
Last active October 28, 2022 12:33
Expresses sample variance as a quadratic form
n = 5
# Establish quadratic form
quad_form_matrix <- matrix(nrow = n, ncol = n)
for (i in seq(n)) {
for (j in seq(n)) {
if (i == j) {
quad_form_matrix[i,j] <- (1/n)
} else {
@bschneidr
bschneidr / svy_prop_with_wilson_ci.R
Last active November 11, 2023 10:19
Wilson Confidence Interval for Complex Surveys
#' @title Wilson's confidence interval for complex survey designs
#' @description Calculate Wilson's confidence interval for a proportion,
#' with the effective sample size determined using a design-unbiased
#' estimate of the complex survey design effect.
#'
#' @param x A formula, vector, or matrix.
#' @param design A survey.design or svyrep.design object
#' @param na.rm Should cases with missing values be dropped?
#' @param level The confidence level required
#' @param ... Additional arguments to pass on to \code{svymean()}
@bschneidr
bschneidr / wilson-interval-for-complex-surveys.R
Last active October 12, 2022 14:30
Wilson's confidence interval for complex surveys
#' @title Wilson's confidence interval for complex survey designs
#' @description Calculate Wilson's confidence interval for a proportion,
#' with the effective sample size determined using a design-unbiased
#' estimate of the complex survey design effect.
#'
#' @param x A formula, vector, or matrix.
#' @param design A survey.design or svyrep.design object
#' @param na.rm Should cases with missing values be dropped?
#' @param level The confidence level required
#'
@bschneidr
bschneidr / finite-population-bayesian-bootstrap.R
Created September 22, 2022 14:25
Example of FPBB with the Louisville Vaccination Survey
suppressPackageStartupMessages({
library(survey)
library(svrep)
library(polyapost)
})
# Fix the random number generator seed
set.seed(1999)
# Load example survey data ----
data("lou_vax_survey", package = "svrep")
@bschneidr
bschneidr / sort-matrix-using-vectors.R
Last active September 9, 2022 13:45
RcppArmadillo sorting matrix based on orders of corresponding vectors
library(Rcpp)
# Create an R function for sorting a matrix
# based on accompanying vectors
cpp_code <- (
'using namespace Rcpp;
//#include <Rcpp.h>
#include <RcppArmadillo.h>
@bschneidr
bschneidr / survey-pv-analysis.R
Last active August 27, 2022 15:18
Analysis of plausible values with the survey package
# https://nces.ed.gov/surveys/piaac/datafiles.asp
# Load the survey data and prepare it for analysis ----
# Read the SPSS (.sav) data file from a local path
piaac17_puf <- haven::read_sav(
  file.path("C:\\Users\\benja\\Downloads\\spss\\prgusap1_puf.sav")
)
piaac17_rep_svy <- svrepdesign(
data = piaac17_puf,
repweights = "SPFWT[1-9]{1}0{0,1}",
weights = ~ SPFWT0,
@bschneidr
bschneidr / variance-of-nr-bias.R
Created April 5, 2022 18:03
Estimate the variance of a non-response bias estimate, using the survey package
# Simulate an example population of 1,000 units, then sample from it ----
population <- data.frame(
  # Binary status: 0 with probability 0.25, 1 with probability 0.75
  vax_status = sample(
    x = c(0, 1),
    size = 1000,
    replace = TRUE,
    prob = c(0.25, 0.75)
  ),
  # Response status: 80% respondents, 20% nonrespondents
  response_status = sample(
    x = c("Respondent", "Nonrespondent"),
    size = 1000,
    replace = TRUE,
    prob = c(0.8, 0.2)
  )
)
# Draw 150 distinct rows from the population (sampling without replacement)
sample_data <- population[sample(nrow(population), size = 150), ]
# Create a survey design object ----