Skip to content

Instantly share code, notes, and snippets.

@carlganz
Last active May 28, 2018 21:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carlganz/2aee3b57a827388a69d3a70f0ae9c850 to your computer and use it in GitHub Desktop.
Save carlganz/2aee3b57a827388a69d3a70f0ae9c850 to your computer and use it in GitHub Desktop.
library(dplyr)
library(survey)
data(nhanes)
# with clusters
svy <- svydesign(id=~SDMVPSU, strata=~SDMVSTRA, weights=~WTMEC2YR, nest=TRUE, data=nhanes)
svytotal(~HI_CHOL, svy, na.rm = TRUE)
### reproducing results with dplyr is simple
nhanes %>%
group_by(SDMVSTRA, SDMVPSU) %>%
# calculate total in each cluster
summarise(total = sum(WTMEC2YR * HI_CHOL, na.rm = TRUE)) %>%
# calculate variance between clusters within strata
summarise(variance = n() * var(total), total = sum(total)) %>%
# sum variance from each strata
summarise(total = sum(total), SE = sqrt(sum(variance)))
### just for computational demonstration pretend there are no clusters
svy_no_cluster <- svydesign(id=~0, strata = ~SDMVSTRA, weights=~WTMEC2YR, data=nhanes)
svytotal(~HI_CHOL, svy_no_cluster, na.rm = TRUE)
### reproducing with dplyr is wrong but I don't understand why
nhanes %>%
group_by(SDMVSTRA) %>%
summarise(total = sum(HI_CHOL * WTMEC2YR, na.rm = TRUE),
# calculate variance between observations within strata since we don't have clusters
variance = sum(!is.na(HI_CHOL)) * var(HI_CHOL * WTMEC2YR, na.rm = TRUE)) %>%
# sum variance from each strata
summarise(total = sum(total), SE = sqrt(sum(variance)))
@tslumley
Copy link

nhanes %>% mutate(WT_CHOL_notna=ifelse(is.na(HI_CHOL),0, WTMEC2YR), HI_CHOL_notna=ifelse(is.na(HI_CHOL),0,HI_CHOL)) %>%
  group_by(SDMVSTRA) %>%
  summarise(total = sum(HI_CHOL_notna * WT_CHOL_notna, na.rm = TRUE), 
            # calculate variance between observations within strata since we don't have clusters
            variance = sum(!is.na(HI_CHOL_notna)) * var(HI_CHOL_notna * WT_CHOL_notna, na.rm = TRUE)) %>%
  # sum variance from each strata
  summarise(total = sum(total), SE = sqrt(sum(variance)))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment