Last active
October 12, 2017 15:18
-
-
Save gergness/5145e2dcfdfa8abd49a6a48b886a5daf to your computer and use it in GitHub Desktop.
Exploring arrange with survey data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```r
# Attach the packages used below without printing their startup messages.
suppressPackageStartupMessages({
  library(survey)
  library(srvyr)
  library(dplyr)
})

# Stratified sample from the examples: strata are taken from `stype` and the
# sampling weights from `pw`.
data(api)
dstrata <- as_survey(apistrat, strata = stype, weights = pw)
# The survey object keeps the raw data in dstrata$variables
dstrata$variables %>%
  select(stype, api99, pw)
#> # A tibble: 200 x 3
#> stype api99 pw
#> * <fctr> <int> <dbl>
#> 1 E 816 44.21
#> 2 E 476 44.21
#> 3 E 544 44.21
#> 4 E 457 44.21
#> 5 E 659 44.21
#> 6 E 780 44.21
#> 7 E 787 44.21
#> 8 E 731 44.21
#> 9 E 508 44.21
#> 10 E 658 44.21
#> # ... with 190 more rows

# The weighting information is stored separately, in dstrata$prob
head(dstrata$prob)
#> 1 2 3 4 5 6
#> 0.02261932 0.02261932 0.02261932 0.02261932 0.02261932 0.02261932

# prob is equal to 1 / weight. The check uses a numeric tolerance instead of
# `==`, because exact equality on doubles is fragile. Attributes are ignored
# because prob carries names (see the head() output above) while 1 / pw does
# not, and a names mismatch would otherwise make all.equal() report a
# difference.
isTRUE(all.equal(
  dstrata$prob,
  1 / dstrata$variables$pw,
  check.attributes = FALSE
))
#> [1] TRUE
# Sorting the raw data *before* the survey object is created is safe: the
# estimates are the same as those from the unsorted design.
dstrata_rearranged <- as_survey(
  arrange(apistrat, api99),
  strata = stype,
  weights = pw
)

# Estimates from the design built on the pre-sorted data
dstrata_rearranged %>%
  group_by(stype) %>%
  summarise(api99 = survey_mean(api99))
#> # A tibble: 3 x 3
#> stype api99 api99_se
#> <fctr> <dbl> <dbl>
#> 1 E 635.87 13.33941
#> 2 H 617.36 15.80575
#> 3 M 610.20 16.75894

# Estimates from the original, unsorted design
dstrata %>%
  group_by(stype) %>%
  summarise(api99 = survey_mean(api99))
#> # A tibble: 3 x 3
#> stype api99 api99_se
#> <fctr> <dbl> <dbl>
#> 1 E 635.87 13.33941
#> 2 H 617.36 15.80575
#> 3 M 610.20 16.75894
# Sorting only the data held inside an existing survey object, without also
# reordering the weighting information, gives wrong results: the rows no
# longer line up with their weights.
dstrata_bad <- dstrata
dstrata_bad$variables <- arrange(dstrata_bad$variables, api99)

dstrata_bad %>%
  group_by(stype) %>%
  summarise(api99 = survey_mean(api99))
#> # A tibble: 3 x 3
#> stype api99 api99_se
#> <fctr> <dbl> <dbl>
#> 1 E 624.0336 14.98117
#> 2 H 611.6568 17.38391
#> 3 M 608.0925 17.86467
# When I wrote that vignette, I had not realised that the survey package
# provides a method for `[` that allows this kind of rearrangement.
# I think I should add it to srvyr.
# Subsetting the design object itself keeps the data and the weighting
# information in step, so the estimates match the unsorted design again.
dstrata_fixed <- dstrata[order(dstrata$variables$api99)]

dstrata_fixed %>%
  group_by(stype) %>%
  summarise(api99 = survey_mean(api99))
#> # A tibble: 3 x 3
#> stype api99 api99_se
#> <fctr> <dbl> <dbl>
#> 1 E 635.87 13.33941
#> 2 H 617.36 15.80575
#> 3 M 610.20 16.75894
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment