## jamesdunham/ps.R

## ps.R
library(dgo)
library(dplyr)

# Load the `toy_dgirtfit` and `targets` example datasets (presumably both
# shipped with dgo -- confirm).
data(toy_dgirtfit)
data(targets)

# Collapse the targets to one row per year/state/race3 combination by summing
# `proportion`, drop the grouping, and store the state and race3 labels as
# character. The two columns are converted explicitly instead of through the
# deprecated mutate_each()/funs() helpers.
targets <- targets %>%
  group_by(year, state, race3) %>%
  summarize(proportion = sum(proportion)) %>%
  ungroup() %>%
  mutate(state = as.character(state), race3 = as.character(race3))


# Convert the fit to a long table; the printed head below shows one row per
# param/state/race3/year/iteration with the sampled value.
samples <- as.data.frame(toy_dgirtfit)
head(samples)
#        param state race3 year iteration     value
# 1: theta_bar    SC black 2009         1 2.5349865
# 2: theta_bar    SC black 2009         2 0.7095619
# 3: theta_bar    SC black 2009         3 0.4745461
# 4: theta_bar    SC black 2009         4 1.0656432
# 5: theta_bar    SC black 2009         5 1.3278823
# 6: theta_bar    SC black 2009         6 1.8961256

# Largest iteration index in `samples`; used below as the number of copies of
# the targets to create.
iters <- max(samples$iteration)
replicate_by_iters <- function(df, iters) {
  # Stack `iters` copies of `df`, tagging each copy with its index.
  #
  # Args:
  #   df: the data frame to replicate.
  #   iters: the number of copies; a single finite, non-negative whole number.
  #     Both integers (3L) and whole-number doubles (3) are accepted.
  #
  # Returns:
  #   The copies bound row-wise into one data frame. Every row of the i-th
  #   copy has the integer value i in the column `iteration`; a column of
  #   that name already present in `df` is overwritten.
  #
  # Stops with an error if `iters` is not a valid count, or if the stacked
  # result does not have nrow(df) * iters rows.
  stopifnot(
    is.numeric(iters),
    length(iters) == 1L,
    is.finite(iters),
    iters >= 0,
    iters == trunc(iters),
    iters <= .Machine$integer.max
  )
  # Normalise the storage type so a count such as max() of a double column
  # behaves the same as an integer count.
  iters <- as.integer(iters)
  copies <- lapply(seq_len(iters), function(i) {
    mutate(df, iteration = i)
  })
  stacked <- bind_rows(copies)
  # Sanity check: stacking must neither drop nor add rows.
  stopifnot(nrow(stacked) == nrow(df) * iters)
  stacked
}

# Repeat the targets once per iteration found in `samples`, so that
# `iteration` can serve as a stratum next to state and year.
targets_by_iter <- replicate_by_iters(targets, iters)
head(targets_by_iter)
# # A tibble: 6 × 5
#    year state race3   proportion iteration
#   <int> <chr> <chr>        <dbl>     <int>
# 1  1975    AK black 4.708026e-05         1
# 2  1975    AK other 2.426424e-04         1
# 3  1975    AK white 1.230413e-03         1
# 4  1975    AL black 3.947148e-03         1
# 5  1975    AL other 3.409934e-06         1
# 6  1975    AL white 1.298360e-02         1

# Poststratify within each state/year/iteration stratum, with race3 as the
# variable that is aggregated over. `aggregated_names` is written out in full
# so the call does not depend on partial argument matching.
ps_samples <- poststratify(
  samples,
  targets_by_iter,
  strata_names = c("state", "year", "iteration"),
  aggregated_names = "race3"
)
head(ps_samples)
#    state year iteration       value
# 1:    SC 2009         1 -0.64924274
# 2:    SC 2009         2 -0.23666138
# 3:    SC 2009         3 -0.04288345
# 4:    SC 2009         4 -0.28472934
# 5:    SC 2009         5 -0.69746489
# 6:    SC 2009         6 -1.11062931
	library(dgo)
	library(dplyr)

	# Load the `toy_dgirtfit` and `targets` example datasets (presumably both
	# shipped with dgo -- confirm).
	data(toy_dgirtfit)
	data(targets)

	# Collapse the targets to one row per year/state/race3 combination by
	# summing `proportion`, drop the grouping, and store the state and race3
	# labels as character. The two columns are converted explicitly instead of
	# through the deprecated mutate_each()/funs() helpers.
	targets <- targets %>%
	  group_by(year, state, race3) %>%
	  summarize(proportion = sum(proportion)) %>%
	  ungroup() %>%
	  mutate(state = as.character(state), race3 = as.character(race3))


	# Convert the fit to a long table; the printed head below shows one row per
	# param/state/race3/year/iteration with the sampled value.
	samples <- as.data.frame(toy_dgirtfit)
	head(samples)
	# param state race3 year iteration value
	# 1: theta_bar SC black 2009 1 2.5349865
	# 2: theta_bar SC black 2009 2 0.7095619
	# 3: theta_bar SC black 2009 3 0.4745461
	# 4: theta_bar SC black 2009 4 1.0656432
	# 5: theta_bar SC black 2009 5 1.3278823
	# 6: theta_bar SC black 2009 6 1.8961256

	# Largest iteration index in `samples`; used below as the number of copies
	# of the targets to create.
	iters <- max(samples$iteration)
	replicate_by_iters <- function(df, iters) {
	  # Stack `iters` copies of `df`, tagging each copy with its index.
	  #
	  # Args:
	  #   df: the data frame to replicate.
	  #   iters: the number of copies; a single finite, non-negative whole
	  #     number. Both integers (3L) and whole-number doubles (3) are accepted.
	  #
	  # Returns:
	  #   The copies bound row-wise into one data frame. Every row of the i-th
	  #   copy has the integer value i in the column `iteration`; a column of
	  #   that name already present in `df` is overwritten.
	  #
	  # Stops with an error if `iters` is not a valid count, or if the stacked
	  # result does not have nrow(df) * iters rows.
	  stopifnot(
	    is.numeric(iters),
	    length(iters) == 1L,
	    is.finite(iters),
	    iters >= 0,
	    iters == trunc(iters),
	    iters <= .Machine$integer.max
	  )
	  # Normalise the storage type so a count such as max() of a double column
	  # behaves the same as an integer count.
	  iters <- as.integer(iters)
	  copies <- lapply(seq_len(iters), function(i) {
	    mutate(df, iteration = i)
	  })
	  stacked <- bind_rows(copies)
	  # Sanity check: stacking must neither drop nor add rows.
	  stopifnot(nrow(stacked) == nrow(df) * iters)
	  stacked
	}

	# Repeat the targets once per iteration found in `samples`, so that
	# `iteration` can serve as a stratum next to state and year.
	targets_by_iter <- replicate_by_iters(targets, iters)
	head(targets_by_iter)
	# # A tibble: 6 × 5
	# year state race3 proportion iteration
	# <int> <chr> <chr> <dbl> <int>
	# 1 1975 AK black 4.708026e-05 1
	# 2 1975 AK other 2.426424e-04 1
	# 3 1975 AK white 1.230413e-03 1
	# 4 1975 AL black 3.947148e-03 1
	# 5 1975 AL other 3.409934e-06 1
	# 6 1975 AL white 1.298360e-02 1

	# Poststratify within each state/year/iteration stratum, with race3 as the
	# variable that is aggregated over. `aggregated_names` is written out in
	# full so the call does not depend on partial argument matching.
	ps_samples <- poststratify(
	  samples,
	  targets_by_iter,
	  strata_names = c("state", "year", "iteration"),
	  aggregated_names = "race3"
	)
	head(ps_samples)
	# state year iteration value
	# 1: SC 2009 1 -0.64924274
	# 2: SC 2009 2 -0.23666138
	# 3: SC 2009 3 -0.04288345
	# 4: SC 2009 4 -0.28472934
	# 5: SC 2009 5 -0.69746489
	# 6: SC 2009 6 -1.11062931