Skip to content

Instantly share code, notes, and snippets.

@nhatley
Last active August 22, 2020 15:47
Show Gist options
  • Save nhatley/4ed3f3ced6aa44ade7531cf49a135467 to your computer and use it in GitHub Desktop.
Save nhatley/4ed3f3ced6aa44ade7531cf49a135467 to your computer and use it in GitHub Desktop.
#code from start to finish
##install or update packages if neccessary
#install.packages("tidyverse")
#install.packages("haven")
##load packages in
library(tidyverse) #loads all "core" tidyverse packages like dplyr, tidyr, forcats, and ggplot2
library(haven)
##read dataset in with value labels (as_factor)
Apr17 <- read_sav("Apr17 public.sav", user_na = TRUE) %>% as_factor()
##create trump_approval variable and relevel it
Apr17 <- Apr17 %>%
mutate(trump_approval = case_when(
q1 == "Approve" & q1a == "Very strongly" ~ "Strongly approve",
q1 == "Approve" & q1a != "Very strongly" ~ "Not strongly approve",
q1 == "Disapprove" & q1a == "Very strongly" ~ "Strongly disapprove",
q1 == "Disapprove" & q1a != "Very strongly" ~ "Not strongly disapprove",
q1 == "Don't know/Refused (VOL.)" | q1a == "Don't know/Refused (VOL.)" ~ "Refused"
) #this parentheses closes our call to case_when
%>% #and then sends it to fct_relevel with %>%
fct_relevel("Strongly approve",
"Not strongly approve",
"Not strongly disapprove",
"Strongly disapprove",
"Refused"
) #this parentheses closes our call to fct_relevel
) #this parentheses closes our call to mutate
## collapse education variable into 3 categories
Apr17 <- Apr17 %>%
mutate(educ_cat = fct_collapse(educ2,
"High school grad or less" = c(
"Less than high school (Grades 1-8 or no formal schooling)",
"High school incomplete (Grades 9-11 or Grade 12 with NO diploma)",
"High school graduate (Grade 12 with diploma or GED certificate)"
),
"Some college" = c(
"Some college, no degree (includes some community college)",
"Two year associate degree from a college or university"
),
"College grad+" = c(
"Four year college or university degree/Bachelor's degree (e.g., BS, BA, AB)",
"Some postgraduate or professional schooling, no postgraduate degree",
"Postgraduate or professional degree, including master's, doctorate, medical or law degree"
)
) #this parentheses closes our call to fct_collapse
) #this parentheses closes our call to mutate
##get trump_approval weighted totals
trump_approval <- Apr17 %>%
group_by(trump_approval) %>%
summarise(weighted_n = sum(weight))
##get trump_approval weighted proportions
trump_approval <- Apr17 %>%
##group by trump_approval to calculated weighted totals by taking the sum of the weights
group_by(trump_approval) %>%
summarise(weighted_n = sum(weight)) %>%
##add the weighted_group_size to get the total weighted n and
##divide weighted_n by weighted_group_size to get the proportions
mutate(weighted_group_size = sum(weighted_n),
weighted_estimate = weighted_n / weighted_group_size
)
##get trump_approval by education
trump_estimates_educ <- Apr17 %>%
#group by educ and trump approval to get weighted n's per group
group_by(educ_cat, trump_approval) %>%
#calculate the total number of people in each answer and education category using survey weights (weight)
summarise(weighted_n = sum(weight)) %>%
#group by education to calculate education category size
group_by(educ_cat) %>%
#add columns for total group size and the proportion
mutate(weighted_group_size = sum(weighted_n),
weighted_estimate = weighted_n/weighted_group_size)
##select only columns interested in for this analysis
###rename psraid to resp_id
Apr17 <- Apr17 %>%
select(resp_id = psraid, weight, trump_approval, educ_cat, racethn, gen5)
##create Apr_17 long with gather
Apr17_long <- Apr17 %>%
#gather educ_cat, racethn, gen5 into two columns:
##a key called "subgroup variable" (educ_cat, racethn, gen5)
##and a value called "subgroup"
gather(key = subgroup_variable, value = subgroup, educ_cat, racethn, gen5)
##get weighted estimates for every subgroup
trump_estimates <- Apr17_long %>%
#group by subgroup_variable, subgroup, and trump approval to get weighted n of approval/disapproval for all subgroup cats
group_by(subgroup_variable, subgroup, trump_approval) %>%
#calculate the total number of people in each answer and education category using survey weights (weight)
summarise(weighted_n = sum(weight)) %>%
#group by subgroup only to calculate subgroup category size
group_by(subgroup) %>%
#add columns for total group size and the proportion
mutate(weighted_group_size = sum(weighted_n),
weighted_estimate = weighted_n/weighted_group_size)
#only want proportions so select out total categories
trump_estimates <- trump_estimates %>%
select(-weighted_n, -weighted_group_size)
##create plot
trump_estimates %>%
##remove "Refused" category for Trump Approval
filter(trump_approval != "Refused") %>%
##remove Refused categories in our subgroup values
filter(!(subgroup %in% c("Don't know/Refused (VOL.)", "DK/Ref"))) %>%
ggplot(
aes(
x = weighted_estimate,
y = subgroup
)
) +
geom_point() +
scale_x_continuous(limits = c(0, .8),
breaks = seq(0, .6, by = .2),
labels = scales::percent(seq(0, .6, by = .2), accuracy = 1)
) +
facet_grid(cols = vars(trump_approval),
rows = vars(subgroup_variable),
scales = "free_y",
space = "free"
) +
theme_bw() +
theme(axis.title.y = element_blank())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment