Skip to content

Instantly share code, notes, and snippets.

@jkaupp
Last active April 27, 2018 17:53
Show Gist options
  • Save jkaupp/1456f79362b5f05270868416d083a9e3 to your computer and use it in GitHub Desktop.
Save jkaupp/1456f79362b5f05270868416d083a9e3 to your computer and use it in GitHub Desktop.
STEM Funding Plot
library(tidyverse)
library(qualtRics)
library(rlang)
library(viridis)
library(hrbrthemes)
registerOptions(api_token="TOTALLY_NOT_MY_TOKEN", root_url="https://queensu.qualtrics.com")
surveys <- getSurveys()
srvy_id <- surveys %>%
filter(grepl("STEM", name)) %>%
pull(id)
nserc_funding_data <- getSurvey(surveyID = srvy_id, force_request = TRUE)
funding_levels <- c("$0-$2,000", "$2,001-$5,000", "$5,001-$10,000", "$10,001-$20,000", "$20,001-$50,000", "$50,001-$100,000", "$100,001-$200,000", "$200,001-$500,000", "$500,001-$1,000,000", "$over 1 million")
labels <- nserc_funding_data %>%
select(matches("Q[2-5]")) %>%
map_chr(function(x) attributes(x)$label) %>%
tibble(item = names(.),
label = .)
resp <- filter(labels, grepl("Q5", item)) %>%
pull(label)
tidy_survey <- nserc_funding_data %>%
select(matches("Q[2-5]")) %>%
gather(item, value, -Q2, -Q3, -Q2_TEXT, -Q3_TEXT,na.rm = TRUE) %>%
mutate(question = str_extract(item, "Q\\d")) %>%
rename(primary_appointment = Q2,
funding_in = Q3) %>%
mutate_at(c("primary_appointment", "funding_in"), function(x) replace(x, grepl("other", x,ignore.case = TRUE), NA)) %>%
mutate_all(as.character) %>%
mutate(primary_appointment = coalesce(primary_appointment, Q2_TEXT),
funding_in = coalesce(funding_in, Q3_TEXT)) %>%
select(question, item, value, primary_appointment, funding_in) %>%
left_join(labels)
# Faceted plot by Primary Appointment -----
discipline_n <- nserc_funding_data %>%
rename(primary_appointment = Q2,
funding_in = Q3) %>%
mutate_at(c("primary_appointment", "funding_in"), function(x) replace(x, grepl("other", x,ignore.case = TRUE), NA)) %>%
mutate_all(as.character) %>%
mutate(primary_appointment = coalesce(primary_appointment, Q2_TEXT),
funding_in = coalesce(funding_in, Q3_TEXT)) %>%
count(primary_appointment) %>%
filter(primary_appointment %in% c("Education","Engineering", "Science"))
plot_data <- tidy_survey %>%
filter(question == "Q5", primary_appointment != 'Technology') %>%
filter(primary_appointment %in% c("Education","Engineering","Science")) %>%
mutate(value = stringi::stri_replace_last_regex(value, "\\$", "")) %>%
mutate(value = factor(value, stringi::stri_replace_last_regex(funding_levels, "\\$", ""))) %>%
filter(value != "$0-2,000") %>%
mutate(value = fct_drop(value)) %>%
select(-funding_in) %>%
count(label, value, primary_appointment) %>%
ungroup() %>%
complete(label, value, nesting(primary_appointment), fill = list(n = 0)) %>%
left_join(discipline_n, by = "primary_appointment") %>%
mutate(label = factor(label, rev(resp))) %>%
mutate(percent = n.x/n.y) %>%
mutate(percent = replace(percent, is.nan(percent), 0)) %>%
ungroup() %>%
mutate(primary_appointment = glue::glue("{primary_appointment}: {n.y} responses")) %>%
mutate(value = fct_recode(value, "> 1-million" = "over 1 million"))
plot <- ggplot(plot_data, aes(x = value, y = label, fill = percent)) +
geom_tile(color = "white") +
scale_fill_viridis("Response Frequency", option = "cividis", labels = scales::percent) +
scale_x_discrete(expand=c(0,0)) +
scale_y_discrete(expand=c(0,0)) +
labs(title = "Sources of STEM Education Research Funding in Canada",
subtitle = "Results from a survey of funding sources of Canadian STEM education researchers",
caption = "Survey conducted by a Special Interest Group within the Canadian Engineering Education Association",
x = NULL, y = NULL) +
facet_wrap(~primary_appointment, nrow = 1 ) +
coord_equal() +
theme_ipsum_rc(base_size = 16, grid = FALSE, plot_title_size = 20, subtitle_size = 18) +
theme(legend.position = "bottom",
legend.key.width = unit(1, "cm"),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
ggsave("STEM ED Funding by Primary Appointment.png", plot, width = 15, height = 10)
# Combined plot ----
total_n <- nserc_funding_data %>%
filter(!is.na(Q2)) %>%
nrow()
combined_plot_data <- tidy_survey %>%
filter(question == "Q5", primary_appointment != 'Technology') %>%
filter(primary_appointment %in% c("Education","Engineering","Science")) %>%
mutate(value = stringi::stri_replace_last_regex(value, "\\$", "")) %>%
mutate(value = factor(value, stringi::stri_replace_last_regex(funding_levels, "\\$", ""))) %>%
filter(value != "$0-2,000") %>%
mutate(value = fct_drop(value)) %>%
select(-funding_in) %>%
count(label, value) %>%
ungroup() %>%
complete(label, value, fill = list(n = 0)) %>%
mutate(label = factor(label, rev(resp))) %>%
mutate(percent = n/total_n) %>%
mutate(percent = replace(percent, is.nan(percent), 0)) %>%
ungroup() %>%
mutate(value = fct_recode(value, "> 1-million" = "over 1 million"))
combined_plot <- ggplot(combined_plot_data, aes(x = value, y = label, fill = percent)) +
geom_tile(color = "white") +
scale_fill_viridis("Response Frequency", option = "cividis", labels = scales::percent) +
scale_x_discrete(expand=c(0,0)) +
scale_y_discrete(expand=c(0,0)) +
labs(title = "Sources of STEM Education Research Funding in Canada",
subtitle = "Results from a survey of funding sources of Canadian STEM education researchers",
caption = "Survey conducted by a Special Interest Group within the Canadian Engineering Education Association",
x = NULL, y = NULL) +
coord_equal() +
theme_ipsum_rc(base_size = 16, grid = FALSE, plot_title_size = 20, subtitle_size = 18) +
theme(legend.position = "bottom",
legend.key.width = unit(1, "cm"),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
ggsave("Overall STEM ED Funding.png", combined_plot, width = 15, height = 10)
recoded_eng_data <- tidy_survey %>%
filter(question == "Q5", primary_appointment != 'Technology') %>%
filter(primary_appointment == "Engineering") %>%
mutate(value = stringi::stri_replace_last_regex(value, "\\$", "")) %>%
mutate(value = factor(value, stringi::stri_replace_last_regex(funding_levels, "\\$", ""))) %>%
filter(value != "$0-2,000") %>%
mutate(value = fct_drop(value)) %>%
select(-funding_in) %>%
count(label, value) %>%
ungroup() %>%
complete(label, value, fill = list(n = 0)) %>%
mutate(label = factor(label, rev(resp))) %>%
mutate(percent = n/eng_total) %>%
mutate(percent = replace(percent, is.nan(percent), 0)) %>%
ungroup() %>%
mutate(value = fct_recode(value, "> 1-million" = "over 1 million")) %>%
mutate(label = trimws(label)) %>%
mutate(
label = fct_collapse(
label,
institutional = c(
"Funding from your Department",
"Funding from your Faculty/Dean's office",
"Funding from your University"
),
provincial = c(
"HEQCO",
"Provincial ministry"
),
federal = c("Federal ministry"),
ext_council = c(
"Non-Canadian research granting council (e.g. NSF)",
"Mitacs",
"Provincial council on articulation and transfer (e.g. BCCAT, ONCAT, etc.)",
"Non-profit organization"
),
private = c("Private industry", "Benefactor/private donor"),
other = "Other",
tri_council = c(
"CIHR",
"SSHRC graduate student award",
"SSHRC grant",
#"NSERC Chair in Design Engineering",
"NSERC graduate student award",
"NSERC grant"
)
)
)
summarized_data <- recoded_eng_data %>%
filter(label != "other") %>%
group_by(label) %>%
summarize(n = sum(n)) %>%
mutate(percent = n/eng_total)
axis_labels <- c("institutional" = "Institutional",
"tri_council" = "Tri-Council",
"provincial" = "Provincial Agencies & Ministry",
"ext_council" = "Non-Profit & US Councils",
"private" = "Private Benefactors & Funding",
"federal" = "Federal Agencies & Ministry")
source_plot <- ggplot(summarized_data, aes(x = reorder(label, percent), y =percent)) +
geom_col(fill = viridis(1, option = "cividis")) +
geom_text(aes(label = scales::percent(percent)), color = "white", family = "Roboto Condensed", hjust = 1, nudge_y = -0.01, data = filter(summarized_data, label != "federal")) +
geom_text(aes(label = scales::percent(percent)), color = "black", nudge_y = 0.05, family = "Roboto Condensed", data = filter(summarized_data, label == "federal")) +
coord_flip() +
scale_x_discrete(labels = function(x) axis_labels[x]) +
scale_y_continuous(labels = scales::percent, limits = c(0,1)) +
labs(title = "Primary Sources of Engineering Education Research Funding in Canada",
subtitle = str_wrap(glue::glue("Frequency of Engineering education funding by source, of {eng_total} responses. Results from a survey of funding sources of Canadian STEM education researchers."), 80),
caption = "Survey conducted by a Special Interest Group within the Canadian Engineering Education Association",
x = NULL, y = NULL) +
theme_ipsum_rc(base_size = 16, grid = "X", plot_title_size = 20, subtitle_size = 18)
ggsave("Frequency of Overall ENG ED Funding by source.png", source_plot, width = 16, height = 10)
summarized_funding <- recoded_eng_data %>%
filter(label != "other", label != "NSERC Chair in Design Engineering") %>%
mutate(value = gsub("\\$", "", value),
value = gsub("\\,", "", value)) %>%
mutate(value = ifelse(value == "> 1-million", "1000001-1000000", value)) %>%
separate(value, c("lower","upper"), sep = "-") %>%
mutate(lower = as.numeric(lower) -1) %>%
mutate(average = as.numeric(lower) + (as.numeric(upper) - as.numeric(lower))/2) %>%
mutate(funds = average * n) %>%
group_by(label) %>%
summarize(funds = sum(funds))
funding_plot <- ggplot(summarized_funding, aes(x = reorder(label, funds), y = funds)) +
geom_col(fill = viridis(1, option = "cividis")) +
geom_text(aes(label = scales::dollar(funds)), color = "white", nudge_y = -25000, family = "Roboto Condensed", hjust = 1) +
#geom_text(aes(label = scales::dollar(funds)), color = "black", nudge_y = 150000, family = "Roboto Condensed", data = filter(summarized_funding, label == "federal")) +
coord_flip() +
scale_x_discrete(labels = function(x) axis_labels[x]) +
scale_y_continuous(labels = scales::dollar) +
labs(title = "Engineering Education Research Funding in Canada",
subtitle = str_wrap(glue::glue("Average Amount of Engineering Education funding by source, from {eng_total} responses. Average funding calculated as the mean value of a funding range reported (e.g. $2,000 - $5,000 = $3,500)."), 80),
caption = "\nData: Survey of funding sources of Canadian STEM education researchers. | Survey conducted by a Special Interest Group within the Canadian Engineering Education Association | Graphic: @jakekaupp",
x = NULL, y = NULL) +
theme_ipsum_rc(base_size = 16, grid = "X", plot_title_size = 20, subtitle_size = 18)
ggsave("Overall ENG ED Funding Amounts by source.png", funding_plot, width = 16, height = 10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment