Skip to content

Instantly share code, notes, and snippets.

@swuyts
Last active December 14, 2017 16:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save swuyts/3c2a4e034bf64397c49facc92e9ff936 to your computer and use it in GitHub Desktop.
Save swuyts/3c2a4e034bf64397c49facc92e9ff936 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(pdftools)
# Parse the FWO-SB PDF into one row per project with the columns
# Mandaathouder, Universiteit and Aanvraagnummer.
df <- pdf_text("SB-Beurs-2017.pdf") %>%
  str_split(pattern = "\n") %>%
  unlist() %>%
  # Name the column explicitly instead of relying on as_tibble()'s implicit
  # naming of a bare character vector.
  tibble(value = .) %>%
  # Keep only the lines that contain a project number.
  filter(str_detect(value, "18N")) %>%
  mutate(
    value = str_trim(value),
    # Drop a leading "<capital letter><space>" marker. Any further whitespace
    # behind it is removed too, so the first column produced by separate()
    # cannot end up empty.
    value = str_replace(value, "^[A-Z] \\s*", "")
  ) %>%
  # Columns are delimited by runs of two or more whitespace characters; lines
  # with fewer than three pieces get NA in the trailing columns.
  separate(
    value,
    into = c("Mandaathouder", "Universiteit", "Aanvraagnummer"),
    sep = "\\s{2,}",
    fill = "right"
  ) %>%
  # Not everything is parsed well, so the columns need some tweaking.
  mutate(
    # The project number ended up glued to the university: take it from the
    # last 8 characters of that column.
    Aanvraagnummer = if_else(
      is.na(Aanvraagnummer),
      str_sub(Universiteit, -8, -1),
      Aanvraagnummer
    ),
    # ... and strip those 8 characters from the university column.
    Universiteit = if_else(
      str_detect(Universiteit, "18N"),
      str_sub(Universiteit, 1, -9),
      Universiteit
    ),
    # Trim BEFORE the comparisons below; stripping the project number can
    # leave a trailing space (e.g. "Leuven ") that would defeat `==`.
    Universiteit = str_trim(Universiteit),
    # The university name ended up at the end of the Mandaathouder column.
    # [A-Za-z] rather than [A-z]: the range A-z also spans the punctuation
    # characters that sit between "Z" and "a".
    Universiteit = if_else(
      Universiteit == "",
      str_extract(Mandaathouder, "[A-Za-z]+$"),
      Universiteit
    ),
    # Fix rows where only "Leuven" was put in the university column.
    Universiteit = if_else(Universiteit == "Leuven", "KU Leuven", Universiteit)
  )
# Number of projects per university.
projects_per_university <- df %>%
  count(Universiteit, name = "Total_projects")

# Horizontal bar chart, universities ordered by their number of projects.
total_projects_plot <- ggplot(
  projects_per_university,
  aes(x = reorder(Universiteit, Total_projects), y = Total_projects)
) +
  geom_col() +
  labs(x = "", y = "Amount of FWO-SB projects") +
  coord_flip() +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 16, face = "bold"))

# Show the chart, then write that same chart to disk.
total_projects_plot
ggsave("FWO_SB_2017.png", plot = total_projects_plot)
# Update: normalise the project counts by the number of professors
# (NumberZAP) at each university.
professors <- tibble(
  Universiteit = c("UAntwerpen", "KU Leuven", "UGent", "VUB", "UHasselt"),
  NumberZAP = c(624, 1561, 1502, 402, 392)
)

# Projects per university joined with the professor counts. The join key is
# spelled out so the join does not depend on which column names happen to be
# shared by the two tables.
projects_with_zap <- df %>%
  count(Universiteit, name = "Total_projects") %>%
  left_join(professors, by = "Universiteit")

# Universities without a professor count cannot be normalised; report them
# instead of letting them turn into silent NA bars in the chart.
unmatched <- projects_with_zap %>%
  filter(is.na(NumberZAP)) %>%
  pull(Universiteit)
if (length(unmatched) > 0) {
  warning(
    "No professor count for: ", paste(unmatched, collapse = ", "),
    call. = FALSE
  )
}

# Horizontal bar chart of projects per 100 professors, ordered by that rate.
per_100_prof_plot <- projects_with_zap %>%
  filter(!is.na(NumberZAP)) %>%
  mutate(normalized_count = Total_projects / (NumberZAP / 100)) %>%
  ggplot(
    aes(x = reorder(Universiteit, normalized_count), y = normalized_count)
  ) +
  geom_col() +
  xlab("") +
  ylab("Amount of FWO-SB projects per 100 professors") +
  coord_flip() +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 16, face = "bold"))

# Show the chart, then save exactly this chart rather than whatever
# last_plot() happens to return.
per_100_prof_plot
ggsave("FWO_SB_2017_per100prof.png", plot = per_100_prof_plot)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment