Created
May 1, 2022 11:39
-
-
Save Torvaney/68cc9f8228ea234f2b06b9c3703a7298 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attach the packages this script relies on: tidyverse for the pipe,
# data-manipulation, parsing, string and plotting functions; rvest for
# read_html(), html_nodes()/html_node() and html_table().
library(tidyverse)
library(rvest)
# Scrape box-office grosses by area from the Box Office Mojo title page.
#
# "h3 + table" matches every <table> that immediately follows an <h3>
# heading; the result is one node per matched table.
# NOTE(review): the table is picked by position, so re-check the index if the
# page layout changes.
box_office <-
  read_html("https://www.boxofficemojo.com/title/tt0389790/?ref_=bo_se_r_1") %>%
  html_nodes("h3 + table") %>%
  # Select "Europe, Middle East, and Africa" table only
  .[[2]] %>%
  html_table() %>%
  # Normalise the scraped headers to snake_case so the columns can be referred
  # to as `area` and `gross` below.
  janitor::clean_names() %>%
  mutate(
    # Convert the scraped gross to a number.
    # Presumably it arrives as formatted text such as "$1,234" -- verify
    # against the live page.
    gross = parse_number(gross)
  )
# Scrape the population-by-country table from Worldometer.
#
# The table is selected by its element id, "example2".
# NOTE(review): the column names used later (`country_or_dependency`,
# `population_2020`) come from the page's own headers after clean_names();
# if the site relabels the population column, these names must be updated.
population <-
  read_html("https://www.worldometers.info/world-population/population-by-country/") %>%
  html_node("#example2") %>%
  html_table() %>%
  janitor::clean_names() %>%
  mutate(
    # Convert the scraped population figure to a number.
    # Presumably it arrives as text with thousands separators -- verify.
    population_2020 = parse_number(population_2020)
  )
# Attach each area's population to its box-office row and derive gross per
# head of population.
#
# A left join keeps every box-office row. Areas whose name has no exact match
# in the population table get NA for population_2020, and therefore NA for
# gross_per_capita.
box_office_per_capita <-
  box_office %>%
  # This join isn't perfect, but the countries we care about *are* matches, so meh
  left_join(population, by = c("area" = "country_or_dependency")) %>%
  mutate(gross_per_capita = gross / population_2020)
# Bar chart comparing per-capita gross across the three Baltic states.
box_office_per_capita %>%
  # Keep only the three areas being compared.
  filter(area %in% c("Latvia", "Lithuania", "Estonia")) %>%
  ggplot(aes(x = area, y = gross_per_capita)) +
  geom_col() +
  labs(
    title = "Was Bee Movie more popular in Lithuania?",
    subtitle = "Gross Box Office per capita ($)",
    # Axis titles are dropped; the title and subtitle already describe them.
    x = NULL,
    y = NULL,
    # Two-line caption crediting both data sources.
    caption = str_c(c(
      "Box office data: https://www.boxofficemojo.com",
      "Population data: https://www.worldometers.info"
    ), collapse = "\n")
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment