Skip to content

Instantly share code, notes, and snippets.

@Torvaney
Created May 1, 2022 11:39
Show Gist options
  • Save Torvaney/68cc9f8228ea234f2b06b9c3703a7298 to your computer and use it in GitHub Desktop.
Save Torvaney/68cc9f8228ea234f2b06b9c3703a7298 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(rvest)
# Box office grosses by area, scraped from the Box Office Mojo title page
# for tt0389790.
#
# The page is parsed once, every <table> that immediately follows an <h3>
# is collected, and the second such table is kept (the "Europe, Middle
# East, and Africa" table). Column names are normalised with
# janitor::clean_names() and `gross` is converted from text to a number.
# Intermediates live inside local() so only `box_office` is created.
box_office <- local({
  title_page <- read_html(
    "https://www.boxofficemojo.com/title/tt0389790/?ref_=bo_se_r_1"
  )
  region_tables <- html_nodes(title_page, "h3 + table")

  # Select "Europe, Middle East, and Africa" table only (position-based)
  emea_table <- html_table(region_tables[[2]])
  emea_table <- janitor::clean_names(emea_table)

  mutate(emea_table, gross = parse_number(gross))
})
# Population by country, scraped from the Worldometers
# population-by-country page.
#
# The element with id "example2" is read as a table, its column names are
# normalised with janitor::clean_names(), and `population_2020` is
# converted from text to a number.
# NOTE(review): the `population_2020` name presumably derives from the
# table header on the site; if that header changes, this step will fail.
# Confirm against the live page.
population <- local({
  country_page <- read_html(
    "https://www.worldometers.info/world-population/population-by-country/"
  )
  country_table <- html_table(html_node(country_page, "#example2"))
  country_table <- janitor::clean_names(country_table)

  mutate(country_table, population_2020 = parse_number(population_2020))
})
# Box office gross per head of population for every area in `box_office`.
#
# Each box-office row is matched to a population row by name
# (`area` == `country_or_dependency`). This is a left join, so areas with
# no matching country name are kept and get NA for the population columns
# and therefore NA for `gross_per_capita`.
box_office_per_capita <- local({
  # This join isn't perfect, but the countries we care about *are* matches,
  # so meh
  with_population <- left_join(
    box_office,
    population,
    by = c(area = "country_or_dependency")
  )

  mutate(with_population, gross_per_capita = gross / population_2020)
})
# Bar chart of gross box office per capita for the three Baltic states.
# Left as a bare top-level expression so the plot is printed when the
# script is run.
ggplot(
  data = filter(
    box_office_per_capita,
    area %in% c("Latvia", "Lithuania", "Estonia")
  ),
  mapping = aes(x = area, y = gross_per_capita)
) +
  geom_col() +
  labs(
    title = "Was Bee Movie more popular in Lithuania?",
    subtitle = "Gross Box Office per capita ($)",
    # Axis titles are dropped; the title and subtitle already say what is shown
    x = NULL,
    y = NULL,
    # Two data-source credits, one per line
    caption = str_c(
      "Box office data: https://www.boxofficemojo.com",
      "Population data: https://www.worldometers.info",
      sep = "\n"
    )
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment