Skip to content

Instantly share code, notes, and snippets.

@jrosell
Created March 22, 2024 15:26
Show Gist options
  • Save jrosell/88a7756815a9bc4f9da7db9a51045e5d to your computer and use it in GitHub Desktop.
Save jrosell/88a7756815a9bc4f9da7db9a51045e5d to your computer and use it in GitHub Desktop.
# Dades per CCAA ===============================================================
library(tidyverse)
library(rvest)
# Use the minimal ggplot2 theme for every plot drawn below.
theme_set(theme_minimal())

# First year kept when the scraped series are filtered.
older_year <- 2002

# Lookup table with one row per region:
#   slug - path segment interpolated into the scraped URL
#   es   - region name in Spanish
#   name - region name used for plot labels (Catalan)
region <- tribble(
  ~slug,                ~es,                    ~name,
  "andalucia",          "Andalucía",            "Andalusia",
  "aragon",             "Aragón",               "Aragó",
  "asturias",           "Asturias",             "Astúries",
  "canarias",           "Canarias",             "Canàries",
  "cantabria",          "Cantabria",            "Cantàbria",
  "castilla-leon",      "Castilla y León",      "Castella i Lleó",
  "castilla-la-mancha", "Castilla-La Mancha",   "Castella-la Manxa",
  "cataluna",           "Cataluña",             "Catalunya",
  "ceuta",              "Ceuta",                "Ceuta",
  "melilla",            "Melilla",              "Melilla",
  "madrid",             "Madrid",               "Madrid",
  "valencia",           "Comunidad Valenciana", "Comunitat Valenciana",
  "extremadura",        "Extremadura",          "Extremadura",
  "galicia",            "Galicia",              "Galícia",
  "islas-baleares",     "Islas Baleares",       "Illes Balears",
  "la-rioja",           "La Rioja",             "La Rioja",
  "navarra",            "Navarra",              "Navarra",
  "murcia",             "Región de Murcia",     "Regió de Múrcia",
  "pais-vasco",         "País Vasco",           "País Basc"
)
# read_html() wrapped with purrr::safely(): a failed request returns
# list(result = NULL, error = <condition>) instead of raising.
safe_html <- safely(read_html, otherwise = NULL)

# Scrape every region page, but only when `html_results` does not exist yet,
# so re-running the script in the same session does not hit the site again.
# The result is a list with one tibble per region (columns `slug`, `tables`),
# holding one row per HTML table found on that region's page.
if (!exists("html_results")) {
  html_results <-
    region$slug %>%
    map(\(x) {
      url <- glue::glue(
        "https://datosmacro.expansion.com/pib/espana-comunidades-autonomas/{x}"
      )
      page <- safe_html(url)

      # html_table() errors on NULL, so a failed download has to be handled
      # here; otherwise wrapping read_html() in safely() achieves nothing and
      # one bad page aborts the whole scrape.
      if (is.null(page$result)) {
        reason <- if (is.null(page$error)) {
          "unknown error"
        } else {
          conditionMessage(page$error)
        }
        warning(
          "Skipping region '", x, "': could not read ", url, " (", reason, ")",
          call. = FALSE
        )
        # Zero-row tibble with the same columns so list_rbind() still works.
        return(tibble(slug = character(), tables = list()))
      }

      tibble(slug = x, tables = html_table(page$result))
    })
}
# Flatten the per-region results into one row per scraped HTML table, clean
# the column names of every table, and label each table "table1" / "table2".
# Later code reads `pib_anual` from the "table1" entries and `pib_per_capita`
# from the "table2" entries.
tables <-
  html_results %>%
  list_rbind() %>%
  mutate(
    tables = map(tables, janitor::clean_names),
    # Alternate table1/table2 *within each region*. Numbering across the whole
    # data frame would mislabel every later region as soon as one page
    # contributes an odd number of tables.
    row = paste0("table", 1 + (row_number() - 1) %% 2),
    .by = slug
  )
# Convert formatted euro amounts (strings shaped like "180.224 M€" or
# "21.091 €") to doubles expressed in euros.
#
# x: character vector using "." as thousands separator, optionally "," as
#    decimal mark, and an optional "M" marking millions.
# Returns a double vector the same length as `x`; entries that cannot be
# parsed become NA (parse_double() warns about them).
money_to_dobule <- \(x) {
  # Drop whitespace (including non-breaking spaces), thousands dots and "€".
  digits <- str_replace_all(x, "[\\s\\u00a0.€]", "")

  # Apply the "M" suffix numerically. Pasting "000000" into the text gives a
  # wrong or unparseable number as soon as the amount has a decimal part
  # (e.g. "1,5 M€").
  multiplier <- ifelse(str_detect(digits, "M"), 1e6, 1)

  amount <- digits %>%
    str_replace("M", "") %>%
    str_replace(",", ".") %>%
    parse_double()

  amount * multiplier
}
## pib_by_ccaa =================================================================
# Annual GDP per region and year, built from the "table1" entries of `tables`
# and restricted to years from `older_year` onwards. The region names are
# attached at the end via `slug`.
pib_by_ccaa <-
  tables %>%
  filter(str_detect(row, "table1")) %>%
  # Turn the `row` label into a list-column called `table1` so the nested
  # tables can be unnested by name.
  pivot_wider(id_cols = slug, names_from = row, values_from = tables) %>%
  unnest(cols = table1) %>%
  transmute(
    slug,
    year_integer = fecha,
    year_date = make_date(year = fecha),
    pib_anual = money_to_dobule(pib_anual)
  ) %>%
  filter(year_integer >= older_year) %>%
  left_join(region, by = "slug")
# Rows used to label the lines: each region's most recent observation, kept
# only for regions whose annual GDP exceeds 50,000 M€. Taking the latest year
# from the data (instead of a fixed 2022) keeps the labels at the end of each
# line when the source publishes newer years.
pib_by_ccaa_labels <-
  pib_by_ccaa %>%
  filter(
    near(year_integer, max(year_integer)),
    pib_anual > 50000000000,
    .by = name
  )

# Line chart of annual GDP by region, one coloured line per region, with the
# legend hidden in favour of the direct labels above.
pib_by_ccaa %>%
  ggplot(aes(year_date, pib_anual, colour = name)) +
  geom_line() +
  ggrepel::geom_text_repel(aes(label = name), data = pib_by_ccaa_labels) +
  scale_x_date(date_breaks = "year", date_labels = "%Y") +
  scale_y_continuous(
    # Values are stored in euros; display them in millions of euros.
    labels = scales::unit_format(unit = "M€", scale = 1e-6)
  ) +
  labs(
    title = "Evolució anual del PIB anual per CCAA",
    x = "", y = "", colour = "CCAA",
    caption = "Autor @jrosell | Font https://datosmacro.expansion.com"
  ) +
  theme(legend.position = "none")
## pib_per_capita_by_ccaa ======================================================
# GDP per capita per region and year, built from the "table2" entries of
# `tables` and restricted to years from `older_year` onwards. Only the display
# name of each region is kept after the join.
pib_per_capita_by_ccaa <-
  tables %>%
  filter(str_detect(row, "table2")) %>%
  # Turn the `row` label into a list-column called `table2` so the nested
  # tables can be unnested by name.
  pivot_wider(id_cols = slug, names_from = row, values_from = tables) %>%
  unnest(cols = table2) %>%
  transmute(
    slug,
    year_integer = fecha,
    year_date = make_date(year = fecha),
    pib_per_capita = money_to_dobule(pib_per_capita)
  ) %>%
  filter(year_integer >= older_year) %>%
  left_join(region, by = "slug") %>%
  select(-c(slug, es))

# Print the first rows as a quick sanity check.
head(pib_per_capita_by_ccaa)
# Labels at the start of the lines: each region's earliest observation, kept
# only for regions above 19,000 € per capita. The year comes from the data
# rather than a hard-coded 2002, so it follows `older_year` automatically.
pib_per_capita_by_ccaa_labels_first <-
  pib_per_capita_by_ccaa %>%
  filter(
    near(year_integer, min(year_integer)),
    pib_per_capita > 19000,
    .by = name
  )

# Labels at the end of the lines: each region's latest observation, kept only
# for regions above 28,000 € per capita.
pib_per_capita_by_ccaa_labels_last <-
  pib_per_capita_by_ccaa %>%
  filter(
    near(year_integer, max(year_integer)),
    pib_per_capita > 28000,
    .by = name
  )

# y-axis limits in 5,000 € steps: never narrower than 10,000-40,000 €, but
# widened when the data goes beyond that window. Fixed limits make ggplot2
# drop out-of-range values, which would cut the affected lines.
pib_per_capita_y_range <- range(
  c(10000, 40000, pib_per_capita_by_ccaa$pib_per_capita),
  na.rm = TRUE
)
pib_per_capita_y_limits <- c(
  floor(pib_per_capita_y_range[1] / 5000),
  ceiling(pib_per_capita_y_range[2] / 5000)
) * 5000

# Line chart of GDP per capita by region, labelled directly at both ends of
# the lines instead of using a legend.
pib_per_capita_by_ccaa %>%
  ggplot(aes(year_date, pib_per_capita, colour = name)) +
  geom_line() +
  # nudge_x is in days (date axis): push labels outside the plotted range.
  ggrepel::geom_text_repel(
    aes(label = name),
    data = pib_per_capita_by_ccaa_labels_first,
    nudge_x = -400
  ) +
  ggrepel::geom_text_repel(
    aes(label = name),
    data = pib_per_capita_by_ccaa_labels_last,
    nudge_x = 400
  ) +
  scale_x_date(
    # One break per year across the years actually present in the data.
    breaks = seq(
      min(pib_per_capita_by_ccaa$year_date),
      max(pib_per_capita_by_ccaa$year_date),
      by = "years"
    ),
    date_labels = "%Y"
  ) +
  scale_y_continuous(
    limits = pib_per_capita_y_limits,
    breaks = seq(
      pib_per_capita_y_limits[1],
      pib_per_capita_y_limits[2],
      by = 5000
    ),
    labels = scales::label_number(big.mark = ".", decimal.mark = ",", suffix = "€")
  ) +
  labs(
    title = "Evolució anual del PIB per càpita per CCAA",
    x = "", y = "", colour = "CCAA",
    caption = "Autor @jrosell | Font https://datosmacro.expansion.com"
  ) +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_line(color = "gainsboro"),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_line(color = "gainsboro"),
    panel.grid.minor.x = element_blank()
  )
@jrosell
Copy link
Author

jrosell commented Mar 22, 2024

evolucio_anaul_pib_per_capitap_per_ccaa

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment