graebnerc/#T12: Worked examples and solutions to exercises

## #T12: Worked examples and solutions to exercises
Includes code to create the example data sets as well as exemplary solutions to all exercises of the twelfth session.

## expl-1-data.R
# Creates expl-1.csv and ex1.csv
here::i_am("R/expl-1-data.R")
library(here)
library(data.table)
library(tidyr)
library(dplyr)
library(WDI)

# Download the raw World Bank series for the four example countries.
# Note: "Growth" is fetched here but dropped again by the select() below.
world_bank_base <- WDI::WDI(
  country = c("DE", "ES", "IT", "NL"),
  start = 2008, end = 2012,
  indicator = c(
    "Growth" = "NY.GDP.MKTP.KD.ZG",
    "EducationSpending" = "SE.XPD.TOTL.GD.ZS",
    "HealthSpending" = "SH.XPD.CHEX.GD.ZS"
  )
)

# Worked example:
# Reshape to one row per country/variable pair and one column per year.
expl1_data <- world_bank_base %>%
  select(c("country", "year", "HealthSpending", "EducationSpending")) %>%
  pivot_longer(
    cols = c("HealthSpending", "EducationSpending"),
    names_to = "Variable", values_to = "Value") %>%
  pivot_wider(
    names_from = "year", values_from = "Value")

fwrite(expl1_data, file = here("data/expl-1.csv"))

# Exercise 1:
# Start from `expl1_data` (created above). The original code used
# `vantage_point`, an object this script never creates, so running the
# script in a fresh session stopped with "object 'vantage_point' not found".
# Result: one row per variable/year pair and one column per country.
ex1_data <- expl1_data %>%
  pivot_longer(
    cols = -c("country", "Variable"),
    names_to = "year",
    values_to = "Value") %>%
  pivot_wider(
    names_from = "country",
    values_from = "Value")
fwrite(ex1_data, here("data/ex1.csv"))

## expl-1.R
# Worked example 1 and exercise 1
here::i_am("R/expl-1.R")
library(here)
library(data.table)
library(tidyr)
library(dplyr)

# Worked example:
# header = TRUE makes fread() use the first row as column names.
vantage_point <- fread(here("data/expl-1.csv"), header = TRUE)

# Step 1: stack every column except the two id columns into a
# year/Value pair.
intermediate_step <- pivot_longer(
  vantage_point,
  cols = -c(country, Variable),
  names_to = "year",
  values_to = "Value"
)

# Step 2: give every entry of `Variable` a column of its own.
final_result <- pivot_wider(
  intermediate_step,
  names_from = Variable,
  values_from = Value
)

# Everything in one step using pipes:
final_result <- vantage_point %>%
  pivot_longer(
    cols = -c(country, Variable),
    names_to = "year",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = Variable,
    values_from = Value
  )

# Exercise 1:
ex1_data <- fread(here("data/ex1.csv"))

# Stack all columns except `Variable` and `year` into country/value
# pairs, then spread the entries of `Variable` into columns.
ex1_solution <- ex1_data %>%
  pivot_longer(
    cols = -c(Variable, year),
    names_to = "country",
    values_to = "value"
  ) %>%
  pivot_wider(
    names_from = Variable,
    values_from = value
  )
ex1_solution

## expl-2-data.R
# Creates ex2.csv
here::i_am("R/expl-2-data.R")
library(here)
library(data.table)
library(tidyr)
library(dplyr)
library(WDI)

# Download the raw World Bank series for the four example countries.
# Note: "CO2_pc" is fetched here but dropped again by the select() below.
world_bank_base <- WDI::WDI(
  country = c("DE", "ES", "IT", "NL"),
  start = 2008,
  end = 2012,
  indicator = c(
    "GDP_total" = "NY.GDP.MKTP.KD",
    "CO2_pc" = "EN.ATM.CO2E.PC",
    "Population" = "SP.POP.TOTL"
  )
)

# Reshape to one row per country/variable pair and one column per year.
ex2_data <- world_bank_base %>%
  select(country, year, GDP_total, Population) %>%
  pivot_longer(
    cols = c(GDP_total, Population),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  pivot_wider(names_from = year, values_from = Value)

fwrite(ex2_data, file = here("data/ex2.csv"))

## expl-2.R
# Exercise 2
here::i_am("R/expl-2.R")
library(here)
library(data.table)
library(tidyr)
library(dplyr)

# header = TRUE makes fread() use the first row as column names.
vantage_point <- fread(here("data/ex2.csv"), header = TRUE)

# Step 1: stack every column except the two id columns into a
# year/value pair.
intermediate_step_1 <- pivot_longer(
  vantage_point,
  cols = -c(country, Variable),
  names_to = "year",
  values_to = "value"
)

# Step 2: give every entry of `Variable` a column of its own.
intermediate_step_2 <- pivot_wider(
  intermediate_step_1,
  names_from = Variable,
  values_from = value
)

# Step 3: derive GDP per capita from the two new columns.
intermediate_step_3 <- mutate(
  intermediate_step_2,
  GDP_pc = GDP_total / Population
)

# Step 4: average GDP per capita within each country.
final_result <- summarise(
  group_by(intermediate_step_3, country),
  GDP_pc_avg = mean(GDP_pc),
  .groups = "drop"
)

# Using pipes:
final_result <- vantage_point %>%
  pivot_longer(
    cols = -c(country, Variable),
    names_to = "year",
    values_to = "value"
  ) %>%
  pivot_wider(
    names_from = Variable,
    values_from = value
  ) %>%
  mutate(GDP_pc = GDP_total / Population) %>%
  group_by(country) %>%
  summarise(GDP_pc_avg = mean(GDP_pc), .groups = "drop")

## expl-3-data.R
# Creates ex3.csv
here::i_am("R/expl-3-data.R")
library(here)
library(data.table)
library(tidyr)
library(dplyr)
library(WDI)

# Download the CO2 series and reshape it to one row per country/income
# pair and one column per year.
# Note: "NA" below is a quoted country code, not R's missing value.
ex3_data <- WDI::WDI(
  country = c(
    "DE", "ES", "IT", "NL", "CN", "US", "IN",
    "SA", "ZA", "NA", "CL", "PE", "NI"
  ),
  start = 2008,
  end = 2012,
  indicator = c("CO2_pc" = "EN.ATM.CO2E.PC"),
  # NOTE(review): presumably `extra = TRUE` is what adds the `income`
  # column selected below -- confirm against the WDI documentation.
  extra = TRUE
) %>%
  select(country, year, CO2_pc, income) %>%
  pivot_wider(names_from = year, values_from = CO2_pc)

fwrite(ex3_data, file = here("data/ex3.csv"))

## expl-3.R
# Final task
here::i_am("R/expl-3.R")
library(here)
library(data.table)
library(tidyr)
library(dplyr)

# header = TRUE makes fread() use the first row as column names.
vantage_point <- fread(here("data/ex3.csv"), header = TRUE)

# Compute the deviation from the mean CO2 emissions for each
#  country in each year.
# All averages below use na.rm = TRUE: without it a single missing
#  observation would turn the mean of its year, and with it every
#  downstream result, into NA.
base_data <- vantage_point %>%
  pivot_longer(
    cols = -c("country", "income"),
    names_to = "year",
    values_to = "CO2_pc") %>%
  group_by(year) %>%
  mutate(mean_co2 = mean(CO2_pc, na.rm = TRUE)) %>%
  ungroup()

country_deviation <- base_data %>%
  mutate(
    dev_mean_co2 = CO2_pc - mean_co2)

# Then compute the average deviation per income group!
income_group_deviation <- country_deviation %>%
  group_by(income, year) %>%
  summarise(
    mean_deviation = mean(dev_mean_co2, na.rm = TRUE),
    .groups = "drop")

# Finally, take this result and average the deviations
#  over group over time!

mean_inc_deviation <- income_group_deviation %>%
  group_by(income) %>%
  summarise(mean_dev = mean(mean_deviation, na.rm = TRUE))
mean_inc_deviation
	# Creates expl-1.csv and ex1.csv
	here::i_am("R/expl-1-data.R")
	library(here)
	library(data.table)
	library(tidyr)
	library(dplyr)
	library(WDI)

	# Download the raw World Bank series for the four example countries.
	# Note: "Growth" is fetched here but dropped again by the select() below.
	world_bank_base <- WDI::WDI(
	  country = c("DE", "ES", "IT", "NL"),
	  start = 2008, end = 2012,
	  indicator = c(
	    "Growth" = "NY.GDP.MKTP.KD.ZG",
	    "EducationSpending" = "SE.XPD.TOTL.GD.ZS",
	    "HealthSpending" = "SH.XPD.CHEX.GD.ZS"
	  )
	)

	# Worked example:
	# Reshape to one row per country/variable pair and one column per year.
	expl1_data <- world_bank_base %>%
	  select(c("country", "year", "HealthSpending", "EducationSpending")) %>%
	  pivot_longer(
	    cols = c("HealthSpending", "EducationSpending"),
	    names_to = "Variable", values_to = "Value") %>%
	  pivot_wider(
	    names_from = "year", values_from = "Value")

	fwrite(expl1_data, file = here("data/expl-1.csv"))

	# Exercise 1:
	# Start from `expl1_data` (created above). The original code used
	# `vantage_point`, an object this script never creates, so running the
	# script in a fresh session stopped with "object 'vantage_point' not found".
	# Result: one row per variable/year pair and one column per country.
	ex1_data <- expl1_data %>%
	  pivot_longer(
	    cols = -c("country", "Variable"),
	    names_to = "year",
	    values_to = "Value") %>%
	  pivot_wider(
	    names_from = "country",
	    values_from = "Value")
	fwrite(ex1_data, here("data/ex1.csv"))
	# Worked example 1 and exercise 1
	here::i_am("R/expl-1.R")
	library(here)
	library(data.table)
	library(tidyr)
	library(dplyr)

	# Worked example:
	# header = TRUE makes fread() use the first row as column names.
	vantage_point <- fread(here("data/expl-1.csv"), header = TRUE)

	# Step 1: stack every column except the two id columns into a
	# year/Value pair.
	intermediate_step <- pivot_longer(
	  vantage_point,
	  cols = -c(country, Variable),
	  names_to = "year",
	  values_to = "Value"
	)

	# Step 2: give every entry of `Variable` a column of its own.
	final_result <- pivot_wider(
	  intermediate_step,
	  names_from = Variable,
	  values_from = Value
	)

	# Everything in one step using pipes:
	final_result <- vantage_point %>%
	  pivot_longer(
	    cols = -c(country, Variable),
	    names_to = "year",
	    values_to = "Value"
	  ) %>%
	  pivot_wider(
	    names_from = Variable,
	    values_from = Value
	  )

	# Exercise 1:
	ex1_data <- fread(here("data/ex1.csv"))

	# Stack all columns except `Variable` and `year` into country/value
	# pairs, then spread the entries of `Variable` into columns.
	ex1_solution <- ex1_data %>%
	  pivot_longer(
	    cols = -c(Variable, year),
	    names_to = "country",
	    values_to = "value"
	  ) %>%
	  pivot_wider(
	    names_from = Variable,
	    values_from = value
	  )
	ex1_solution
	# Creates ex2.csv
	here::i_am("R/expl-2-data.R")
	library(here)
	library(data.table)
	library(tidyr)
	library(dplyr)
	library(WDI)

	# Download the raw World Bank series for the four example countries.
	# Note: "CO2_pc" is fetched here but dropped again by the select() below.
	world_bank_base <- WDI::WDI(
	  country = c("DE", "ES", "IT", "NL"),
	  start = 2008,
	  end = 2012,
	  indicator = c(
	    "GDP_total" = "NY.GDP.MKTP.KD",
	    "CO2_pc" = "EN.ATM.CO2E.PC",
	    "Population" = "SP.POP.TOTL"
	  )
	)

	# Reshape to one row per country/variable pair and one column per year.
	ex2_data <- world_bank_base %>%
	  select(country, year, GDP_total, Population) %>%
	  pivot_longer(
	    cols = c(GDP_total, Population),
	    names_to = "Variable",
	    values_to = "Value"
	  ) %>%
	  pivot_wider(names_from = year, values_from = Value)

	fwrite(ex2_data, file = here("data/ex2.csv"))
	# Exercise 2
	here::i_am("R/expl-2.R")
	library(here)
	library(data.table)
	library(tidyr)
	library(dplyr)

	# header = TRUE makes fread() use the first row as column names.
	vantage_point <- fread(here("data/ex2.csv"), header = TRUE)

	# Step 1: stack every column except the two id columns into a
	# year/value pair.
	intermediate_step_1 <- pivot_longer(
	  vantage_point,
	  cols = -c(country, Variable),
	  names_to = "year",
	  values_to = "value"
	)

	# Step 2: give every entry of `Variable` a column of its own.
	intermediate_step_2 <- pivot_wider(
	  intermediate_step_1,
	  names_from = Variable,
	  values_from = value
	)

	# Step 3: derive GDP per capita from the two new columns.
	intermediate_step_3 <- mutate(
	  intermediate_step_2,
	  GDP_pc = GDP_total / Population
	)

	# Step 4: average GDP per capita within each country.
	final_result <- summarise(
	  group_by(intermediate_step_3, country),
	  GDP_pc_avg = mean(GDP_pc),
	  .groups = "drop"
	)

	# Using pipes:
	final_result <- vantage_point %>%
	  pivot_longer(
	    cols = -c(country, Variable),
	    names_to = "year",
	    values_to = "value"
	  ) %>%
	  pivot_wider(
	    names_from = Variable,
	    values_from = value
	  ) %>%
	  mutate(GDP_pc = GDP_total / Population) %>%
	  group_by(country) %>%
	  summarise(GDP_pc_avg = mean(GDP_pc), .groups = "drop")
	# Creates ex3.csv
	here::i_am("R/expl-3-data.R")
	library(here)
	library(data.table)
	library(tidyr)
	library(dplyr)
	library(WDI)

	# Download the CO2 series and reshape it to one row per country/income
	# pair and one column per year.
	# Note: "NA" below is a quoted country code, not R's missing value.
	ex3_data <- WDI::WDI(
	  country = c(
	    "DE", "ES", "IT", "NL", "CN", "US", "IN",
	    "SA", "ZA", "NA", "CL", "PE", "NI"
	  ),
	  start = 2008,
	  end = 2012,
	  indicator = c("CO2_pc" = "EN.ATM.CO2E.PC"),
	  # NOTE(review): presumably `extra = TRUE` is what adds the `income`
	  # column selected below -- confirm against the WDI documentation.
	  extra = TRUE
	) %>%
	  select(country, year, CO2_pc, income) %>%
	  pivot_wider(names_from = year, values_from = CO2_pc)

	fwrite(ex3_data, file = here("data/ex3.csv"))
	# Final task
	here::i_am("R/expl-3.R")
	library(here)
	library(data.table)
	library(tidyr)
	library(dplyr)

	# header = TRUE makes fread() use the first row as column names.
	vantage_point <- fread(here("data/ex3.csv"), header = TRUE)

	# Compute the deviation from the mean CO2 emissions for each
	# country in each year.
	# All averages below use na.rm = TRUE: without it a single missing
	# observation would turn the mean of its year, and with it every
	# downstream result, into NA.
	base_data <- vantage_point %>%
	  pivot_longer(
	    cols = -c("country", "income"),
	    names_to = "year",
	    values_to = "CO2_pc") %>%
	  group_by(year) %>%
	  mutate(mean_co2 = mean(CO2_pc, na.rm = TRUE)) %>%
	  ungroup()

	country_deviation <- base_data %>%
	  mutate(
	    dev_mean_co2 = CO2_pc - mean_co2)

	# Then compute the average deviation per income group!
	income_group_deviation <- country_deviation %>%
	  group_by(income, year) %>%
	  summarise(
	    mean_deviation = mean(dev_mean_co2, na.rm = TRUE),
	    .groups = "drop")

	# Finally, take this result and average the deviations
	# over group over time!

	mean_inc_deviation <- income_group_deviation %>%
	  group_by(income) %>%
	  summarise(mean_dev = mean(mean_deviation, na.rm = TRUE))
	mean_inc_deviation