Skip to content

Instantly share code, notes, and snippets.

@graebnerc
Created June 22, 2023 15:13
Show Gist options
  • Save graebnerc/1ed64ed3e30a0a80d41e6381aabc2e32 to your computer and use it in GitHub Desktop.
Save graebnerc/1ed64ed3e30a0a80d41e6381aabc2e32 to your computer and use it in GitHub Desktop.
In-class solutions for the recap session.
here::i_am("inclass-regression-solution.R")
library(here)
library(dplyr)
library(tidyr)
library(data.table)
library(ggplot2)
# Step 1: Import the data ----
# Read the CSV (path resolved by here()) and convert the result to a tibble.
reg_data <- as_tibble(
  data.table::fread(here("data/reg_data_1.csv"))
)
# Show the first rows to check the import
head(reg_data)
# Step 2: Decide on the specification ----
# Plot y against each candidate regressor to judge whether the variable
# should enter the model and whether it needs to be transformed first.

# y against x1:
# there is a relationship, but it is quadratic rather than linear
# -> include both x1 and x1^2
ggplot(reg_data, aes(x1, y)) +
  geom_point() +
  theme_bw()

# y against log(x2):
# there is a relationship, and it is only linear once x2 is log-transformed
ggplot(reg_data, aes(log(x2), y)) +
  geom_point() +
  theme_bw()

# y against x3:
# there is no relationship, so the variable should not be included
ggplot(reg_data, aes(x3, y)) +
  geom_point() +
  theme_bw()
# Step 3: Fit the model ----
# There are two equivalent ways to specify the transformed regressors.

# Variant A: apply the transformations directly inside the formula
lin_model_quad <- lm(
  formula = y ~ x1 + I(x1^2) + log(x2),
  data = reg_data
)

# Variant B: add the transformed variables to the data first and refer to
# them by name in the formula (this overwrites the model from variant A)
reg_data <- mutate(
  reg_data,
  x1_squared = x1^2,
  x2_log = log(x2)
)
lin_model_quad <- lm(
  formula = y ~ x1 + x1_squared + x2_log,
  data = reg_data
)

summary(lin_model_quad)
# Step 4 (if needed): Assess the model ----
# Residuals-vs-fitted plot for the model that includes the quadratic term;
# the horizontal line marks a residual of zero.
ggplot(
  tibble(
    Fitted = lin_model_quad$fitted.values,
    Residuals = lin_model_quad$residuals
  ),
  aes(Fitted, Residuals)
) +
  geom_hline(yintercept = 0) +
  geom_point() +
  theme_bw()

# For comparison: the same plot if the quadratic term had been omitted
lin_model_lin <- lm(formula = y ~ x1 + log(x2), data = reg_data)
ggplot(
  tibble(
    Fitted = lin_model_lin$fitted.values,
    Residuals = lin_model_lin$residuals
  ),
  aes(Fitted, Residuals)
) +
  geom_hline(yintercept = 0) +
  geom_point() +
  theme_bw()
# Hint: Good to have those prepared ;)
# The two snippets below are fill-in templates: the empty arguments
# (`data = `, `x=`, `y=`, `"Fitted"=`, `"Residuals"=`) are placeholders.
# NOTE(review): presumably these are meant to be copied and completed rather
# than executed as written -- confirm before sourcing the whole script.

# Template 1: scatter plot of one variable against another
ggplot(data = , aes(x=, y=)) +
geom_point() + theme_bw()
# Template 2: residuals-vs-fitted plot with a horizontal reference line at 0
tibble("Fitted"=, "Residuals"=) %>%
ggplot(aes(x=Fitted, y=Residuals)) +
geom_hline(yintercept = 0) +
geom_point() + theme_bw()
here::i_am("inclass-wrangling-solutions.R")
library(here)
library(dplyr)
library(tidyr)
library(data.table)
# In-class solution for data wrangling ----
# Import the raw data. The path is resolved with here(), i.e. relative to the
# project root declared via here::i_am(), so the import does not depend on
# the current working directory.
raw_data <- data.table::fread(
  here("data/wrangel_1.csv"),
  header = TRUE # treat the first row as column names
) %>%
  as_tibble()
# Show the first rows to check the import
head(raw_data)
# Reshape into tidy format in two steps:
# 1. stack every column except the identifiers "country" and "name" into
#    year/observation pairs;
# 2. spread the entries of "name" into one column each.
tidy_data <- raw_data %>%
  tidyr::pivot_longer(
    cols = !all_of(c("country", "name")),
    # cols = as.character(seq(2005, 2020)),
    names_to = "year",
    values_to = "observation"
  ) %>%
  tidyr::pivot_wider(names_from = "name", values_from = "observation")
head(tidy_data)
# Per-country averages of the three variables; missing values are ignored
mean_data <- summarise(
  group_by(tidy_data, country),
  GrowthMean = mean(Growth, na.rm = TRUE),
  EducSpendingMean = mean(EducationSpending, na.rm = TRUE),
  HealthSpendingMean = mean(HealthSpending, na.rm = TRUE)
)
mean_data
# Alternative: stack the variables into long format, average by country and
# variable, then spread the countries into one column each
mean_data_alt <- tidy_data %>%
  pivot_longer(
    cols = -c("country", "year"),
    names_to = "name",
    values_to = "value"
  ) %>%
  # pivot_longer(cols = c("Growth", "EducationSpending", "HealthSpending")) %>%
  summarise(
    MeanVal = mean(value, na.rm = TRUE),
    .by = c("country", "name")
  ) %>%
  pivot_wider(
    names_from = "country",
    values_from = "MeanVal"
  )
mean_data_alt
# Alternative: apply the same NA-ignoring mean to all three columns at once
# with across(), grouped per country via `.by`
mean_data_alt2 <- summarise(
  tidy_data,
  across(
    .cols = all_of(c("Growth", "EducationSpending", "HealthSpending")),
    .fns = ~ mean(.x, na.rm = TRUE)
  ),
  .by = "country"
)
mean_data_alt2
# Extension:
# For each country, compute the percentage change of education spending from
# 2010 to 2018 and store it in a variable called perc_change.
perc_data <- tidy_data %>%
  # Keep only the identifiers and the variable of interest
  select(country, year, EducationSpending) %>%
  # Keep only the two years being compared
  dplyr::filter(year %in% c(2010, 2018)) %>%
  # One column per year, so both values sit in the same row
  pivot_wider(names_from = "year", values_from = "EducationSpending") %>%
  dplyr::mutate(
    perc_change = (`2018` - `2010`) / `2010` * 100
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment