Skip to content

Instantly share code, notes, and snippets.

@graebnerc
Created May 4, 2023 19:16
Show Gist options
  • Save graebnerc/aab9dee424b1092df6774152d60cc4e6 to your computer and use it in GitHub Desktop.
Save graebnerc/aab9dee424b1092df6774152d60cc4e6 to your computer and use it in GitHub Desktop.
The script used during session 10.
The script used during session 10. For more precise solutions see:
https://gist.github.com/graebnerc/5d5ec7591a45d6cbad3a58ddf06fff6b
# This is the script used during the lecture. For a more comprehensive solution
# to the exercises see the solutions here:
# https://gist.github.com/graebnerc/5d5ec7591a45d6cbad3a58ddf06fff6b
here::i_am("R/data_analysis_script.R")
library(data.table)
library(here)
library(dplyr)
library(tidyr)
library(countrycode)
# 1. Import data-------------
# All raw files are read with data.table::fread() and converted to tibbles.

# World Bank file for the indicator NY.GDP.PCAP.PP.KD
gdp_file <- here("data/raw/API_NY.GDP.PCAP.PP.KD_DS2_en_csv_v2_5359165.csv")
gpd_data_raw <- as_tibble(fread(file = gdp_file, header = TRUE))

# World Bank file for the indicator SH.DYN.MORT
# Alternative: obtain the same indicator via the WDI package:
# wdi_style <- WDI::WDI(indicator = "SH.DYN.MORT")
mortality_data_raw <- as_tibble(
  fread(
    file = here("data/raw/API_SH.DYN.MORT_DS2_en_csv_v2_5358988.csv"),
    header = TRUE
  )
)

# Inequality data (swiid9_4_summary.csv); header detection is left to fread()
gini_data_raw <- as_tibble(
  fread(file = here("data/raw/swiid9_4_summary.csv"))
)
# Transform the data---------

#' Reshape a raw World Bank indicator table into long (tidy) format
#'
#' @param raw_data Wide table as imported above. It must contain a
#'   "Country Code" column and one column per year whose name is the
#'   four-digit year (e.g. "1960").
#' @param value_name Name (string) of the column that receives the
#'   indicator values.
#' @return A tibble with the columns `country`, `year` (double) and the
#'   column named by `value_name`.
tidy_wb_indicator <- function(raw_data, value_name) {
  raw_data %>%
    # Keep the country identifier and the year columns only. Selecting the
    # year columns by pattern (instead of dropping "V67" and the metadata
    # columns by name) keeps the script working when a download has a
    # different number of columns: fread() names header-less columns by
    # position, so the name "V67" is specific to one file layout and
    # select() would fail if that column did not exist.
    dplyr::select("Country Code", dplyr::matches("^[0-9]{4}$")) %>%
    tidyr::pivot_longer(
      cols = -"Country Code",
      names_to = "year",
      values_to = value_name
    ) %>%
    # Column names are strings, so the year has to be converted to a number
    dplyr::mutate(year = as.double(year)) %>%
    dplyr::rename(country = `Country Code`)
}

gpd_data <- tidy_wb_indicator(gpd_data_raw, value_name = "GDP")
mortality_data <- tidy_wb_indicator(
  mortality_data_raw,
  value_name = "Mortality"
)
# Keep country, year and gini_disp from the inequality data and translate
# the country names into iso3c codes.
gini_data <- gini_data_raw %>%
  # NOTE(review): "Soviet Union" is removed before the conversion, presumably
  # because it cannot be mapped to an iso3c code -- confirm.
  dplyr::filter(country != "Soviet Union") %>%
  dplyr::mutate(
    country = countrycode(
      sourcevar = country,
      origin = "country.name",
      destination = "iso3c"
    )
  ) %>%
  dplyr::select(country, year, gini_disp)
# Merge World Bank Data------
# Inner joins: only country-year pairs present in both tables are kept.
world_data <- gpd_data %>%
  dplyr::inner_join(mortality_data, by = c("country", "year"))

# Add the inequality data, again keeping matching country-year pairs only
complete_data <- world_data %>%
  dplyr::inner_join(gini_data, by = c("country", "year"))

# Save the merged data set
fwrite(complete_data, file = here("data/tidy/full_data.csv"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment