Created
May 4, 2023 19:16
-
-
Save graebnerc/aab9dee424b1092df6774152d60cc4e6 to your computer and use it in GitHub Desktop.
The script used during session 10.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
The script used during session 10. For more precise solutions see:
https://gist.github.com/graebnerc/5d5ec7591a45d6cbad3a58ddf06fff6b |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is the script used during the lecture. For a more comprehensive solution | |
# to the exercises see the solutions here: | |
# https://gist.github.com/graebnerc/5d5ec7591a45d6cbad3a58ddf06fff6b | |
here::i_am("R/data_analysis_script.R") | |
library(data.table) | |
library(here) | |
library(dplyr) | |
library(tidyr) | |
library(countrycode) | |
# 1. Import data ----
# Every raw file is read with fread() and converted to a tibble right away,
# so the tidyverse steps further down all operate on the same table type.

# World Bank file for indicator NY.GDP.PCAP.PP.KD. `header = TRUE` tells
# fread() to use the first row of the table as column names.
gdp_file <- here("data/raw/API_NY.GDP.PCAP.PP.KD_DS2_en_csv_v2_5359165.csv")
gpd_data_raw <- as_tibble(fread(file = gdp_file, header = TRUE))

# Using the WDI package instead of a downloaded file:
# wdi_style <- WDI::WDI(indicator = "SH.DYN.MORT")

# World Bank file for indicator SH.DYN.MORT, read the same way.
mortality_data_raw <- as_tibble(
  fread(
    file = here("data/raw/API_SH.DYN.MORT_DS2_en_csv_v2_5358988.csv"),
    header = TRUE
  )
)

# SWIID summary file; header detection is left to fread()'s defaults here.
gini_data_raw <- as_tibble(
  fread(file = here("data/raw/swiid9_4_summary.csv"))
)
# Transform the data ----

# Reshape a raw World Bank indicator table from wide (one column per year)
# to long (one row per country-year).
#
# Arguments:
#   raw_data:   tibble holding the columns "Country Name", "Country Code",
#               "Indicator Name", "Indicator Code" plus one column per year.
#   value_name: string; name given to the column that receives the values.
#
# Returns a tibble with the columns `country` (the former "Country Code"),
# `year` (double) and one column named after `value_name`.
tidy_wb_indicator <- function(raw_data, value_name) {
  meta_cols <- c("Indicator Name", "Indicator Code", "Country Name")
  raw_data %>%
    dplyr::select(
      # all_of() still fails loudly if an expected metadata column is missing.
      -dplyr::all_of(meta_cols),
      # fread() auto-names header-less columns "V<position>" (the original
      # script hard-coded "V67"). Dropping them by pattern keeps this working
      # when a release with a different number of year columns shifts that
      # position, and it is a no-op when no such column exists.
      -dplyr::matches("^V[0-9]+$", ignore.case = FALSE)
    ) %>%
    tidyr::pivot_longer(
      cols = -"Country Code",
      names_to = "year",
      values_to = value_name
    ) %>%
    # Column names arrive as strings; convert them to numeric years.
    dplyr::mutate(year = as.double(year)) %>%
    dplyr::rename(country = "Country Code")
}

gpd_data <- tidy_wb_indicator(gpd_data_raw, value_name = "GDP")
mortality_data <- tidy_wb_indicator(mortality_data_raw, value_name = "Mortality")

# Keep only country, year and gini_disp, then replace the country names by
# ISO3 codes so the table shares its `country` key with the World Bank data.
# "Soviet Union" is removed before the conversion -- presumably because it has
# no iso3c counterpart (TODO confirm). Any other name countrycode() cannot
# match becomes NA and triggers a warning.
gini_data <- gini_data_raw %>%
  dplyr::select(dplyr::all_of(c("country", "year", "gini_disp"))) %>%
  dplyr::filter(country != "Soviet Union") %>%
  dplyr::mutate(
    country = countrycode(
      country,
      origin = "country.name",
      destination = "iso3c"
    )
  )
# Merge World Bank Data ----
# Inner joins keep only the country-year pairs present in both inputs.

# Step 1: combine the two World Bank tables.
world_data <- gpd_data %>%
  dplyr::inner_join(mortality_data, by = c("country", "year"))

# Step 2: add the Gini table on the same keys.
complete_data <- world_data %>%
  dplyr::inner_join(gini_data, by = c("country", "year"))

# Write the merged table to the tidy data folder of the project.
fwrite(complete_data, file = here("data/tidy/full_data.csv"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment