graebnerc/#S10 - Script

## #S10 - Script
The script used during session 10. For more precise solutions see:
https://gist.github.com/graebnerc/5d5ec7591a45d6cbad3a58ddf06fff6b

## data_analysis_script.R
# This is the script used during the lecture. For a more comprehensive solution
#  to the exercises see the solutions here:
#  https://gist.github.com/graebnerc/5d5ec7591a45d6cbad3a58ddf06fff6b
here::i_am("R/data_analysis_script.R")
library(data.table)
library(here)
library(dplyr)
library(tidyr)
library(countrycode)

# 1. Import data-------------

# Location of the raw GDP file (World Bank indicator NY.GDP.PCAP.PP.KD,
# judging by the file name), built with here().
gdp_file <- here("data/raw/API_NY.GDP.PCAP.PP.KD_DS2_en_csv_v2_5359165.csv")

# Read the GDP file and convert the result to a tibble. `header = TRUE` tells
# fread() to treat the first data line as column names.
gpd_data_raw <- as_tibble(fread(file = gdp_file, header = TRUE))
# Alternative: using the WDI package:
# wdi_style <- WDI::WDI(indicator = "SH.DYN.MORT")

# Same import for the mortality file (indicator SH.DYN.MORT per file name).
mortality_data_raw <- as_tibble(
  fread(
    file = here("data/raw/API_SH.DYN.MORT_DS2_en_csv_v2_5358988.csv"),
    header = TRUE
  )
)

# The Gini file is read with fread()'s default header detection.
gini_data_raw <- as_tibble(
  fread(file = here("data/raw/swiid9_4_summary.csv"))
)

# Transform the data---------
# Reshape a raw World Bank indicator table from wide format (one column per
# year) to long format (one row per country code and year).
#
# Arguments:
#   raw_data:   table as imported above. It must contain a "Country Code"
#               column and one column per year whose name is the four-digit
#               year.
#   value_name: name of the output column that receives the indicator values.
#
# Returns a tibble with the columns `country`, `year` (double) and the column
# named by `value_name`.
tidy_wb_indicator <- function(raw_data, value_name) {
  raw_data %>%
    # Keep the country code and the year columns by pattern rather than
    # dropping columns by name: the trailing empty column gets a positional
    # name from fread() ("V67" for this download), which changes as soon as
    # the file contains a different number of year columns.
    dplyr::select("Country Code", dplyr::matches("^[0-9]{4}$")) %>%
    tidyr::pivot_longer(
      cols = -"Country Code",
      names_to = "year",
      values_to = value_name
    ) %>%
    # The year arrives as a column name, i.e. as character.
    dplyr::mutate(year = as.double(year)) %>%
    dplyr::rename(country = `Country Code`)
}

# Both World Bank files share the same layout, so one helper serves both.
gpd_data <- tidy_wb_indicator(gpd_data_raw, value_name = "GDP")

mortality_data <- tidy_wb_indicator(
  mortality_data_raw,
  value_name = "Mortality"
)

# Prepare the Gini data for merging: drop the "Soviet Union" rows, keep only
# country, year and `gini_disp`, and translate the country names into iso3c
# codes so that `country` matches the World Bank tables.
# NOTE(review): the reason for excluding "Soviet Union" is not visible here;
# presumably it cannot be matched to a code - confirm.
gini_data <- gini_data_raw %>%
  dplyr::filter(country != "Soviet Union") %>%
  dplyr::select("country", "year", "gini_disp") %>%
  dplyr::mutate(
    country = countrycode(
      sourcevar = country,
      origin = "country.name",
      destination = "iso3c"
    )
  )

# Merge World Bank Data------
# Combine the two World Bank tables. inner_join() keeps only the
# country-year combinations that are present in both inputs.
world_data <- gpd_data %>%
  dplyr::inner_join(mortality_data, by = c("country", "year"))

# Add the Gini data using the same keys.
complete_data <- world_data %>%
  dplyr::inner_join(gini_data, by = c("country", "year"))

# Save the merged data set as a CSV file.
fwrite(x = complete_data, file = here("data/tidy/full_data.csv"))
	The script used during session 10. For more precise solutions see:
	https://gist.github.com/graebnerc/5d5ec7591a45d6cbad3a58ddf06fff6b
	# This is the script used during the lecture. For a more comprehensive solution
	# to the exercises see the solutions here:
	# https://gist.github.com/graebnerc/5d5ec7591a45d6cbad3a58ddf06fff6b
	here::i_am("R/data_analysis_script.R")
	library(data.table)
	library(here)
	library(dplyr)
	library(tidyr)
	library(countrycode)

	# 1. Import data-------------

	# Location of the raw GDP file (World Bank indicator NY.GDP.PCAP.PP.KD,
	# judging by the file name), built with here().
	gdp_file <- here("data/raw/API_NY.GDP.PCAP.PP.KD_DS2_en_csv_v2_5359165.csv")

	# Read the GDP file and convert the result to a tibble. `header = TRUE`
	# tells fread() to treat the first data line as column names.
	gpd_data_raw <- as_tibble(fread(file = gdp_file, header = TRUE))
	# Alternative: using the WDI package:
	# wdi_style <- WDI::WDI(indicator = "SH.DYN.MORT")

	# Same import for the mortality file (indicator SH.DYN.MORT per file name).
	mortality_data_raw <- as_tibble(
	  fread(
	    file = here("data/raw/API_SH.DYN.MORT_DS2_en_csv_v2_5358988.csv"),
	    header = TRUE
	  )
	)

	# The Gini file is read with fread()'s default header detection.
	gini_data_raw <- as_tibble(
	  fread(file = here("data/raw/swiid9_4_summary.csv"))
	)

	# Transform the data---------
	# Reshape a raw World Bank indicator table from wide format (one column per
	# year) to long format (one row per country code and year).
	#
	# Arguments:
	#   raw_data:   table as imported above. It must contain a "Country Code"
	#               column and one column per year whose name is the four-digit
	#               year.
	#   value_name: name of the output column that receives the indicator
	#               values.
	#
	# Returns a tibble with the columns `country`, `year` (double) and the
	# column named by `value_name`.
	tidy_wb_indicator <- function(raw_data, value_name) {
	  raw_data %>%
	    # Keep the country code and the year columns by pattern rather than
	    # dropping columns by name: the trailing empty column gets a positional
	    # name from fread() ("V67" for this download), which changes as soon as
	    # the file contains a different number of year columns.
	    dplyr::select("Country Code", dplyr::matches("^[0-9]{4}$")) %>%
	    tidyr::pivot_longer(
	      cols = -"Country Code",
	      names_to = "year",
	      values_to = value_name
	    ) %>%
	    # The year arrives as a column name, i.e. as character.
	    dplyr::mutate(year = as.double(year)) %>%
	    dplyr::rename(country = `Country Code`)
	}

	# Both World Bank files share the same layout, so one helper serves both.
	gpd_data <- tidy_wb_indicator(gpd_data_raw, value_name = "GDP")

	mortality_data <- tidy_wb_indicator(
	  mortality_data_raw,
	  value_name = "Mortality"
	)

	# Prepare the Gini data for merging: drop the "Soviet Union" rows, keep only
	# country, year and `gini_disp`, and translate the country names into iso3c
	# codes so that `country` matches the World Bank tables.
	# NOTE(review): the reason for excluding "Soviet Union" is not visible here;
	# presumably it cannot be matched to a code - confirm.
	gini_data <- gini_data_raw %>%
	  dplyr::filter(country != "Soviet Union") %>%
	  dplyr::select("country", "year", "gini_disp") %>%
	  dplyr::mutate(
	    country = countrycode(
	      sourcevar = country,
	      origin = "country.name",
	      destination = "iso3c"
	    )
	  )

	# Merge World Bank Data------
	# Combine the two World Bank tables. inner_join() keeps only the
	# country-year combinations that are present in both inputs.
	world_data <- gpd_data %>%
	  dplyr::inner_join(mortality_data, by = c("country", "year"))

	# Add the Gini data using the same keys.
	complete_data <- world_data %>%
	  dplyr::inner_join(gini_data, by = c("country", "year"))

	# Save the merged data set as a CSV file.
	fwrite(x = complete_data, file = here("data/tidy/full_data.csv"))