Last active
March 21, 2023 15:24
-
-
Save JosepER/4dc7bd2a3d406a67a758d97d766e32ee to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(haven) | |
#' Download the OECD detailed National Accounts | |
#' | |
#' @description | |
#' Uses the API to the OECD tables to download the National Accounts. | |
#' | |
#' Downloads all variables for all years for the selected countries. | |
#' | |
#' @details | |
#' The OECD API takes countries with ISO3. The function uses the METIS 'country' | |
#' table to convert ISO2 to ISO3. | |
#' | |
#' @param countries A string with the list of countries for which the | |
#' National Accounts should be downloaded. Country names should be in ISO2 | |
#' or ISO3. If NULL (default), the National Accounts for all countries in | |
#' METIS datasets will be downloaded. | |
#' @param start_year An integer with the year from which the funciton should | |
#' start reading the National Accounts. By default it starts at 1967. | |
#' | |
#' @return A tibble with 5 columns: country, variable, sector, year, value. | |
download_oecd_detailed <- function(countries = NULL, start_year = 1967){ | |
countries <- toupper(countries) | |
assertthat::assert_that(all(stringr::str_length(countries) %in% c(2, 3)), | |
msg = "Values in 'countries' need to be country codes in ISO2 or ISO3.") | |
if(any(stringr::str_length(countries) == 2)){ | |
countries <- convert_iso2_to_iso3(countries) | |
} | |
variables <- c("NFB2G_B3GR", "NFB2G_B3GP", "NFB2GP", "NFB2GR", | |
"NFB3GR", "NFB3GP", "NFB3GR", | |
"NFD11P", "NFD12R", "NFD11R", "NFD1P", "NFD11P", "NFD12P", "NFD1R", "NFD12R", | |
"NFD4R", "NFD4P", | |
"NFD41P", "NFD41R", "NFD42R", "NFD42P", "NFD43P", "NFD43R", "NFD44R", | |
"NFD44P", "NFD45P", "NFD45R", "NFD421P", "NFD421R", "NFD441R", "NFD441P", | |
"NFD442R", "NFD422R", "NFD442P", "NFD422P", "NFD443P", "NFD443R", | |
"NFD51P", "NFD51R", "NFD5P", "NFD5R", "NFD59P", "NFD59R", | |
"NFD61P", "NFD61R", "NFD611P", "NFD611R", "NFD6P", "NFD612R", "NFD612P", | |
"NFD62P", "NFD62R", "NFD63P", "NFD63R", "NFD6R", "NFD631P", "NFD631R", | |
"NFD6111P", "NFD6111R", "NFD6121P", "NFD6121R", "NFD612HP", "NFD612HR", | |
"NFD611HP", "NFD611HR", "NFD632P", "NFD632R", "NFD61SCP", "NFD61SCR", | |
"NFB5GR", "NFB5GP", | |
"NFD71P", "NFD71R", "NFD72R", "NFD72P", "NFD74P", "NFD74R", "NFD7P", | |
"NFD7R", "NFD75R", "NFD75P", "NFD76P", "NFD76R", "NFD751R", "NFD751P", | |
"NFB6GR", "NFB6GP", | |
"NFB7GP", | |
"NFP31P", "NFP3P", "NFP32P", | |
"NFD8P", "NFD8R", | |
"NFK1MP", "NFK1P", "NFK1R", | |
"NFTINTR" # instead of "NFD41GR" | |
) | |
downloaded_df <- OECD::get_dataset("SNA_TABLE14A", | |
filter = list(countries, | |
variables, | |
c("S1", "S14", "S14_S15")), | |
start_time = start_year) | |
check_duplicates_downloaded_oecd_detailed(downloaded_df) | |
# subset columns | |
return(dplyr::select(downloaded_df, country = LOCATION, variable = TRANSACT, | |
sector = SECTOR, year = Time, value = ObsValue)) | |
} | |
#' Check for duplicates in downloaded OECD National Accounts | |
#' | |
#' @description | |
#' Throws an error if there are duplicates in the downloaded OECD data. | |
#' | |
#' @param downloaded_df A data.frame type of object with the downloaded data. | |
#' | |
#' @return NULL if there are no duplicates. Throws an error otherwise. | |
check_duplicates_downloaded_oecd_detailed <- function(downloaded_df){ | |
nrow_df <- nrow(dplyr::select(downloaded_df, LOCATION, TRANSACT, SECTOR, Time)) | |
nrow_unique_df <- nrow(unique(dplyr::select(downloaded_df, LOCATION, TRANSACT, SECTOR, Time))) | |
assertthat::assert_that(nrow_df == nrow_unique_df, | |
msg = "The number of rows in the downloaded dataset is not the same as the number of unique combinations of 'LOCATION', 'TRANSACT', 'SECTOR' and 'Time'. There might be duplicated rows in the downloaded dataset.") | |
return(invisible(NULL)) | |
} | |
#' Compute Column from Formula | |
#' | |
#' @description | |
#' The compute_column_from_formula function is a lower-level function that computes a new column in a data.frame based on the given formula. | |
#' | |
#' @param data A data.frame containing the input data. | |
#' @param formula A formula for computing the desired new column. | |
#' | |
#' @return A data.frame with the new column added. | |
#' | |
#' @note This is a lower-level function and is typically not used directly. Instead, use the compute_columns_from_formulas function to compute multiple columns at once. | |
compute_column_from_formula <- function(data, formula) { | |
# Check if the input is a data.frame | |
if (!is.data.frame(data)) { | |
stop("The first argument should be a data.frame") | |
} | |
# Check if the input is a formula | |
if (!rlang::is_formula(formula)) { | |
stop("The second argument should be a formula") | |
} | |
# Extract the left-hand and right-hand sides of the formula | |
lhs <- as.character(formula[[2]]) | |
rhs <- formula[[3]] | |
# Evaluate the right-hand side expression | |
new_column_values <- eval(rhs, data) | |
# Add the new column to the data.frame with the name from the left-hand side of the formula | |
data[[lhs]] <- new_column_values | |
# Return the updated data.frame | |
return(data) | |
} | |
#' Compute Columns from Formulas | |
#' | |
#' @description | |
#' The higher-level function of compute_column_from_formula. The compute_columns_from_formulas function computes new columns in a data.frame based on the given list of formulas. | |
#' | |
#' @param data A data.frame containing the input data. | |
#' @param formulas A list of formulas for computing the desired new columns. | |
#' | |
#' @return A data.frame with the new columns added. | |
compute_columns_from_formulas <- function(data, formulas) { | |
# Check if the input is a data.frame | |
if (!is.data.frame(data)) { | |
stop("The first argument should be a data.frame") | |
} | |
# Check if the input is a list of formulas | |
if (!is.list(formulas) || !all(sapply(formulas, rlang::is_formula))) { | |
stop("The second argument should be a list of formulas") | |
} | |
# Iterate through the list of formulas and add a new column for each formula | |
for (formula in formulas) { | |
data <- compute_column_from_formula(data, formula) | |
} | |
# Return the updated data.frame | |
return(data) | |
} | |
#' Compute estimates from Microdata | |
#' | |
#' @description | |
#' The compute_estimates_from_microdata function computes estimates for specified variables from microdata using provided formulas and weights. | |
#' | |
#' @param data A data.frame containing the microdata. | |
#' @param formulas A list of formulas for computing the desired variables. | |
#' @param weights A character vector of variable names for weights to be used with each corresponding formula. | |
#' | |
#' @return A tibble containing the computed estimates. | |
compute_estimates_from_microdata <- function(data, formulas, weights){ | |
df <- compute_columns_from_formulas(data, formulas) | |
# weighted sum of new variables | |
new_vars <- purrr::map_chr(formulas, ~as.character(.x[[2]])) | |
tibble::enframe(purrr::map2_dbl(set_names(new_vars), weights, .f = function(var, weights, data){ | |
sum(df[[var]] * df[[weights]])/1000000 | |
}, data), | |
name = "indicator") | |
} | |
#' Compute National Account Ratios | |
#' | |
#' @description | |
#' The compute_national_account_ratios function computes the ratios between estimates from microdata and national accounts data. | |
#' | |
#' @param estimates_from_microdata_df A tibble containing the computed estimates from the estimates_from_microdata function. | |
#' @param national_accounts_table A data.frame containing national accounts data. | |
#' | |
#' @return A numeric vector with the computed ratios. | |
compute_national_account_ratios <- function(estimates_from_microdata_df, national_accounts_table){ | |
national_accounts_table$variable <- stringr::str_remove(national_accounts_table$variable, "^NF" ) | |
national_accounts_table_wide <- national_accounts_table %>% | |
pivot_wider(id_cols = "country", names_from = "variable", values_from = "value") | |
national_accounts_estimates <- purrr::map_dbl(estimates_from_microdata_df[["indicator"]], | |
.f = function(formula_str, df){ | |
formula_expr <- parse(text = formula_str) | |
# Evaluate the formula expression within the context of the data.frame | |
new_column <- eval(formula_expr, envir = df) | |
return(new_column) | |
}, | |
national_accounts_table_wide) | |
return(estimates_from_microdata_df[["value"]]/national_accounts_estimates) | |
} | |
# Read data --------------------------------------------------------------- | |
# You can download the files form here: https://www.lisdatacenter.org/resources/self-teaching/ | |
it14ip <- haven::read_dta("it14ip.dta", col_select = c("hid", "ppopwgt", "relation")) | |
it14ih <- haven::read_dta("it14ih.dta", col_select = c("hid", "hi11", "hi13", "hi12", "hicapital")) | |
it14i <- left_join(it14ip, it14ih, by = "hid") | |
ita_na_df <- download_oecd_detailed(countries = "ita", start_year = 2014) | |
# Data management --------------------------------------------------------- | |
# keep only 2016 and S14 for this example | |
ita_na_df <- ita_na_df %>% | |
filter(year == 2014, sector == "S14") %>% | |
mutate(value = as.numeric(value)) | |
it14i <- it14i %>% | |
filter(relation == 1000) | |
# Compute the estimates form microdata ------------------------------------ | |
it14i_estimates <- compute_estimates_from_microdata(data = it14i, | |
formulas = list(D11P ~ hi11 + hi13, | |
`B3GR+D41R+D42R+D45R` ~ hi12 + hicapital), | |
weights = c("ppopwgt", "ppopwgt")) | |
# Compute ratio with national accounts ------------------------------------ | |
compute_national_account_ratios(it14i_estimates, ita_na_df) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment