Skip to content

Instantly share code, notes, and snippets.

@JosepER
Last active March 21, 2023 15:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JosepER/4dc7bd2a3d406a67a758d97d766e32ee to your computer and use it in GitHub Desktop.
Save JosepER/4dc7bd2a3d406a67a758d97d766e32ee to your computer and use it in GitHub Desktop.
library(tidyverse)
library(haven)
#' Download the OECD detailed National Accounts
#'
#' @description
#' Uses the API to the OECD tables to download the National Accounts.
#'
#' Downloads all variables for all years for the selected countries.
#'
#' @details
#' The OECD API takes countries with ISO3. The function uses the METIS 'country'
#' table to convert ISO2 to ISO3.
#'
#' @param countries A string with the list of countries for which the
#' National Accounts should be downloaded. Country names should be in ISO2
#' or ISO3. If NULL (default), the National Accounts for all countries in
#' METIS datasets will be downloaded.
#' @param start_year An integer with the year from which the funciton should
#' start reading the National Accounts. By default it starts at 1967.
#'
#' @return A tibble with 5 columns: country, variable, sector, year, value.
download_oecd_detailed <- function(countries = NULL, start_year = 1967){
countries <- toupper(countries)
assertthat::assert_that(all(stringr::str_length(countries) %in% c(2, 3)),
msg = "Values in 'countries' need to be country codes in ISO2 or ISO3.")
if(any(stringr::str_length(countries) == 2)){
countries <- convert_iso2_to_iso3(countries)
}
variables <- c("NFB2G_B3GR", "NFB2G_B3GP", "NFB2GP", "NFB2GR",
"NFB3GR", "NFB3GP", "NFB3GR",
"NFD11P", "NFD12R", "NFD11R", "NFD1P", "NFD11P", "NFD12P", "NFD1R", "NFD12R",
"NFD4R", "NFD4P",
"NFD41P", "NFD41R", "NFD42R", "NFD42P", "NFD43P", "NFD43R", "NFD44R",
"NFD44P", "NFD45P", "NFD45R", "NFD421P", "NFD421R", "NFD441R", "NFD441P",
"NFD442R", "NFD422R", "NFD442P", "NFD422P", "NFD443P", "NFD443R",
"NFD51P", "NFD51R", "NFD5P", "NFD5R", "NFD59P", "NFD59R",
"NFD61P", "NFD61R", "NFD611P", "NFD611R", "NFD6P", "NFD612R", "NFD612P",
"NFD62P", "NFD62R", "NFD63P", "NFD63R", "NFD6R", "NFD631P", "NFD631R",
"NFD6111P", "NFD6111R", "NFD6121P", "NFD6121R", "NFD612HP", "NFD612HR",
"NFD611HP", "NFD611HR", "NFD632P", "NFD632R", "NFD61SCP", "NFD61SCR",
"NFB5GR", "NFB5GP",
"NFD71P", "NFD71R", "NFD72R", "NFD72P", "NFD74P", "NFD74R", "NFD7P",
"NFD7R", "NFD75R", "NFD75P", "NFD76P", "NFD76R", "NFD751R", "NFD751P",
"NFB6GR", "NFB6GP",
"NFB7GP",
"NFP31P", "NFP3P", "NFP32P",
"NFD8P", "NFD8R",
"NFK1MP", "NFK1P", "NFK1R",
"NFTINTR" # instead of "NFD41GR"
)
downloaded_df <- OECD::get_dataset("SNA_TABLE14A",
filter = list(countries,
variables,
c("S1", "S14", "S14_S15")),
start_time = start_year)
check_duplicates_downloaded_oecd_detailed(downloaded_df)
# subset columns
return(dplyr::select(downloaded_df, country = LOCATION, variable = TRANSACT,
sector = SECTOR, year = Time, value = ObsValue))
}
#' Check for duplicates in downloaded OECD National Accounts
#'
#' @description
#' Throws an error if there are duplicates in the downloaded OECD data.
#'
#' @param downloaded_df A data.frame type of object with the downloaded data.
#'
#' @return NULL if there are no duplicates. Throws an error otherwise.
check_duplicates_downloaded_oecd_detailed <- function(downloaded_df){
nrow_df <- nrow(dplyr::select(downloaded_df, LOCATION, TRANSACT, SECTOR, Time))
nrow_unique_df <- nrow(unique(dplyr::select(downloaded_df, LOCATION, TRANSACT, SECTOR, Time)))
assertthat::assert_that(nrow_df == nrow_unique_df,
msg = "The number of rows in the downloaded dataset is not the same as the number of unique combinations of 'LOCATION', 'TRANSACT', 'SECTOR' and 'Time'. There might be duplicated rows in the downloaded dataset.")
return(invisible(NULL))
}
#' Compute Column from Formula
#'
#' @description
#' The compute_column_from_formula function is a lower-level function that computes a new column in a data.frame based on the given formula.
#'
#' @param data A data.frame containing the input data.
#' @param formula A formula for computing the desired new column.
#'
#' @return A data.frame with the new column added.
#'
#' @note This is a lower-level function and is typically not used directly. Instead, use the compute_columns_from_formulas function to compute multiple columns at once.
compute_column_from_formula <- function(data, formula) {
# Check if the input is a data.frame
if (!is.data.frame(data)) {
stop("The first argument should be a data.frame")
}
# Check if the input is a formula
if (!rlang::is_formula(formula)) {
stop("The second argument should be a formula")
}
# Extract the left-hand and right-hand sides of the formula
lhs <- as.character(formula[[2]])
rhs <- formula[[3]]
# Evaluate the right-hand side expression
new_column_values <- eval(rhs, data)
# Add the new column to the data.frame with the name from the left-hand side of the formula
data[[lhs]] <- new_column_values
# Return the updated data.frame
return(data)
}
#' Compute Columns from Formulas
#'
#' @description
#' The higher-level function of compute_column_from_formula. The compute_columns_from_formulas function computes new columns in a data.frame based on the given list of formulas.
#'
#' @param data A data.frame containing the input data.
#' @param formulas A list of formulas for computing the desired new columns.
#'
#' @return A data.frame with the new columns added.
compute_columns_from_formulas <- function(data, formulas) {
# Check if the input is a data.frame
if (!is.data.frame(data)) {
stop("The first argument should be a data.frame")
}
# Check if the input is a list of formulas
if (!is.list(formulas) || !all(sapply(formulas, rlang::is_formula))) {
stop("The second argument should be a list of formulas")
}
# Iterate through the list of formulas and add a new column for each formula
for (formula in formulas) {
data <- compute_column_from_formula(data, formula)
}
# Return the updated data.frame
return(data)
}
#' Compute estimates from Microdata
#'
#' @description
#' The compute_estimates_from_microdata function computes estimates for specified variables from microdata using provided formulas and weights.
#'
#' @param data A data.frame containing the microdata.
#' @param formulas A list of formulas for computing the desired variables.
#' @param weights A character vector of variable names for weights to be used with each corresponding formula.
#'
#' @return A tibble containing the computed estimates.
compute_estimates_from_microdata <- function(data, formulas, weights){
df <- compute_columns_from_formulas(data, formulas)
# weighted sum of new variables
new_vars <- purrr::map_chr(formulas, ~as.character(.x[[2]]))
tibble::enframe(purrr::map2_dbl(set_names(new_vars), weights, .f = function(var, weights, data){
sum(df[[var]] * df[[weights]])/1000000
}, data),
name = "indicator")
}
#' Compute National Account Ratios
#'
#' @description
#' The compute_national_account_ratios function computes the ratios between estimates from microdata and national accounts data.
#'
#' @param estimates_from_microdata_df A tibble containing the computed estimates from the estimates_from_microdata function.
#' @param national_accounts_table A data.frame containing national accounts data.
#'
#' @return A numeric vector with the computed ratios.
compute_national_account_ratios <- function(estimates_from_microdata_df, national_accounts_table){
national_accounts_table$variable <- stringr::str_remove(national_accounts_table$variable, "^NF" )
national_accounts_table_wide <- national_accounts_table %>%
pivot_wider(id_cols = "country", names_from = "variable", values_from = "value")
national_accounts_estimates <- purrr::map_dbl(estimates_from_microdata_df[["indicator"]],
.f = function(formula_str, df){
formula_expr <- parse(text = formula_str)
# Evaluate the formula expression within the context of the data.frame
new_column <- eval(formula_expr, envir = df)
return(new_column)
},
national_accounts_table_wide)
return(estimates_from_microdata_df[["value"]]/national_accounts_estimates)
}
# Read data ---------------------------------------------------------------
# You can download the files form here: https://www.lisdatacenter.org/resources/self-teaching/
it14ip <- haven::read_dta("it14ip.dta", col_select = c("hid", "ppopwgt", "relation"))
it14ih <- haven::read_dta("it14ih.dta", col_select = c("hid", "hi11", "hi13", "hi12", "hicapital"))
it14i <- left_join(it14ip, it14ih, by = "hid")
ita_na_df <- download_oecd_detailed(countries = "ita", start_year = 2014)
# Data management ---------------------------------------------------------
# keep only 2016 and S14 for this example
ita_na_df <- ita_na_df %>%
filter(year == 2014, sector == "S14") %>%
mutate(value = as.numeric(value))
it14i <- it14i %>%
filter(relation == 1000)
# Compute the estimates form microdata ------------------------------------
it14i_estimates <- compute_estimates_from_microdata(data = it14i,
formulas = list(D11P ~ hi11 + hi13,
`B3GR+D41R+D42R+D45R` ~ hi12 + hicapital),
weights = c("ppopwgt", "ppopwgt"))
# Compute ratio with national accounts ------------------------------------
compute_national_account_ratios(it14i_estimates, ita_na_df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment