Skip to content

Instantly share code, notes, and snippets.

@FrankRuns
Created July 17, 2021 13:49
Show Gist options
  • Save FrankRuns/e4058d6e260c45dbf6975d2e520606f8 to your computer and use it in GitHub Desktop.
Save FrankRuns/e4058d6e260c45dbf6975d2e520606f8 to your computer and use it in GitHub Desktop.
# purpose: use changepoint algorithm to benchmark
# Piketty's time buckets for inflationary periods
# 0.0 Packages ----
library(ggplot2) # for visualizations
library(gridExtra) # for plotting charts together
library(lubridate) # for data manipulations
library(tidyverse) # for piping data
library(reshape2) # for wide to long data transform
library(changepoint) # for finding change points in ts data
# 1.0 Load Data ----
# * link to data ----
# https://docs.google.com/spreadsheets/d/1ubqWzCSBgQ9_VCBJOGSwWpRVmhQVzzhxOrv4lYfrV-Y/edit#gid=0
# inf_data <- read.delim(pipe("pbpaste"))
# * subset data
inf_data <- inf_data %>% filter(YEAR > 1760)
# 2.0 Create Changepoints ----
temp_cps <- cpt.meanvar(inf_data$AVERAGE, penalty="Manual",
pen.value="3*log(n)", method="BinSeg",
Q=6, class=FALSE)
new_cps <- c(1761)
for (i in temp_cps) {
hold <- as.numeric(inf_data$YEAR[1]) + as.numeric(i)
new_cps <- c(new_cps, hold)
}
new_cps
# 3.0 Visual 1 ----
# * Piketty's changepoints ----
p_years <- c(1761, 1810, 1870, 1913, 1950, 1970, 1990, 2010)
p <- ggplot(inf_data, aes(x=YEAR, y=AVERAGE)) + geom_line() +
geom_vline(xintercept = p_years, color="red") +
labs(title="Piketty's Buckets", x="Year", y="Avg. Inflation Rate") +
theme_minimal()
# * r pacakge changepoints ----
c <- ggplot(inf_data, aes(x=YEAR, y=AVERAGE, group=1)) + geom_line() +
geom_vline(xintercept = new_cps, color="red") +
labs(title="Change Point's Buckets", x="Year", y="Avg. Inflation Rate") +
theme_minimal()
# * visualize together
grid.arrange(p, c, ncol = 1, nrow = 2)
# 4.0 Visual 2 ----
# * transform Piketty data ----
p_breaks <- c("(1761,1810]", "(1810,1870]", "(1870,1913]", "(1913,1950]", "(1950,1970]", "(1970,1990]", "(1990,2012]")
p_appx_data <- c(0.006, 0.003, 0.006, 0.069, 0.033, 0.071, 0.024)
p_appx_data <- p_appx_data * 100
p_data <- data.frame(pik_buckets = p_breaks, pik_inflation = p_appx_data)
p_data$index <- 1:nrow(p_data)
# * transform cp data ----
inf_data$cp_bucket <- cut(inf_data$YEAR, breaks=new_cps, dig.lab=10)
inf_data$cp_bucket <- ifelse(is.na(inf_data$cp_bucket), as.character(paste0("(",inf_data$YEAR[1],",",new_cps[2],"]")), as.character(inf_data$cp_bucket))
cp_data <- inf_data %>% group_by(cp_bucket) %>%
summarise(cp_inflation = mean(AVERAGE) * 100) %>%
mutate(index = 1:7)
# * plot on same line chart ----
pik_cp_df <- left_join(p_data, cp_data, by="index") %>% select(index, pik_inflation, cp_inflation)
pik_cp_melt <- melt(pik_cp_df, id.vars = "index")
ggplot(pik_cp_melt, aes(x=index, y=value, group=variable, color=variable)) + geom_line() +
geom_point() +
labs(title="Avg. Inflation in Era Buckets", x="Bucket Number", y="Avg. of Avg. Inflation Rate") +
theme_minimal()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment