Created
July 17, 2021 13:49
-
-
Save FrankRuns/e4058d6e260c45dbf6975d2e520606f8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# purpose: use changepoint algorithm to benchmark | |
# Piketty's time buckets for inflationary periods | |
# 0.0 Packages ---- | |
library(ggplot2) # for visualizations | |
library(gridExtra) # for plotting charts together | |
library(lubridate) # for data manipulations | |
library(tidyverse) # for piping data | |
library(reshape2) # for wide to long data transform | |
library(changepoint) # for finding change points in ts data | |
# 1.0 Load Data ---- | |
# * link to data ---- | |
# https://docs.google.com/spreadsheets/d/1ubqWzCSBgQ9_VCBJOGSwWpRVmhQVzzhxOrv4lYfrV-Y/edit#gid=0 | |
# inf_data <- read.delim(pipe("pbpaste")) | |
# * subset data | |
inf_data <- inf_data %>% filter(YEAR > 1760) | |
# 2.0 Create Changepoints ---- | |
temp_cps <- cpt.meanvar(inf_data$AVERAGE, penalty="Manual", | |
pen.value="3*log(n)", method="BinSeg", | |
Q=6, class=FALSE) | |
new_cps <- c(1761) | |
for (i in temp_cps) { | |
hold <- as.numeric(inf_data$YEAR[1]) + as.numeric(i) | |
new_cps <- c(new_cps, hold) | |
} | |
new_cps | |
# 3.0 Visual 1 ---- | |
# * Piketty's changepoints ---- | |
p_years <- c(1761, 1810, 1870, 1913, 1950, 1970, 1990, 2010) | |
p <- ggplot(inf_data, aes(x=YEAR, y=AVERAGE)) + geom_line() + | |
geom_vline(xintercept = p_years, color="red") + | |
labs(title="Piketty's Buckets", x="Year", y="Avg. Inflation Rate") + | |
theme_minimal() | |
# * r pacakge changepoints ---- | |
c <- ggplot(inf_data, aes(x=YEAR, y=AVERAGE, group=1)) + geom_line() + | |
geom_vline(xintercept = new_cps, color="red") + | |
labs(title="Change Point's Buckets", x="Year", y="Avg. Inflation Rate") + | |
theme_minimal() | |
# * visualize together | |
grid.arrange(p, c, ncol = 1, nrow = 2) | |
# 4.0 Visual 2 ---- | |
# * transform Piketty data ---- | |
p_breaks <- c("(1761,1810]", "(1810,1870]", "(1870,1913]", "(1913,1950]", "(1950,1970]", "(1970,1990]", "(1990,2012]") | |
p_appx_data <- c(0.006, 0.003, 0.006, 0.069, 0.033, 0.071, 0.024) | |
p_appx_data <- p_appx_data * 100 | |
p_data <- data.frame(pik_buckets = p_breaks, pik_inflation = p_appx_data) | |
p_data$index <- 1:nrow(p_data) | |
# * transform cp data ---- | |
inf_data$cp_bucket <- cut(inf_data$YEAR, breaks=new_cps, dig.lab=10) | |
inf_data$cp_bucket <- ifelse(is.na(inf_data$cp_bucket), as.character(paste0("(",inf_data$YEAR[1],",",new_cps[2],"]")), as.character(inf_data$cp_bucket)) | |
cp_data <- inf_data %>% group_by(cp_bucket) %>% | |
summarise(cp_inflation = mean(AVERAGE) * 100) %>% | |
mutate(index = 1:7) | |
# * plot on same line chart ---- | |
pik_cp_df <- left_join(p_data, cp_data, by="index") %>% select(index, pik_inflation, cp_inflation) | |
pik_cp_melt <- melt(pik_cp_df, id.vars = "index") | |
ggplot(pik_cp_melt, aes(x=index, y=value, group=variable, color=variable)) + geom_line() + | |
geom_point() + | |
labs(title="Avg. Inflation in Era Buckets", x="Bucket Number", y="Avg. of Avg. Inflation Rate") + | |
theme_minimal() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment