Created
February 4, 2021 13:36
-
-
Save krz/da9b06f8049d79fd8bee69e10d2d4af9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(MarketMatching) | |
library(tidyverse) | |
library(janitor) | |
library(lubridate) | |
library(patchwork) | |
# load data | |
sales <- read_csv("Passport_Stats.csv", | |
col_types = cols(`2004` = col_number(), | |
`2005` = col_number(), `2006` = col_number(), | |
`2007` = col_number(), `2008` = col_number(), | |
`2009` = col_number(), `2010` = col_number(), | |
`2011` = col_number(), `2012` = col_number(), | |
`2013` = col_number(), `2014` = col_number(), | |
`2015` = col_number(), `2016` = col_number(), | |
`2017` = col_number(), `2018` = col_number())) %>% clean_names() %>% | |
#filter(category == "Carbonates") %>% | |
filter(data_type == "Off-trade Volume") %>% | |
select(-data_type, -unit) %>% | |
filter(geography %in% c("Austria", "Bulgaria", "Croatia", "Republic of Cyprus", "Czech Republic", | |
"France", "Germany", "Greece", "Italy", "Lithuania", | |
# "Finland" | |
# "Ireland", | |
# "United Kingdom", | |
#"Latvia", | |
#"Estonia", | |
#"Belgium", | |
#"Spain", | |
#"Denmark", | |
"Hungary", | |
# "Portugal", | |
"Luxembourg", "Malta", "Netherlands", "Poland", "Romania", "Slovakia", "Slovenia", "Sweden", | |
"Switzerland")) | |
df <- gather(sales, "year", "value", x2004:x2018) %>% | |
arrange(geography, year, value) %>% mutate() %>% | |
mutate(year = as.numeric(str_sub(year, 2)), | |
value = as.numeric(value)) %>% | |
mutate(treat.fr = if_else(year >= 2012 & geography == "France", 1, 0), | |
treat.hu = if_else(year >= 2011 & geography == "Hungary", 1, 0)) | |
## Analysis | |
# France | |
#carbonates | |
mm.fr.carb.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>% | |
filter(category == "Carbonates" | | |
category == "Energy Drinks" | | |
category == "Sports Drinks", | |
geography != "Hungary") %>% | |
group_by(geography, year) %>% | |
summarize(value = sum(value, na.rm = T)), | |
id_variable="geography", | |
date_variable="year", | |
matching_variable="value", | |
parallel=FALSE, | |
warping_limit=1, # warping limit=1 | |
dtw_emphasis=1, # rely only on dtw for pre-screening | |
matches=16, # request 5 matches | |
start_match_period="2004-01-01", | |
end_match_period="2011-01-01") | |
mm.fr.carb <- MarketMatching::inference(matched_markets = mm.fr.carb.match, | |
analyze_betas=TRUE, | |
test_market = "France", | |
control_matches = 10, | |
prior_level_sd=0.01, | |
end_post_period = "2018-10-01") | |
mm.fr.carb$PlotPointEffect | |
mm.fr.carb$CausalImpactObject | |
# juice | |
mm.fr.juice.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>% | |
filter(category == "Juice", | |
geography != "Hungary"), | |
id_variable="geography", | |
date_variable="year", | |
matching_variable="value", | |
parallel=FALSE, | |
warping_limit=1, # warping limit=1 | |
dtw_emphasis=1, # rely only on dtw for pre-screening | |
matches=16, # request 5 matches | |
start_match_period="2004-01-01", | |
end_match_period="2011-01-01") | |
mm.fr.juice <- MarketMatching::inference(matched_markets = mm.fr.juice.match, | |
analyze_betas=TRUE, | |
test_market = "France", | |
control_matches = 10, | |
prior_level_sd=0.01, | |
end_post_period = "2018-10-01") | |
mm.fr.juice$PlotPointEffect | |
mm.fr.juice$CausalImpactObject | |
# bottled water | |
mm.fr.water.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>% | |
filter(category == "Bottled Water", | |
geography != "Hungary"), | |
id_variable="geography", | |
date_variable="year", | |
matching_variable="value", | |
parallel=FALSE, | |
warping_limit=1, # warping limit=1 | |
dtw_emphasis=1, # rely only on dtw for pre-screening | |
matches=16, # request 5 matches | |
start_match_period="2004-01-01", | |
end_match_period="2011-01-01") | |
mm.fr.water <- MarketMatching::inference(matched_markets = mm.fr.water.match, | |
analyze_betas=TRUE, | |
test_market = "France", | |
control_matches = 10, | |
prior_level_sd=0.01, | |
end_post_period = "2018-10-01") | |
mm.fr.water$PlotPointEffect | |
mm.fr.water$CausalImpactObject | |
# all soft drink | |
mm.fr.all.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>% filter(geography != "Hungary") %>% | |
group_by(geography, year) %>% | |
summarize(value = sum(value, na.rm = T)), | |
id_variable="geography", | |
date_variable="year", | |
matching_variable="value", | |
parallel=FALSE, | |
warping_limit=1, # warping limit=1 | |
dtw_emphasis=1, # rely only on dtw for pre-screening | |
matches=16, # request 5 matches | |
start_match_period="2004-01-01", | |
end_match_period="2011-01-01") | |
mm.fr.all <- MarketMatching::inference(matched_markets = mm.fr.all.match, | |
analyze_betas=TRUE, | |
test_market = "France", | |
control_matches = 10, | |
prior_level_sd=0.1, | |
end_post_period = "2018-10-01") | |
mm.fr.all$PlotPointEffect | |
mm.fr.all$CausalImpactObject | |
# plot france | |
plot.fr.ci <- rbind(plot(mm.fr.carb$CausalImpactObject, "original")$data, | |
plot(mm.fr.juice$CausalImpactObject, "original")$data, | |
plot(mm.fr.water$CausalImpactObject, "original")$data, | |
plot(mm.fr.all$CausalImpactObject, "original")$data) %>% | |
mutate(cat = c(rep("SSBs", 15), rep("Juice", 15), rep("Bottled water", 15), rep("All soft drinks", 15)) ) | |
plot.fr.ci$cat <- factor(plot.fr.ci$cat, levels = c("SSBs", "Juice", "Bottled water", "All soft drinks")) | |
p1.fr.ci <- ggplot(plot.fr.ci, aes(x = time, y = response)) + | |
facet_wrap(~cat, nrow=1) + | |
theme_bw() + | |
geom_ribbon(data=plot.fr.ci, aes(ymin=lower,ymax=upper), alpha=0.9, color=NA, fill = "#9ecae1") + | |
geom_vline(xintercept = as.Date("2012-01-01"), linetype = "dashed", color="darkgray") + | |
geom_line(aes(color = "observed")) + | |
geom_line(aes(y = mean, color = "counterfactual"), linetype = "dashed", size = 0.6) + | |
scale_color_manual(name = "Colors", | |
values = c("observed" = "black", "counterfactual" = "#3182bd")) + | |
theme(legend.position="top", legend.title=element_blank()) + | |
xlab("Years") + | |
ylab("Sales in million liters") + | |
ggtitle("France") + coord_cartesian(ylim = c(0, 15000)) | |
p1.fr.ci | |
plot.fr.ci.att <- rbind(plot(mm.fr.carb$CausalImpactObject, "pointwise")$data, | |
plot(mm.fr.juice$CausalImpactObject, "pointwise")$data, | |
plot(mm.fr.water$CausalImpactObject, "pointwise")$data, | |
plot(mm.fr.all$CausalImpactObject, "pointwise")$data) %>% | |
mutate(cat = c(rep("SSBs", 15), rep("Juice", 15), rep("Bottled water", 15), rep("All soft drinks", 15)) ) | |
plot.fr.ci.att$cat <- factor(plot.fr.ci.att$cat, levels = c("SSBs", "Juice", "Bottled water", "All soft drinks")) | |
p2.fr.ci <- ggplot(plot.fr.ci.att, aes(x = time, y = mean)) + | |
facet_wrap(~cat, nrow=1) + | |
theme_bw() + | |
geom_ribbon(data=plot.fr.ci.att, aes(ymin=lower,ymax=upper), alpha = 0.9, color=NA, fill = "#9ecae1") + | |
geom_hline(yintercept = 0, color="gray") + | |
geom_line(linetype="dashed", color = "#3182bd", size= 0.6) + | |
geom_vline(xintercept = as.Date("2012-01-01"), linetype = "dashed", color="darkgray") + | |
xlab("Years") + | |
ylab("ATT") + coord_cartesian(ylim = c(-1000, 3000)) | |
p2.fr.ci | |
comb.fr.ci <- p1.fr.ci + p2.fr.ci + plot_layout(ncol = 1) | |
ggplot2::ggsave(filename="comb_fr_ci.pdf", plot = comb.fr.ci, width = 7, height = 7) | |
# hungary | |
#carbonates | |
mm.hu.carb.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>% | |
filter(category == "Carbonates" | | |
category == "Energy Drinks" | | |
category == "Sports Drinks", | |
geography != "France") %>% | |
group_by(geography, year) %>% | |
summarize(value = sum(value, na.rm = T)), | |
id_variable="geography", | |
date_variable="year", | |
matching_variable="value", | |
parallel=FALSE, | |
warping_limit=1, # warping limit=1 | |
dtw_emphasis=1, # rely only on dtw for pre-screening | |
matches=16, # request 5 matches | |
start_match_period="2004-01-01", | |
end_match_period="2012-01-01") | |
mm.hu.carb <- MarketMatching::inference(matched_markets = mm.hu.carb.match, | |
analyze_betas=TRUE, | |
test_market = "Hungary", | |
control_matches = 10, | |
prior_level_sd=0.1, | |
end_post_period = "2018-10-01") | |
mm.hu.carb$PlotPointEffect | |
mm.hu.carb$CausalImpactObject | |
# juice | |
mm.hu.juice.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>% | |
filter(category == "Juice", | |
geography != "France"), | |
id_variable="geography", | |
date_variable="year", | |
matching_variable="value", | |
parallel=FALSE, | |
warping_limit=1, # warping limit=1 | |
dtw_emphasis=1, # rely only on dtw for pre-screening | |
matches=16, # request 5 matches | |
start_match_period="2004-01-01", | |
end_match_period="2012-01-01") | |
mm.hu.juice <- MarketMatching::inference(matched_markets = mm.hu.juice.match, | |
analyze_betas=TRUE, | |
test_market = "Hungary", | |
control_matches = 10, | |
prior_level_sd=0.01, | |
end_post_period = "2018-10-01") | |
mm.hu.juice$PlotPointEffect | |
mm.hu.juice$CausalImpactObject | |
# bottled water | |
mm.hu.water.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>% | |
filter(category == "Bottled Water", | |
geography != "France"), | |
id_variable="geography", | |
date_variable="year", | |
matching_variable="value", | |
parallel=FALSE, | |
warping_limit=1, # warping limit=1 | |
dtw_emphasis=1, # rely only on dtw for pre-screening | |
matches=16, # request 5 matches | |
start_match_period="2004-01-01", | |
end_match_period="2012-01-01") | |
mm.hu.water <- MarketMatching::inference(matched_markets = mm.hu.water.match, | |
analyze_betas=TRUE, | |
test_market = "Hungary", | |
control_matches = 10, | |
prior_level_sd=0.01, | |
end_post_period = "2018-10-01") | |
mm.hu.water$PlotPointEffect | |
mm.hu.water$CausalImpactObject | |
# all soft drink | |
mm.hu.all.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>% filter(geography != "France") %>% | |
group_by(geography, year) %>% | |
summarize(value = sum(value, na.rm = T)), | |
id_variable="geography", | |
date_variable="year", | |
matching_variable="value", | |
parallel=FALSE, | |
warping_limit=1, # warping limit=1 | |
dtw_emphasis=1, # rely only on dtw for pre-screening | |
matches=16, # request 5 matches | |
start_match_period="2004-01-01", | |
end_match_period="2012-01-01") | |
mm.hu.all <- MarketMatching::inference(matched_markets = mm.hu.all.match, | |
analyze_betas=TRUE, | |
test_market = "Hungary", | |
control_matches = 10, | |
prior_level_sd=0.1, | |
end_post_period = "2018-10-01") | |
mm.hu.all$PlotPointEffect | |
mm.hu.all$CausalImpactObject | |
# plot hungary | |
plot.hu.ci <- rbind(plot(mm.hu.carb$CausalImpactObject, "original")$data, | |
plot(mm.hu.juice$CausalImpactObject, "original")$data, | |
plot(mm.hu.water$CausalImpactObject, "original")$data, | |
plot(mm.hu.all$CausalImpactObject, "original")$data) %>% | |
mutate(cat = c(rep("SSBs", 15), rep("Juice", 15), rep("Bottled water", 15), rep("All soft drinks", 15)) ) | |
plot.hu.ci$cat <- factor(plot.hu.ci$cat, levels = c("SSBs", "Juice", "Bottled water", "All soft drinks")) | |
p1.hu.ci <- ggplot(plot.hu.ci, aes(x = time, y = response)) + | |
facet_wrap(~cat, nrow=1) + | |
theme_bw() + | |
geom_ribbon(data=plot.hu.ci, aes(ymin=lower,ymax=upper), alpha=0.9, color=NA, fill = "#a1d99b") + | |
geom_vline(xintercept = as.Date("2012-01-01"), linetype = "dashed", color="darkgray") + | |
geom_line(aes(color = "observed")) + | |
geom_line(aes(y = mean, color = "counterfactual"), linetype = "dashed", size = 0.6) + | |
scale_color_manual(name = "Colors", | |
values = c("observed" = "black", "counterfactual" = "#31a354")) + | |
theme(legend.position="top", legend.title=element_blank()) + | |
xlab("Years") + | |
ylab("Sales in million litres") + | |
ggtitle("Hungary") + coord_cartesian(ylim = c(0, 2500)) | |
p1.hu.ci | |
plot.hu.ci.att <- rbind(plot(mm.hu.carb$CausalImpactObject, "pointwise")$data, | |
plot(mm.hu.juice$CausalImpactObject, "pointwise")$data, | |
plot(mm.hu.water$CausalImpactObject, "pointwise")$data, | |
plot(mm.hu.all$CausalImpactObject, "pointwise")$data) %>% | |
mutate(cat = c(rep("SSBs", 15), rep("Juice", 15), rep("Bottled water", 15), rep("All soft drinks", 15)) ) | |
plot.hu.ci.att$cat <- factor(plot.hu.ci.att$cat, levels = c("SSBs", "Juice", "Bottled water", "All soft drinks")) | |
p2.hu.ci <- ggplot(plot.hu.ci.att, aes(x = time, y = mean)) + | |
facet_wrap(~cat, nrow=1) + | |
theme_bw() + | |
geom_ribbon(data=plot.hu.ci.att, aes(ymin=lower,ymax=upper), alpha = 0.9, color=NA, fill = "#a1d99b") + | |
geom_hline(yintercept = 0, color="gray") + | |
geom_line(linetype="dashed", color = "#31a354", size= 0.6) + | |
geom_vline(xintercept = as.Date("2012-01-01"), linetype = "dashed", color="darkgray") + | |
xlab("Years") + | |
ylab("ATT") + coord_cartesian(ylim = c(-1050, 1050)) | |
p2.hu.ci | |
comb.hu.ci <- p1.hu.ci + p2.hu.ci + plot_layout(ncol = 1) | |
ggplot2::ggsave(filename="comb_hu_ci.pdf", plot = comb.hu.ci, width = 7, height = 7) | |
mm.hu.carb$CausalImpactObject |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment