Skip to content

Instantly share code, notes, and snippets.

@krz
Created February 3, 2021 12:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save krz/d68921301be76522b9e738edd93db0b9 to your computer and use it in GitHub Desktop.
Save krz/d68921301be76522b9e738edd93db0b9 to your computer and use it in GitHub Desktop.
library(MarketMatching)
library(tidyverse)
library(janitor)
library(lubridate)
library(patchwork)
# load data
sales <- read_csv("Passport_Stats.csv",
col_types = cols(`2004` = col_number(),
`2005` = col_number(), `2006` = col_number(),
`2007` = col_number(), `2008` = col_number(),
`2009` = col_number(), `2010` = col_number(),
`2011` = col_number(), `2012` = col_number(),
`2013` = col_number(), `2014` = col_number(),
`2015` = col_number(), `2016` = col_number(),
`2017` = col_number(), `2018` = col_number())) %>% clean_names() %>%
#filter(category == "Carbonates") %>%
filter(data_type == "Off-trade Volume") %>%
select(-data_type, -unit) %>%
filter(geography %in% c("Austria", "Bulgaria", "Croatia", "Republic of Cyprus", "Czech Republic",
"France", "Germany", "Greece", "Italy", "Lithuania",
# "Finland"
# "Ireland",
# "United Kingdom",
#"Latvia",
#"Estonia",
#"Belgium",
#"Spain",
#"Denmark",
"Hungary",
# "Portugal",
"Luxembourg", "Malta", "Netherlands", "Poland", "Romania", "Slovakia", "Slovenia", "Sweden",
"Switzerland"))
df <- gather(sales, "year", "value", x2004:x2018) %>%
arrange(geography, year, value) %>% mutate() %>%
mutate(year = as.numeric(str_sub(year, 2)),
value = as.numeric(value)) %>%
mutate(treat.fr = if_else(year >= 2012 & geography == "France", 1, 0),
treat.hu = if_else(year >= 2011 & geography == "Hungary", 1, 0))
## Analysis
# France
#carbonates
mm.fr.carb.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>%
filter(category == "Carbonates" |
category == "Energy Drinks" |
category == "Sports Drinks",
geography != "Hungary") %>%
group_by(geography, year) %>%
summarize(value = sum(value, na.rm = T)),
id_variable="geography",
date_variable="year",
matching_variable="value",
parallel=FALSE,
warping_limit=1, # warping limit=1
dtw_emphasis=1, # rely only on dtw for pre-screening
matches=16, # request 5 matches
start_match_period="2004-01-01",
end_match_period="2011-01-01")
mm.fr.carb <- MarketMatching::inference(matched_markets = mm.fr.carb.match,
analyze_betas=TRUE,
test_market = "France",
control_matches = 10,
prior_level_sd=0.01,
end_post_period = "2018-10-01")
mm.fr.carb$PlotPointEffect
mm.fr.carb$CausalImpactObject
# juice
mm.fr.juice.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>%
filter(category == "Juice",
geography != "Hungary"),
id_variable="geography",
date_variable="year",
matching_variable="value",
parallel=FALSE,
warping_limit=1, # warping limit=1
dtw_emphasis=1, # rely only on dtw for pre-screening
matches=16, # request 5 matches
start_match_period="2004-01-01",
end_match_period="2011-01-01")
mm.fr.juice <- MarketMatching::inference(matched_markets = mm.fr.juice.match,
analyze_betas=TRUE,
test_market = "France",
control_matches = 10,
prior_level_sd=0.01,
end_post_period = "2018-10-01")
mm.fr.water$PlotPointEffect
mm.fr.water$CausalImpactObject
# bottled water
mm.fr.water.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>%
filter(category == "Bottled Water",
geography != "Hungary"),
id_variable="geography",
date_variable="year",
matching_variable="value",
parallel=FALSE,
warping_limit=1, # warping limit=1
dtw_emphasis=1, # rely only on dtw for pre-screening
matches=16, # request 5 matches
start_match_period="2004-01-01",
end_match_period="2011-01-01")
mm.fr.water <- MarketMatching::inference(matched_markets = mm.fr.water.match,
analyze_betas=TRUE,
test_market = "France",
control_matches = 10,
prior_level_sd=0.01,
end_post_period = "2018-10-01")
mm.fr.all$PlotPointEffect
mm.fr.all$CausalImpactObject
# all soft drink
mm.fr.all.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>% filter(geography != "Hungary") %>%
group_by(geography, year) %>%
summarize(value = sum(value, na.rm = T)),
id_variable="geography",
date_variable="year",
matching_variable="value",
parallel=FALSE,
warping_limit=1, # warping limit=1
dtw_emphasis=1, # rely only on dtw for pre-screening
matches=16, # request 5 matches
start_match_period="2004-01-01",
end_match_period="2011-01-01")
mm.fr.all <- MarketMatching::inference(matched_markets = mm.fr.all.match,
analyze_betas=TRUE,
test_market = "France",
control_matches = 10,
prior_level_sd=0.1,
end_post_period = "2018-10-01")
mm.fr.all$PlotPointEffect
# plot france
plot.fr.ci <- rbind(plot(mm.fr.carb$CausalImpactObject, "original")$data,
plot(mm.fr.juice$CausalImpactObject, "original")$data,
plot(mm.fr.water$CausalImpactObject, "original")$data,
plot(mm.fr.all$CausalImpactObject, "original")$data) %>%
mutate(cat = c(rep("SSBs", 15), rep("Juice", 15), rep("Bottled water", 15), rep("All soft drinks", 15)) )
plot.fr.ci$cat <- factor(plot.fr.ci$cat, levels = c("SSBs", "Juice", "Bottled water", "All soft drinks"))
p1.fr.ci <- ggplot(plot.fr.ci, aes(x = time, y = response)) +
facet_wrap(~cat, nrow=1) +
theme_bw() +
geom_ribbon(data=plot.fr.ci, aes(ymin=lower,ymax=upper), alpha=0.9, color=NA, fill = "#9ecae1") +
geom_vline(xintercept = as.Date("2012-01-01"), linetype = "dashed", color="darkgray") +
geom_line(aes(color = "observed")) +
geom_line(aes(y = mean, color = "counterfactual"), linetype = "dashed", size = 0.6) +
scale_color_manual(name = "Colors",
values = c("observed" = "black", "counterfactual" = "#3182bd")) +
theme(legend.position="top", legend.title=element_blank()) +
xlab("Years") +
ylab("Sales in million liters") +
ggtitle("France") + coord_cartesian(ylim = c(0, 15000))
p1.fr.ci
plot.fr.ci.att <- rbind(plot(mm.fr.carb$CausalImpactObject, "pointwise")$data,
plot(mm.fr.juice$CausalImpactObject, "pointwise")$data,
plot(mm.fr.water$CausalImpactObject, "pointwise")$data,
plot(mm.fr.all$CausalImpactObject, "pointwise")$data) %>%
mutate(cat = c(rep("SSBs", 15), rep("Juice", 15), rep("Bottled water", 15), rep("All soft drinks", 15)) )
plot.fr.ci.att$cat <- factor(plot.fr.ci.att$cat, levels = c("SSBs", "Juice", "Bottled water", "All soft drinks"))
p2.fr.ci <- ggplot(plot.fr.ci.att, aes(x = time, y = mean)) +
facet_wrap(~cat, nrow=1) +
theme_bw() +
geom_ribbon(data=plot.fr.ci.att, aes(ymin=lower,ymax=upper), alpha = 0.9, color=NA, fill = "#9ecae1") +
geom_hline(yintercept = 0, color="gray") +
geom_line(linetype="dashed", color = "#3182bd", size= 0.6) +
geom_vline(xintercept = as.Date("2012-01-01"), linetype = "dashed", color="darkgray") +
xlab("Years") +
ylab("ATT") + coord_cartesian(ylim = c(-1000, 3000))
p2.fr.ci
comb.fr.ci <- p1.fr.ci + p2.fr.ci + plot_layout(ncol = 1)
ggplot2::ggsave(filename="comb_fr_ci.pdf", plot = comb.fr.ci, width = 7, height = 7)
# hungary
#carbonates
mm.hu.carb.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>%
filter(category == "Carbonates" |
category == "Energy Drinks" |
category == "Sports Drinks",
geography != "France") %>%
group_by(geography, year) %>%
summarize(value = sum(value, na.rm = T)),
id_variable="geography",
date_variable="year",
matching_variable="value",
parallel=FALSE,
warping_limit=1, # warping limit=1
dtw_emphasis=1, # rely only on dtw for pre-screening
matches=16, # request 5 matches
start_match_period="2004-01-01",
end_match_period="2011-01-01")
mm.hu.carb <- MarketMatching::inference(matched_markets = mm.hu.carb.match,
analyze_betas=TRUE,
test_market = "Hungary",
control_matches = 10,
prior_level_sd=0.1,
end_post_period = "2018-10-01")
mm.hu.carb$PlotPointEffect
mm.hu.carb$CausalImpactObject
# juice
mm.hu.juice.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>%
filter(category == "Juice",
geography != "France"),
id_variable="geography",
date_variable="year",
matching_variable="value",
parallel=FALSE,
warping_limit=1, # warping limit=1
dtw_emphasis=1, # rely only on dtw for pre-screening
matches=16, # request 5 matches
start_match_period="2004-01-01",
end_match_period="2011-01-01")
mm.hu.juice <- MarketMatching::inference(matched_markets = mm.hu.juice.match,
analyze_betas=TRUE,
test_market = "Hungary",
control_matches = 10,
prior_level_sd=0.01,
end_post_period = "2018-10-01")
mm.hu.juice$PlotPointEffect
mm.hu.juice$CausalImpactObject
# bottled water
mm.hu.water.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>%
filter(category == "Bottled Water",
geography != "France"),
id_variable="geography",
date_variable="year",
matching_variable="value",
parallel=FALSE,
warping_limit=1, # warping limit=1
dtw_emphasis=1, # rely only on dtw for pre-screening
matches=16, # request 5 matches
start_match_period="2004-01-01",
end_match_period="2011-01-01")
mm.hu.water <- MarketMatching::inference(matched_markets = mm.hu.water.match,
analyze_betas=TRUE,
test_market = "Hungary",
control_matches = 10,
prior_level_sd=0.01,
end_post_period = "2018-10-01")
mm.hu.water$PlotPointEffect
mm.hu.water$CausalImpactObject
# all soft drink
mm.hu.all.match <- best_matches(data=df %>% mutate(year = lubridate::ymd(year, truncated = 2L)) %>% filter(geography != "France") %>%
group_by(geography, year) %>%
summarize(value = sum(value, na.rm = T)),
id_variable="geography",
date_variable="year",
matching_variable="value",
parallel=FALSE,
warping_limit=1, # warping limit=1
dtw_emphasis=1, # rely only on dtw for pre-screening
matches=16, # request 5 matches
start_match_period="2004-01-01",
end_match_period="2011-01-01")
mm.hu.all <- MarketMatching::inference(matched_markets = mm.hu.all.match,
analyze_betas=TRUE,
test_market = "Hungary",
control_matches = 10,
prior_level_sd=0.1,
end_post_period = "2018-10-01")
mm.hu.all$PlotPointEffect
mm.hu.all$CausalImpactObject
# plot hungary
plot.hu.ci <- rbind(plot(mm.hu.carb$CausalImpactObject, "original")$data,
plot(mm.hu.juice$CausalImpactObject, "original")$data,
plot(mm.hu.water$CausalImpactObject, "original")$data,
plot(mm.hu.all$CausalImpactObject, "original")$data) %>%
mutate(cat = c(rep("SSBs", 15), rep("Juice", 15), rep("Bottled water", 15), rep("All soft drinks", 15)) )
plot.hu.ci$cat <- factor(plot.hu.ci$cat, levels = c("SSBs", "Juice", "Bottled water", "All soft drinks"))
p1.hu.ci <- ggplot(plot.hu.ci, aes(x = time, y = response)) +
facet_wrap(~cat, nrow=1) +
theme_bw() +
geom_ribbon(data=plot.hu.ci, aes(ymin=lower,ymax=upper), alpha=0.9, color=NA, fill = "#a1d99b") +
geom_vline(xintercept = as.Date("2012-01-01"), linetype = "dashed", color="darkgray") +
geom_line(aes(color = "observed")) +
geom_line(aes(y = mean, color = "counterfactual"), linetype = "dashed", size = 0.6) +
scale_color_manual(name = "Colors",
values = c("observed" = "black", "counterfactual" = "#31a354")) +
theme(legend.position="top", legend.title=element_blank()) +
xlab("Years") +
ylab("Sales in million litres") +
ggtitle("Hungary") + coord_cartesian(ylim = c(0, 2500))
p1.hu.ci
plot.hu.ci.att <- rbind(plot(mm.hu.carb$CausalImpactObject, "pointwise")$data,
plot(mm.hu.juice$CausalImpactObject, "pointwise")$data,
plot(mm.hu.water$CausalImpactObject, "pointwise")$data,
plot(mm.hu.all$CausalImpactObject, "pointwise")$data) %>%
mutate(cat = c(rep("SSBs", 15), rep("Juice", 15), rep("Bottled water", 15), rep("All soft drinks", 15)) )
plot.hu.ci.att$cat <- factor(plot.hu.ci.att$cat, levels = c("SSBs", "Juice", "Bottled water", "All soft drinks"))
p2.hu.ci <- ggplot(plot.hu.ci.att, aes(x = time, y = mean)) +
facet_wrap(~cat, nrow=1) +
theme_bw() +
geom_ribbon(data=plot.hu.ci.att, aes(ymin=lower,ymax=upper), alpha = 0.9, color=NA, fill = "#a1d99b") +
geom_hline(yintercept = 0, color="gray") +
geom_line(linetype="dashed", color = "#31a354", size= 0.6) +
geom_vline(xintercept = as.Date("2012-01-01"), linetype = "dashed", color="darkgray") +
xlab("Years") +
ylab("ATT") + coord_cartesian(ylim = c(-1050, 1050))
p2.hu.ci
comb.hu.ci <- p1.hu.ci + p2.hu.ci + plot_layout(ncol = 1)
ggplot2::ggsave(filename="comb_hu_ci.pdf", plot = comb.hu.ci, width = 7, height = 7)
mm.hu.carb$CausalImpactObject
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment