andrewheiss/bayes-inspections.R

## bayes-inspections.R
library(tidyverse)
library(brms)
library(broom)
library(broom.mixed)
library(tidybayes)


# Inspections

# Do weekends cause inspectors to give higher scores?
inspections <- read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/causaldata/restaurant_inspections.csv")

model_naive <- lm(inspection_score ~ Weekend, data = inspections)
tidy(model_naive, conf.int = TRUE)

# We're 95% sure that the range 1.25 - 2.95 captures the true population average, which is fixed and certain

# But there are probably things that confound this relationship
model_adjusted <- lm(inspection_score ~ Weekend + NumberofLocations + Year, data = inspections)
tidy(model_adjusted, conf.int = TRUE)

ggplot(inspections, aes(x = Weekend, y = inspection_score)) +
  geom_point(position = position_jitter(height = 0),
             alpha = 0.5) +
  stat_summary(fun.data = "mean_se", fun.args = list(mult = 1.96))


results <- tidy(model_adjusted, conf.int = TRUE) %>%
  # filter(term != "(Intercept)")
  filter(term == "WeekendTRUE")

ggplot(results, aes(x = estimate, y = term)) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
  geom_vline(xintercept = 0)

# Again, we're 95% sure that that range captures the true population average.


model_bayes <- brm(bf(inspection_score ~ Weekend + NumberofLocations + Year),
                   data = inspections,
                   chains = 4, iter = 2000, cores = 4, seed = 1234, backend = "cmdstanr")
summary(model_bayes)
tidy(model_bayes)
tidy(model_bayes, conf.int = TRUE)

posterior_interval(model_bayes)

get_variables(model_bayes)

model_bayes %>%
  spread_draws(b_WeekendTRUE) %>%
  head(10)

inspections %>%
  group_by(Weekend) %>%
  summarize(avg = mean(inspection_score))

conditional_effects(model_bayes, effects = "Weekend")
conditional_effects(model_bayes, effects = "NumberofLocations", spaghetti = TRUE, ndraws = 100)

all_draws <- model_bayes %>%
  gather_draws(b_WeekendTRUE)

ggplot(all_draws, aes(y = .variable, x = .value)) +
  stat_halfeye()

all_draws <- model_bayes %>%
  spread_draws(b_WeekendTRUE)

ggplot(all_draws, aes(x = b_WeekendTRUE)) +
  stat_halfeye()

ggplot(all_draws, aes(x = b_WeekendTRUE)) +
  stat_dotsinterval(quantiles = 100)


# Penguins, for fun!
library(palmerpenguins)

penguins_clean <- penguins %>%
  filter(!is.na(sex))

peng_freq <- lm(body_mass_g ~ bill_length_mm + bill_depth_mm + flipper_length_mm + species, data = penguins_clean)
tidy(peng_freq, conf.int = TRUE)

peng_bayes <- brm(bf(body_mass_g ~ bill_length_mm + bill_depth_mm + flipper_length_mm + species),
                  data = penguins_clean,
                  chains = 4, iter = 2000, cores = 4, seed = 1234, backend = "cmdstanr")
tidy(peng_bayes, conf.int = TRUE)

plot(peng_bayes)

peng_draws <- peng_bayes %>%
  gather_draws(`b_.*`, regex = TRUE) %>%
  filter(.variable != "b_Intercept",
         !str_detect(.variable, "species"))

ggplot(peng_draws, aes(y = .variable, x = .value)) +
  stat_halfeye()
	library(tidyverse)
	library(brms)
	library(broom)
	library(broom.mixed)
	library(tidybayes)



	# Inspections

	# Do weekends cause inspectors to give higher scores?
	inspections <- read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/causaldata/restaurant_inspections.csv")

	model_naive <- lm(inspection_score ~ Weekend, data = inspections)
	tidy(model_naive, conf.int = TRUE)

	# We're 95% sure that the range 1.25 - 2.95 captures the true population average, which is fixed and certain

	# But there are probably things that confound this relationship
	model_adjusted <- lm(inspection_score ~ Weekend + NumberofLocations + Year, data = inspections)
	tidy(model_adjusted, conf.int = TRUE)

	ggplot(inspections, aes(x = Weekend, y = inspection_score)) +
	geom_point(position = position_jitter(height = 0),
	alpha = 0.5) +
	stat_summary(fun.data = "mean_se", fun.args = list(mult = 1.96))


	results <- tidy(model_adjusted, conf.int = TRUE) %>%
	# filter(term != "(Intercept)")
	filter(term == "WeekendTRUE")

	ggplot(results, aes(x = estimate, y = term)) +
	geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
	geom_vline(xintercept = 0)

	# Again, we're 95% sure that that range captures the true population average.


	model_bayes <- brm(bf(inspection_score ~ Weekend + NumberofLocations + Year),
	data = inspections,
	chains = 4, iter = 2000, cores = 4, seed = 1234, backend = "cmdstanr")
	summary(model_bayes)
	tidy(model_bayes)
	tidy(model_bayes, conf.int = TRUE)

	posterior_interval(model_bayes)

	get_variables(model_bayes)

	model_bayes %>%
	spread_draws(b_WeekendTRUE) %>%
	head(10)

	inspections %>%
	group_by(Weekend) %>%
	summarize(avg = mean(inspection_score))

	conditional_effects(model_bayes, effects = "Weekend")
	conditional_effects(model_bayes, effects = "NumberofLocations", spaghetti = TRUE, ndraws = 100)

	all_draws <- model_bayes %>%
	gather_draws(b_WeekendTRUE)

	ggplot(all_draws, aes(y = .variable, x = .value)) +
	stat_halfeye()

	all_draws <- model_bayes %>%
	spread_draws(b_WeekendTRUE)

	ggplot(all_draws, aes(x = b_WeekendTRUE)) +
	stat_halfeye()

	ggplot(all_draws, aes(x = b_WeekendTRUE)) +
	stat_dotsinterval(quantiles = 100)


	# Penguins, for fun!
	library(palmerpenguins)

	penguins_clean <- penguins %>%
	filter(!is.na(sex))

	peng_freq <- lm(body_mass_g ~ bill_length_mm + bill_depth_mm + flipper_length_mm + species, data = penguins_clean)
	tidy(peng_freq, conf.int = TRUE)

	peng_bayes <- brm(bf(body_mass_g ~ bill_length_mm + bill_depth_mm + flipper_length_mm + species),
	data = penguins_clean,
	chains = 4, iter = 2000, cores = 4, seed = 1234, backend = "cmdstanr")
	tidy(peng_bayes, conf.int = TRUE)

	plot(peng_bayes)

	peng_draws <- peng_bayes %>%
	gather_draws(`b_.*`, regex = TRUE) %>%
	filter(.variable != "b_Intercept",
	!str_detect(.variable, "species"))

	ggplot(peng_draws, aes(y = .variable, x = .value)) +
	stat_halfeye()