library(tidyverse)
library(broom)
library(marginaleffects)
library(palmerpenguins)
penguins <- penguins %>% drop_na(sex)
model1 <- lm(body_mass_g ~ flipper_length_mm + species, data = penguins)
# Regular old regression coefficients
tidy(model1)
#> # A tibble: 4 × 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) -4013. 586. -6.85 3.74e-11
#> 2 flipper_length_mm 40.6 3.08 13.2 3.64e-32
#> 3 speciesChinstrap -205. 57.6 -3.57 4.14e- 4
#> 4 speciesGentoo 285. 95.4 2.98 3.08e- 3
# Marginal effects / slopes / contrasts
avg_slopes(model1)
#>
#> Term Contrast Estimate Std. Error z Pr(>|z|) S
#> flipper_length_mm dY/dX 40.6 3.08 13.19 < 0.001 129.5
#> species Chinstrap - Adelie -205.4 57.57 -3.57 < 0.001 11.4
#> species Gentoo - Adelie 284.5 95.43 2.98 0.00287 8.4
#> 2.5 % 97.5 %
#> 34.6 46.6
#> -318.2 -92.5
#> 97.5 471.6
#>
#> Columns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high
# ↑ These are identical to what you get with summary() or tidy(): 40.6, -205ish, 285ish
# The dy/dx means "slope"; the contrast means "difference between category level and baseline level"
# When you have nonlinear terms, the regular coefficients get gross and hard to interpret
model2 <- lm(body_mass_g ~ poly(flipper_length_mm, 2) + species, data = penguins)
# Regular old regression coefficients. I have no idea what the two coefficients for flipper length mean
tidy(model2)
#> # A tibble: 5 × 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) 4157. 45.0 92.5 5.16e-237
#> 2 poly(flipper_length_mm, 2)1 10770. 784. 13.7 3.05e- 34
#> 3 poly(flipper_length_mm, 2)2 1282. 385. 3.33 9.61e- 4
#> 4 speciesChinstrap -172. 57.6 -2.99 3.03e- 3
#> 5 speciesGentoo 238. 95.0 2.50 1.28e- 2
# These are interpretable though! The average slope is 40.3---a one mm increase
# in flipper length is associated with a 40.3 gram increase in body mass on average
avg_slopes(model2)
#>
#> Term Contrast Estimate Std. Error z Pr(>|z|) S
#> flipper_length_mm dY/dX 40.3 3.03 13.27 < 0.001 131.2
#> species Chinstrap - Adelie -172.0 57.58 -2.99 0.00282 8.5
#> species Gentoo - Adelie 237.8 95.04 2.50 0.01236 6.3
#> 2.5 % 97.5 %
#> 34.3 46.2
#> -284.8 -59.1
#> 51.5 424.0
#>
#> Columns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high
# And the -172 and 237 are the same that you get from the raw coefficients
# That's a single slope (40.3), but because we used a polynomial term, the line
# is technically curvy. You can see that with plot_predictions():
plot_predictions(model2, condition = "flipper_length_mm")
# You can choose arbitrary spots along the line to see what the slope is, like 180, 200, and 225 for fun
slopes(model2, newdata = datagrid(flipper_length_mm = c(180, 200, 225)))
#>
#> Term Contrast Estimate Std. Error z Pr(>|z|) S
#> flipper_length_mm dY/dX 24.5 5.71 4.29 < 0.001 15.8
#> flipper_length_mm dY/dX 39.6 3.05 12.97 < 0.001 125.4
#> flipper_length_mm dY/dX 58.4 6.14 9.51 < 0.001 68.8
#> species Chinstrap - Adelie -172.0 57.58 -2.99 0.00282 8.5
#> species Chinstrap - Adelie -172.0 57.58 -2.99 0.00282 8.5
#> species Chinstrap - Adelie -172.0 57.58 -2.99 0.00282 8.5
#> species Gentoo - Adelie 237.8 95.04 2.50 0.01236 6.3
#> species Gentoo - Adelie 237.8 95.04 2.50 0.01236 6.3
#> species Gentoo - Adelie 237.8 95.04 2.50 0.01236 6.3
#> 2.5 % 97.5 % species flipper_length_mm
#> 13.3 35.7 Adelie 180
#> 33.6 45.5 Adelie 200
#> 46.4 70.4 Adelie 225
#> -284.8 -59.1 Adelie 180
#> -284.8 -59.1 Adelie 200
#> -284.8 -59.1 Adelie 225
#> 51.5 424.0 Adelie 180
#> 51.5 424.0 Adelie 200
#> 51.5 424.0 Adelie 225
#>
#> Columns: rowid, term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted, predicted_hi, predicted_lo, body_mass_g, species, flipper_length_mm
# ↑ This shows a lot of extra detail, like the species differences in means at
# 180, 200, and 225, and those are all the same because those -172 and 237
# values are just shifts in intercepts or differences in averages, so there's no
# need to show them
slopes(model2, newdata = datagrid(flipper_length_mm = c(180, 200, 225)), variables = "flipper_length_mm")
#>
#> Term Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
#> flipper_length_mm 24.5 5.71 4.29 <0.001 15.8 13.3 35.7
#> flipper_length_mm 39.6 3.05 12.97 <0.001 125.4 33.6 45.5
#> flipper_length_mm 58.4 6.14 9.51 <0.001 68.8 46.4 70.4
#> species flipper_length_mm
#> Adelie 180
#> Adelie 200
#> Adelie 225
#>
#> Columns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted, predicted_hi, predicted_lo, body_mass_g, species, flipper_length_mm
# ↑ So here, when flipper length is small, the slope is smaller (24); when it's big, like 225, the slope is 58.4
plot_predictions(model2, condition = "flipper_length_mm") +
geom_vline(xintercept = c(180, 200, 225))
Created on 2023-11-06 with reprex v2.0.2