Skip to content

Instantly share code, notes, and snippets.

@andrewheiss
Created November 7, 2023 00:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewheiss/8ebdc301eb14f3654a2d22d54e9bbf55 to your computer and use it in GitHub Desktop.
Save andrewheiss/8ebdc301eb14f3654a2d22d54e9bbf55 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(broom)
library(marginaleffects)
library(palmerpenguins)

penguins <- penguins %>% drop_na(sex)

model1 <- lm(body_mass_g ~ flipper_length_mm + species, data = penguins)

# Regular old regression coefficients
tidy(model1)
#> # A tibble: 4 × 5
#>   term              estimate std.error statistic  p.value
#>   <chr>                <dbl>     <dbl>     <dbl>    <dbl>
#> 1 (Intercept)        -4013.     586.       -6.85 3.74e-11
#> 2 flipper_length_mm     40.6      3.08     13.2  3.64e-32
#> 3 speciesChinstrap    -205.      57.6      -3.57 4.14e- 4
#> 4 speciesGentoo        285.      95.4       2.98 3.08e- 3

# Marginal effects / slopes / contrasts
avg_slopes(model1)
#> 
#>               Term           Contrast Estimate Std. Error     z Pr(>|z|)     S
#>  flipper_length_mm dY/dX                  40.6       3.08 13.19  < 0.001 129.5
#>  species           Chinstrap - Adelie   -205.4      57.57 -3.57  < 0.001  11.4
#>  species           Gentoo - Adelie       284.5      95.43  2.98  0.00287   8.4
#>   2.5 % 97.5 %
#>    34.6   46.6
#>  -318.2  -92.5
#>    97.5  471.6
#> 
#> Columns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high

# ↑ These are identical to what you get with summary() or tidy(): 40.6, -205ish, 285ish

# The dy/dx means "slope"; the contrast means "difference between category level and baseline level"

# When you have nonlinear terms, the regular coefficients get gross and hard to interpret
model2 <- lm(body_mass_g ~ poly(flipper_length_mm, 2) + species, data = penguins)

# Regular old regression coefficients. I have no idea what the two coefficients for flipper length mean
tidy(model2)
#> # A tibble: 5 × 5
#>   term                        estimate std.error statistic   p.value
#>   <chr>                          <dbl>     <dbl>     <dbl>     <dbl>
#> 1 (Intercept)                    4157.      45.0     92.5  5.16e-237
#> 2 poly(flipper_length_mm, 2)1   10770.     784.      13.7  3.05e- 34
#> 3 poly(flipper_length_mm, 2)2    1282.     385.       3.33 9.61e-  4
#> 4 speciesChinstrap               -172.      57.6     -2.99 3.03e-  3
#> 5 speciesGentoo                   238.      95.0      2.50 1.28e-  2

# These are interpretable though! The average slope is 40.3---a one mm increase
# in flipper length is associated with a 40.3 gram increase in body mass on average
avg_slopes(model2)
#> 
#>               Term           Contrast Estimate Std. Error     z Pr(>|z|)     S
#>  flipper_length_mm dY/dX                  40.3       3.03 13.27  < 0.001 131.2
#>  species           Chinstrap - Adelie   -172.0      57.58 -2.99  0.00282   8.5
#>  species           Gentoo - Adelie       237.8      95.04  2.50  0.01236   6.3
#>   2.5 % 97.5 %
#>    34.3   46.2
#>  -284.8  -59.1
#>    51.5  424.0
#> 
#> Columns: term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high

# And the -172 and 237 are the same that you get from the raw coefficients

# That's a single slope (40.3), but because we used a polynomial term, the line
# is technically curvy. You can see that with plot_predictions():
plot_predictions(model2, condition = "flipper_length_mm")

# You can choose arbitrary spots along the line to see what the slope is, like 180, 200, and 225 for fun
slopes(model2, newdata = datagrid(flipper_length_mm = c(180, 200, 225)))
#> 
#>               Term           Contrast Estimate Std. Error     z Pr(>|z|)     S
#>  flipper_length_mm dY/dX                  24.5       5.71  4.29  < 0.001  15.8
#>  flipper_length_mm dY/dX                  39.6       3.05 12.97  < 0.001 125.4
#>  flipper_length_mm dY/dX                  58.4       6.14  9.51  < 0.001  68.8
#>  species           Chinstrap - Adelie   -172.0      57.58 -2.99  0.00282   8.5
#>  species           Chinstrap - Adelie   -172.0      57.58 -2.99  0.00282   8.5
#>  species           Chinstrap - Adelie   -172.0      57.58 -2.99  0.00282   8.5
#>  species           Gentoo - Adelie       237.8      95.04  2.50  0.01236   6.3
#>  species           Gentoo - Adelie       237.8      95.04  2.50  0.01236   6.3
#>  species           Gentoo - Adelie       237.8      95.04  2.50  0.01236   6.3
#>   2.5 % 97.5 % species flipper_length_mm
#>    13.3   35.7  Adelie               180
#>    33.6   45.5  Adelie               200
#>    46.4   70.4  Adelie               225
#>  -284.8  -59.1  Adelie               180
#>  -284.8  -59.1  Adelie               200
#>  -284.8  -59.1  Adelie               225
#>    51.5  424.0  Adelie               180
#>    51.5  424.0  Adelie               200
#>    51.5  424.0  Adelie               225
#> 
#> Columns: rowid, term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted, predicted_hi, predicted_lo, body_mass_g, species, flipper_length_mm

# ↑ This shows a lot of extra detail, like the species differences in means at
# 180, 200, and 225, and those are all the same because those -172 and 237
# values are just shifts in intercepts or differences in averages, so there's no
# need to show them
slopes(model2, newdata = datagrid(flipper_length_mm = c(180, 200, 225)), variables = "flipper_length_mm")
#> 
#>               Term Estimate Std. Error     z Pr(>|z|)     S 2.5 % 97.5 %
#>  flipper_length_mm     24.5       5.71  4.29   <0.001  15.8  13.3   35.7
#>  flipper_length_mm     39.6       3.05 12.97   <0.001 125.4  33.6   45.5
#>  flipper_length_mm     58.4       6.14  9.51   <0.001  68.8  46.4   70.4
#>  species flipper_length_mm
#>   Adelie               180
#>   Adelie               200
#>   Adelie               225
#> 
#> Columns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted, predicted_hi, predicted_lo, body_mass_g, species, flipper_length_mm

# ↑ So here, when flipper length is small, the slope is smaller (24); when it's big, like 225, the slope is 58.4
plot_predictions(model2, condition = "flipper_length_mm") +
  geom_vline(xintercept = c(180, 200, 225))

Created on 2023-11-06 with reprex v2.0.2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment