library(tidyverse)
library(marginaleffects)
library(gapminder)
gapminder_2007 <- gapminder |>
filter(year == 2007)
# Use log() in the model formula
model <- lm(lifeExp ~ log(gdpPercap), data = gapminder_2007)
# Trying to find the slopes at these points
plot_predictions(model, condition = "gdpPercap") +
geom_vline(xintercept = c(500, 5000, 10000, 20000, 40000))
# If I feed non-logged GDP values into slopes(), those estimates look wrong
model |>
slopes(newdata = datagrid(gdpPercap = c(500, 5000, 10000, 20000, 40000)))
#>
#> Term gdpPercap Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
#> gdpPercap 500 0.01441 8.85e-04 16.3 <0.001 195.6 0.012672 0.016140
#> gdpPercap 5000 0.00144 8.85e-05 16.3 <0.001 195.6 0.001267 0.001614
#> gdpPercap 10000 0.00072 4.42e-05 16.3 <0.001 195.6 0.000634 0.000807
#> gdpPercap 20000 0.00036 2.21e-05 16.3 <0.001 195.5 0.000317 0.000404
#> gdpPercap 40000 0.00018 1.11e-05 16.3 <0.001 195.5 0.000158 0.000202
#>
#> Columns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, gdpPercap, predicted_lo, predicted_hi, predicted, lifeExp
#> Type: response
# If I feed logged GDP values into slopes(), these estimates seem right (but the
# gdpPercap column is ugly with values like 6.21, 8.52, etc.)
model |>
slopes(newdata = datagrid(gdpPercap = log(c(500, 5000, 10000, 20000, 40000))))
#>
#> Term gdpPercap Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
#> gdpPercap 6.21 1.226 0.0753 16.3 <0.001 195.6 1.078 1.373
#> gdpPercap 8.52 0.870 0.0534 16.3 <0.001 195.6 0.766 0.975
#> gdpPercap 9.21 0.801 0.0492 16.3 <0.001 195.6 0.705 0.898
#> gdpPercap 9.90 0.743 0.0456 16.3 <0.001 195.6 0.653 0.832
#> gdpPercap 10.60 0.692 0.0425 16.3 <0.001 195.6 0.609 0.776
#>
#> Columns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, gdpPercap, predicted_lo, predicted_hi, predicted, lifeExp
#> Type: response
# I can exponentiate the values, though:
model |>
slopes(newdata = datagrid(gdpPercap = log(c(500, 5000, 10000, 20000, 40000)))) |>
mutate(gdpPercap = exp(gdpPercap)) |>
as_tibble() |> # The changed column disappears from the data.table printing :shrug:
select(term, gdpPercap, estimate, std.error)
#> # A tibble: 5 × 4
#> term gdpPercap estimate std.error
#> <chr> <dbl> <dbl> <dbl>
#> 1 gdpPercap 500 1.23 0.0753
#> 2 gdpPercap 5000. 0.870 0.0534
#> 3 gdpPercap 10000. 0.801 0.0492
#> 4 gdpPercap 20000. 0.743 0.0456
#> 5 gdpPercap 40000. 0.692 0.0425
# BUUUUT it gets weird with predictions() too. It seems like non-logged values are needed here:
model |>
predictions(newdata = datagrid(gdpPercap = c(500, 5000, 10000, 20000, 40000)))
#>
#> gdpPercap Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
#> 500 49.7 1.219 40.8 <0.001 Inf 47.3 52.1
#> 5000 66.3 0.599 110.6 <0.001 Inf 65.1 67.5
#> 10000 71.3 0.653 109.2 <0.001 Inf 70.0 72.6
#> 20000 76.3 0.826 92.4 <0.001 Inf 74.7 77.9
#> 40000 81.3 1.061 76.6 <0.001 Inf 79.2 83.4
#>
#> Columns: rowid, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, gdpPercap, lifeExp
#> Type: response
# If they're logged, the predictions are wrong:
model |>
predictions(newdata = datagrid(gdpPercap = log(c(500, 5000, 10000, 20000, 40000))))
#>
#> gdpPercap Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
#> 6.21 18.1 3.06 5.91 <0.001 28.2 12.1 24.1
#> 8.52 20.4 2.93 6.97 <0.001 38.2 14.6 26.1
#> 9.21 20.9 2.89 7.24 <0.001 41.1 15.3 26.6
#> 9.90 21.5 2.86 7.51 <0.001 43.9 15.9 27.1
#> 10.60 22.0 2.83 7.75 <0.001 46.7 16.4 27.5
#>
#> Columns: rowid, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, gdpPercap, lifeExp
#> Type: response
Created on 2024-04-24 with reprex v2.0.2