library(naniar)
library(tidyverse)
# Auto-print the `riskfactors` data to preview it; the captured output below
# shows a 245 x 34 tibble mixing factor, integer and double columns.
riskfactors
#> # A tibble: 245 × 34
#> state sex age weight_lbs height_…¹ bmi marital pregn…² child…³ educa…⁴
#> <fct> <fct> <int> <int> <int> <dbl> <fct> <fct> <int> <fct>
#> 1 26 Female 49 190 64 32.7 Married <NA> 0 6
#> 2 40 Female 48 170 68 25.9 Divorc… <NA> 0 5
#> 3 72 Female 55 163 64 28.0 Married <NA> 0 4
#> 4 42 Male 42 230 74 29.6 Married <NA> 1 6
#> 5 32 Female 66 135 62 24.7 Widowed <NA> 0 5
#> 6 19 Male 66 165 70 23.7 Married <NA> 0 5
#> 7 45 Male 37 150 68 22.9 Married <NA> 3 6
#> 8 56 Female 62 170 70 24.4 NeverM… <NA> 0 6
#> 9 18 Male 38 146 70 21.0 Married <NA> 2 4
#> 10 8 Female 42 260 73 34.4 Separa… No 3 5
#> # … with 235 more rows, 24 more variables: employment <fct>, income <fct>,
#> # veteran <fct>, hispanic <fct>, health_general <fct>, health_physical <int>,
#> # health_mental <int>, health_poor <int>, health_cover <fct>,
#> # provide_care <fct>, activity_limited <fct>, drink_any <fct>,
#> # drink_days <int>, drink_average <int>, smoke_100 <fct>, smoke_days <fct>,
#> # smoke_stop <fct>, smoke_last <fct>, diet_fruit <int>, diet_salad <int>,
#> # diet_potato <int>, diet_carrot <int>, diet_vegetable <int>, …
#> # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# Mean of every numeric column of `riskfactors` within each `marital` group.
# Output columns are named "<original column>_mean".
#
# `na.rm = TRUE` is supplied inside the summary function instead of being
# forwarded through `across()`'s `...`: passing extra arguments via `...` is
# deprecated as of dplyr 1.1.0. Results and column names are unchanged.
risk_summary <- riskfactors %>%
  group_by(marital) %>%
  summarise(
    across(
      .cols = where(is.numeric),
      .fns = function(x) mean(x, na.rm = TRUE),
      .names = "{.col}_mean"
    )
  )
risk_summary
#> # A tibble: 7 × 17
#> marital age_m…¹ weigh…² heigh…³ bmi_m…⁴ child…⁵ healt…⁶ healt…⁷ healt…⁸
#> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Married 55.9 177. 66.8 27.7 0.534 3.55 2.23 5.09
#> 2 Divorced 59.8 175. 66.4 28.1 0.205 6.49 5.74 7.88
#> 3 Widowed 74.6 156. 64.2 26.8 0.0732 5.02 0.927 4.82
#> 4 Separated 35 184. 67.3 27.9 2.67 1 16.7 17
#> 5 NeverMarried 49.2 181. 67.3 28.2 0.25 1.92 5.25 2.36
#> 6 UnmarriedCoup… 29.5 194. 65.7 31.5 1.5 1 6.67 2.5
#> 7 <NA> 59 195 67 30.6 0 30 0 15
#> # … with 8 more variables: drink_days_mean <dbl>, drink_average_mean <dbl>,
#> # diet_fruit_mean <dbl>, diet_salad_mean <dbl>, diet_potato_mean <dbl>,
#> # diet_carrot_mean <dbl>, diet_vegetable_mean <dbl>, diet_juice_mean <dbl>,
#> # and abbreviated variable names ¹age_mean, ²weight_lbs_mean,
#> # ³height_inch_mean, ⁴bmi_mean, ⁵children_mean, ⁶health_physical_mean,
#> # ⁷health_mental_mean, ⁸health_poor_mean
#> # ℹ Use `colnames()` to see all variable names
# Reshape the per-group means to long form: every column except `marital` is
# stacked, with the former column name stored in `variable` and its mean in
# `value`.
risk_summary_longer <- pivot_longer(
  data = risk_summary,
  cols = !marital,
  names_to = "variable",
  values_to = "value"
)
risk_summary_longer
#> # A tibble: 112 × 3
#> marital variable value
#> <fct> <chr> <dbl>
#> 1 Married age_mean 55.9
#> 2 Married weight_lbs_mean 177.
#> 3 Married height_inch_mean 66.8
#> 4 Married bmi_mean 27.7
#> 5 Married children_mean 0.534
#> 6 Married health_physical_mean 3.55
#> 7 Married health_mental_mean 2.23
#> 8 Married health_poor_mean 5.09
#> 9 Married drink_days_mean 9.33
#> 10 Married drink_average_mean 0.937
#> # … with 102 more rows
#> # ℹ Use `print(n = ...)` to see more rows
# Horizontal bars of each summarised mean, one panel per `marital` level.
# `scales = "free"` lets every panel choose its own x and y axis ranges.
risk_summary_longer %>%
  ggplot(mapping = aes(x = value, y = variable)) +
  geom_col() +
  facet_wrap(facets = vars(marital), scales = "free")
#> Warning: Removed 2 rows containing missing values (position_stack).
library(broom)
library(palmerpenguins)
# Fit a linear model of `bill_length_mm` on every other column of `penguins`
# (the `.` on the right-hand side), then auto-print the fitted coefficients.
lm_fit <- lm(formula = bill_length_mm ~ ., data = penguins)
lm_fit
#>
#> Call:
#> lm(formula = bill_length_mm ~ ., data = penguins)
#>
#> Coefficients:
#> (Intercept) speciesChinstrap speciesGentoo islandDream
#> -3.893e+02 9.910e+00 6.487e+00 -4.624e-01
#> islandTorgersen bill_depth_mm flipper_length_mm body_mass_g
#> -7.327e-02 3.272e-01 5.724e-02 1.136e-03
#> sexmale year
#> 2.054e+00 2.023e-01
# Coefficient-level summary: one row per model term with its estimate,
# standard error, test statistic and p-value.
broom::tidy(lm_fit)
#> # A tibble: 10 × 5
#> term estimate std.error statistic p.value
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) -389. 326. -1.20 2.33e- 1
#> 2 speciesChinstrap 9.91 0.428 23.2 1.22e-70
#> 3 speciesGentoo 6.49 1.13 5.76 1.97e- 8
#> 4 islandDream -0.462 0.451 -1.02 3.06e- 1
#> 5 islandTorgersen -0.0733 0.472 -0.155 8.77e- 1
#> 6 bill_depth_mm 0.327 0.156 2.10 3.66e- 2
#> 7 flipper_length_mm 0.0572 0.0253 2.26 2.43e- 2
#> 8 body_mass_g 0.00114 0.000426 2.67 8.05e- 3
#> 9 sexmale 2.05 0.390 5.27 2.53e- 7
#> 10 year 0.202 0.163 1.24 2.15e- 1
# Model-level summary: a single row of fit statistics (R-squared, sigma,
# AIC/BIC, deviance, residual degrees of freedom, number of observations).
broom::glance(lm_fit)
#> # A tibble: 1 × 12
#> r.squared adj.r.squ…¹ sigma stati…² p.value df logLik AIC BIC devia…³
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.840 0.836 2.22 188. 7.36e-123 9 -733. 1487. 1529. 1588.
#> # … with 2 more variables: df.residual <int>, nobs <int>, and abbreviated
#> # variable names ¹adj.r.squared, ²statistic, ³deviance
#> # ℹ Use `colnames()` to see all variable names
# Observation-level summary: the model data plus per-row diagnostics such as
# `.fitted`, `.resid`, `.hat`, `.sigma`, `.cooksd` and `.std.resid`.
# NOTE(review): the captured output has 333 rows with gaps in `.rownames`,
# presumably because `lm()` dropped rows with missing values -- confirm.
broom::augment(lm_fit)
#> # A tibble: 333 × 15
#> .rownames bill_l…¹ species island bill_…² flipp…³ body_…⁴ sex year .fitted
#> <chr> <dbl> <fct> <fct> <dbl> <int> <int> <fct> <int> <dbl>
#> 1 1 39.1 Adelie Torge… 18.7 181 3750 male 2007 39.4
#> 2 2 39.5 Adelie Torge… 17.4 186 3800 fema… 2007 37.3
#> 3 3 40.3 Adelie Torge… 18 195 3250 fema… 2007 37.4
#> 4 5 36.7 Adelie Torge… 19.3 193 3450 fema… 2007 37.9
#> 5 6 39.3 Adelie Torge… 20.6 190 3650 male 2007 40.4
#> 6 7 38.9 Adelie Torge… 17.8 181 3625 fema… 2007 36.9
#> 7 8 39.2 Adelie Torge… 19.6 195 4675 male 2007 41.6
#> 8 13 41.1 Adelie Torge… 17.6 182 3200 fema… 2007 36.4
#> 9 14 38.6 Adelie Torge… 21.2 191 3800 male 2007 40.9
#> 10 15 34.6 Adelie Torge… 21.1 198 4400 male 2007 41.9
#> # … with 323 more rows, 5 more variables: .resid <dbl>, .hat <dbl>,
#> # .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>, and abbreviated variable
#> # names ¹bill_length_mm, ²bill_depth_mm, ³flipper_length_mm, ⁴body_mass_g
#> # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# Created on 2022-08-18 by the reprex package (v2.0.1)
# Session info
# Record the R version, platform settings and package versions used for this
# run so the results above can be reproduced.
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.2.0 (2022-04-22)
#> os macOS Monterey 12.3.1
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_AU.UTF-8
#> ctype en_AU.UTF-8
#> tz Australia/Perth
#> date 2022-08-18
#> pandoc 2.18 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.0)
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0)
#> broom * 1.0.0 2022-07-01 [1] CRAN (R 4.2.0)
#> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.2.0)
#> cli 3.3.0.9000 2022-06-15 [1] Github (r-lib/cli@31a5db5)
#> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0)
#> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.2.0)
#> curl 4.3.2 2021-06-23 [1] CRAN (R 4.2.0)
#> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.0)
#> dbplyr 2.2.1 2022-06-27 [1] CRAN (R 4.2.0)
#> digest 0.6.29 2021-12-01 [1] CRAN (R 4.2.0)
#> dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.2.0)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0)
#> evaluate 0.16 2022-08-09 [1] CRAN (R 4.2.0)
#> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0)
#> farver 2.1.1 2022-07-06 [1] CRAN (R 4.2.0)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0)
#> forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.2.0)
#> fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.0)
#> gargle 1.2.0 2021-07-02 [1] CRAN (R 4.2.0)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0)
#> ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.2.0)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0)
#> googledrive 2.0.0 2021-07-08 [1] CRAN (R 4.2.0)
#> googlesheets4 1.0.1 2022-08-13 [1] CRAN (R 4.2.0)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.2.0)
#> haven 2.5.0 2022-04-15 [1] CRAN (R 4.2.0)
#> highr 0.9 2021-04-16 [1] CRAN (R 4.2.0)
#> hms 1.1.1 2021-09-26 [1] CRAN (R 4.2.0)
#> htmltools 0.5.3 2022-07-18 [1] CRAN (R 4.2.0)
#> httr 1.4.3 2022-05-04 [1] CRAN (R 4.2.0)
#> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.2.0)
#> knitr 1.39 2022-04-26 [1] CRAN (R 4.2.0)
#> labeling 0.4.2 2020-10-20 [1] CRAN (R 4.2.0)
#> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.0)
#> lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.2.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0)
#> mime 0.12 2021-09-28 [1] CRAN (R 4.2.0)
#> modelr 0.1.8 2020-05-19 [1] CRAN (R 4.2.0)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0)
#> naniar * 0.6.1 2021-05-14 [1] CRAN (R 4.2.0)
#> palmerpenguins * 0.1.1 2022-08-15 [1] CRAN (R 4.2.0)
#> pillar 1.8.0 2022-07-18 [1] CRAN (R 4.2.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0)
#> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.2.0)
#> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.2.0)
#> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.2.0)
#> R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.2.0)
#> R.utils 2.12.0 2022-06-28 [1] CRAN (R 4.2.0)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0)
#> readr * 2.1.2 2022-01-30 [1] CRAN (R 4.2.0)
#> readxl 1.4.0 2022-03-28 [1] CRAN (R 4.2.0)
#> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.2.0)
#> rlang 1.0.4 2022-07-12 [1] CRAN (R 4.2.0)
#> rmarkdown 2.14 2022-04-25 [1] CRAN (R 4.2.0)
#> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.2.0)
#> rvest 1.0.2 2021-10-16 [1] CRAN (R 4.2.0)
#> scales 1.2.0 2022-04-13 [1] CRAN (R 4.2.0)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0)
#> stringi 1.7.8 2022-07-11 [1] CRAN (R 4.2.0)
#> stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.2.0)
#> styler 1.7.0 2022-03-13 [1] CRAN (R 4.2.0)
#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0)
#> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.2.0)
#> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.2.0)
#> tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.2.0)
#> tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.2.0)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0)
#> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.0)
#> visdat 0.5.3 2019-02-15 [1] CRAN (R 4.2.0)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0)
#> xfun 0.32.1 2022-08-11 [1] https://yihui.r-universe.dev (R 4.2.0)
#> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.0)
#> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.2.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library
#>
#> ──────────────────────────────────────────────────────────────────────────────