Skip to content

Instantly share code, notes, and snippets.

@njtierney
Last active August 18, 2022 07:00
Show Gist options
  • Save njtierney/42f6ae807126d2ba5b0ce5d8b33542c1 to your computer and use it in GitHub Desktop.
Save njtierney/42f6ae807126d2ba5b0ce5d8b33542c1 to your computer and use it in GitHub Desktop.
library(naniar)
library(tidyverse)

# Print the riskfactors tibble to preview its columns before summarising.
riskfactors
#> # A tibble: 245 × 34
#>    state sex      age weight_lbs height_…¹   bmi marital pregn…² child…³ educa…⁴
#>    <fct> <fct>  <int>      <int>     <int> <dbl> <fct>   <fct>     <int> <fct>  
#>  1 26    Female    49        190        64  32.7 Married <NA>          0 6      
#>  2 40    Female    48        170        68  25.9 Divorc… <NA>          0 5      
#>  3 72    Female    55        163        64  28.0 Married <NA>          0 4      
#>  4 42    Male      42        230        74  29.6 Married <NA>          1 6      
#>  5 32    Female    66        135        62  24.7 Widowed <NA>          0 5      
#>  6 19    Male      66        165        70  23.7 Married <NA>          0 5      
#>  7 45    Male      37        150        68  22.9 Married <NA>          3 6      
#>  8 56    Female    62        170        70  24.4 NeverM… <NA>          0 6      
#>  9 18    Male      38        146        70  21.0 Married <NA>          2 4      
#> 10 8     Female    42        260        73  34.4 Separa… No            3 5      
#> # … with 235 more rows, 24 more variables: employment <fct>, income <fct>,
#> #   veteran <fct>, hispanic <fct>, health_general <fct>, health_physical <int>,
#> #   health_mental <int>, health_poor <int>, health_cover <fct>,
#> #   provide_care <fct>, activity_limited <fct>, drink_any <fct>,
#> #   drink_days <int>, drink_average <int>, smoke_100 <fct>, smoke_days <fct>,
#> #   smoke_stop <fct>, smoke_last <fct>, diet_fruit <int>, diet_salad <int>,
#> #   diet_potato <int>, diet_carrot <int>, diet_vegetable <int>, …
#> # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names

# Mean of every numeric column within each level of `marital`
# (missing `marital` forms its own <NA> group).
#
# `na.rm = TRUE` is set inside an explicit function rather than forwarded
# through across()'s `...`: forwarding is deprecated as of dplyr 1.1.0 and
# the forwarded arguments are evaluated only once rather than per group.
# Note that mean() returns NaN for a group/column whose values are all
# missing once NAs are removed.
risk_summary <- riskfactors %>%
  group_by(marital) %>%
  summarise(
    across(
      .cols = where(is.numeric),
      .fns = function(x) mean(x, na.rm = TRUE),
      .names = "{.col}_mean"
    )
  )

risk_summary
#> # A tibble: 7 × 17
#>   marital        age_m…¹ weigh…² heigh…³ bmi_m…⁴ child…⁵ healt…⁶ healt…⁷ healt…⁸
#>   <fct>            <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1 Married           55.9    177.    66.8    27.7  0.534     3.55   2.23     5.09
#> 2 Divorced          59.8    175.    66.4    28.1  0.205     6.49   5.74     7.88
#> 3 Widowed           74.6    156.    64.2    26.8  0.0732    5.02   0.927    4.82
#> 4 Separated         35      184.    67.3    27.9  2.67      1     16.7     17   
#> 5 NeverMarried      49.2    181.    67.3    28.2  0.25      1.92   5.25     2.36
#> 6 UnmarriedCoup…    29.5    194.    65.7    31.5  1.5       1      6.67     2.5 
#> 7 <NA>              59      195     67      30.6  0        30      0       15   
#> # … with 8 more variables: drink_days_mean <dbl>, drink_average_mean <dbl>,
#> #   diet_fruit_mean <dbl>, diet_salad_mean <dbl>, diet_potato_mean <dbl>,
#> #   diet_carrot_mean <dbl>, diet_vegetable_mean <dbl>, diet_juice_mean <dbl>,
#> #   and abbreviated variable names ¹​age_mean, ²​weight_lbs_mean,
#> #   ³​height_inch_mean, ⁴​bmi_mean, ⁵​children_mean, ⁶​health_physical_mean,
#> #   ⁷​health_mental_mean, ⁸​health_poor_mean
#> # ℹ Use `colnames()` to see all variable names

# Reshape the per-group means to long form: every column except `marital`
# is stacked, with its name stored in `variable` and its mean in `value`.
risk_summary_longer <- pivot_longer(
  data = risk_summary,
  cols = !marital,
  names_to = "variable",
  values_to = "value"
)

risk_summary_longer
#> # A tibble: 112 × 3
#>    marital variable               value
#>    <fct>   <chr>                  <dbl>
#>  1 Married age_mean              55.9  
#>  2 Married weight_lbs_mean      177.   
#>  3 Married height_inch_mean      66.8  
#>  4 Married bmi_mean              27.7  
#>  5 Married children_mean          0.534
#>  6 Married health_physical_mean   3.55 
#>  7 Married health_mental_mean     2.23 
#>  8 Married health_poor_mean       5.09 
#>  9 Married drink_days_mean        9.33 
#> 10 Married drink_average_mean     0.937
#> # … with 102 more rows
#> # ℹ Use `print(n = ...)` to see more rows

  # Horizontal bar per summarised variable, one panel per marital status.
  # Axes are freed per panel so each group is drawn on its own scale.
  risk_summary_longer %>%
    ggplot(mapping = aes(y = variable, x = value)) +
    geom_col() +
    facet_wrap(facets = vars(marital), scales = "free")
#> Warning: Removed 2 rows containing missing values (position_stack).

library(broom)
library(palmerpenguins)

# Linear model of bill length on every other column of `penguins`
# (the `.` on the right-hand side expands to all remaining columns).
lm_fit <- lm(
  formula = bill_length_mm ~ .,
  data = penguins
)

lm_fit
#> 
#> Call:
#> lm(formula = bill_length_mm ~ ., data = penguins)
#> 
#> Coefficients:
#>       (Intercept)   speciesChinstrap      speciesGentoo        islandDream  
#>        -3.893e+02          9.910e+00          6.487e+00         -4.624e-01  
#>   islandTorgersen      bill_depth_mm  flipper_length_mm        body_mass_g  
#>        -7.327e-02          3.272e-01          5.724e-02          1.136e-03  
#>           sexmale               year  
#>         2.054e+00          2.023e-01

# Coefficient-level summary: one row per model term.
tidy(lm_fit)
#> # A tibble: 10 × 5
#>    term                estimate  std.error statistic  p.value
#>    <chr>                  <dbl>      <dbl>     <dbl>    <dbl>
#>  1 (Intercept)       -389.      326.          -1.20  2.33e- 1
#>  2 speciesChinstrap     9.91      0.428       23.2   1.22e-70
#>  3 speciesGentoo        6.49      1.13         5.76  1.97e- 8
#>  4 islandDream         -0.462     0.451       -1.02  3.06e- 1
#>  5 islandTorgersen     -0.0733    0.472       -0.155 8.77e- 1
#>  6 bill_depth_mm        0.327     0.156        2.10  3.66e- 2
#>  7 flipper_length_mm    0.0572    0.0253       2.26  2.43e- 2
#>  8 body_mass_g          0.00114   0.000426     2.67  8.05e- 3
#>  9 sexmale              2.05      0.390        5.27  2.53e- 7
#> 10 year                 0.202     0.163        1.24  2.15e- 1
# Model-level summary: a single row of fit statistics (r.squared, AIC, ...).
glance(lm_fit)
#> # A tibble: 1 × 12
#>   r.squared adj.r.squ…¹ sigma stati…²   p.value    df logLik   AIC   BIC devia…³
#>       <dbl>       <dbl> <dbl>   <dbl>     <dbl> <dbl>  <dbl> <dbl> <dbl>   <dbl>
#> 1     0.840       0.836  2.22    188. 7.36e-123     9  -733. 1487. 1529.   1588.
#> # … with 2 more variables: df.residual <int>, nobs <int>, and abbreviated
#> #   variable names ¹​adj.r.squared, ²​statistic, ³​deviance
#> # ℹ Use `colnames()` to see all variable names
# Observation-level summary: the modelled rows plus .fitted, .resid and
# related diagnostic columns.
augment(lm_fit)
#> # A tibble: 333 × 15
#>    .rownames bill_l…¹ species island bill_…² flipp…³ body_…⁴ sex    year .fitted
#>    <chr>        <dbl> <fct>   <fct>    <dbl>   <int>   <int> <fct> <int>   <dbl>
#>  1 1             39.1 Adelie  Torge…    18.7     181    3750 male   2007    39.4
#>  2 2             39.5 Adelie  Torge…    17.4     186    3800 fema…  2007    37.3
#>  3 3             40.3 Adelie  Torge…    18       195    3250 fema…  2007    37.4
#>  4 5             36.7 Adelie  Torge…    19.3     193    3450 fema…  2007    37.9
#>  5 6             39.3 Adelie  Torge…    20.6     190    3650 male   2007    40.4
#>  6 7             38.9 Adelie  Torge…    17.8     181    3625 fema…  2007    36.9
#>  7 8             39.2 Adelie  Torge…    19.6     195    4675 male   2007    41.6
#>  8 13            41.1 Adelie  Torge…    17.6     182    3200 fema…  2007    36.4
#>  9 14            38.6 Adelie  Torge…    21.2     191    3800 male   2007    40.9
#> 10 15            34.6 Adelie  Torge…    21.1     198    4400 male   2007    41.9
#> # … with 323 more rows, 5 more variables: .resid <dbl>, .hat <dbl>,
#> #   .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>, and abbreviated variable
#> #   names ¹​bill_length_mm, ²​bill_depth_mm, ³​flipper_length_mm, ⁴​body_mass_g
#> # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names

Created on 2022-08-18 by the reprex package (v2.0.1)

Session info
# Report the R version, platform settings and package versions used for
# this run.
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.2.0 (2022-04-22)
#>  os       macOS Monterey 12.3.1
#>  system   aarch64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_AU.UTF-8
#>  ctype    en_AU.UTF-8
#>  tz       Australia/Perth
#>  date     2022-08-18
#>  pandoc   2.18 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package        * version    date (UTC) lib source
#>  assertthat       0.2.1      2019-03-21 [1] CRAN (R 4.2.0)
#>  backports        1.4.1      2021-12-13 [1] CRAN (R 4.2.0)
#>  broom          * 1.0.0      2022-07-01 [1] CRAN (R 4.2.0)
#>  cellranger       1.1.0      2016-07-27 [1] CRAN (R 4.2.0)
#>  cli              3.3.0.9000 2022-06-15 [1] Github (r-lib/cli@31a5db5)
#>  colorspace       2.0-3      2022-02-21 [1] CRAN (R 4.2.0)
#>  crayon           1.5.1      2022-03-26 [1] CRAN (R 4.2.0)
#>  curl             4.3.2      2021-06-23 [1] CRAN (R 4.2.0)
#>  DBI              1.1.3      2022-06-18 [1] CRAN (R 4.2.0)
#>  dbplyr           2.2.1      2022-06-27 [1] CRAN (R 4.2.0)
#>  digest           0.6.29     2021-12-01 [1] CRAN (R 4.2.0)
#>  dplyr          * 1.0.9      2022-04-28 [1] CRAN (R 4.2.0)
#>  ellipsis         0.3.2      2021-04-29 [1] CRAN (R 4.2.0)
#>  evaluate         0.16       2022-08-09 [1] CRAN (R 4.2.0)
#>  fansi            1.0.3      2022-03-24 [1] CRAN (R 4.2.0)
#>  farver           2.1.1      2022-07-06 [1] CRAN (R 4.2.0)
#>  fastmap          1.1.0      2021-01-25 [1] CRAN (R 4.2.0)
#>  forcats        * 0.5.1      2021-01-27 [1] CRAN (R 4.2.0)
#>  fs               1.5.2      2021-12-08 [1] CRAN (R 4.2.0)
#>  gargle           1.2.0      2021-07-02 [1] CRAN (R 4.2.0)
#>  generics         0.1.3      2022-07-05 [1] CRAN (R 4.2.0)
#>  ggplot2        * 3.3.6      2022-05-03 [1] CRAN (R 4.2.0)
#>  glue             1.6.2      2022-02-24 [1] CRAN (R 4.2.0)
#>  googledrive      2.0.0      2021-07-08 [1] CRAN (R 4.2.0)
#>  googlesheets4    1.0.1      2022-08-13 [1] CRAN (R 4.2.0)
#>  gtable           0.3.0      2019-03-25 [1] CRAN (R 4.2.0)
#>  haven            2.5.0      2022-04-15 [1] CRAN (R 4.2.0)
#>  highr            0.9        2021-04-16 [1] CRAN (R 4.2.0)
#>  hms              1.1.1      2021-09-26 [1] CRAN (R 4.2.0)
#>  htmltools        0.5.3      2022-07-18 [1] CRAN (R 4.2.0)
#>  httr             1.4.3      2022-05-04 [1] CRAN (R 4.2.0)
#>  jsonlite         1.8.0      2022-02-22 [1] CRAN (R 4.2.0)
#>  knitr            1.39       2022-04-26 [1] CRAN (R 4.2.0)
#>  labeling         0.4.2      2020-10-20 [1] CRAN (R 4.2.0)
#>  lifecycle        1.0.1      2021-09-24 [1] CRAN (R 4.2.0)
#>  lubridate        1.8.0      2021-10-07 [1] CRAN (R 4.2.0)
#>  magrittr         2.0.3      2022-03-30 [1] CRAN (R 4.2.0)
#>  mime             0.12       2021-09-28 [1] CRAN (R 4.2.0)
#>  modelr           0.1.8      2020-05-19 [1] CRAN (R 4.2.0)
#>  munsell          0.5.0      2018-06-12 [1] CRAN (R 4.2.0)
#>  naniar         * 0.6.1      2021-05-14 [1] CRAN (R 4.2.0)
#>  palmerpenguins * 0.1.1      2022-08-15 [1] CRAN (R 4.2.0)
#>  pillar           1.8.0      2022-07-18 [1] CRAN (R 4.2.0)
#>  pkgconfig        2.0.3      2019-09-22 [1] CRAN (R 4.2.0)
#>  purrr          * 0.3.4      2020-04-17 [1] CRAN (R 4.2.0)
#>  R.cache          0.16.0     2022-07-21 [1] CRAN (R 4.2.0)
#>  R.methodsS3      1.8.2      2022-06-13 [1] CRAN (R 4.2.0)
#>  R.oo             1.25.0     2022-06-12 [1] CRAN (R 4.2.0)
#>  R.utils          2.12.0     2022-06-28 [1] CRAN (R 4.2.0)
#>  R6               2.5.1      2021-08-19 [1] CRAN (R 4.2.0)
#>  readr          * 2.1.2      2022-01-30 [1] CRAN (R 4.2.0)
#>  readxl           1.4.0      2022-03-28 [1] CRAN (R 4.2.0)
#>  reprex           2.0.1      2021-08-05 [1] CRAN (R 4.2.0)
#>  rlang            1.0.4      2022-07-12 [1] CRAN (R 4.2.0)
#>  rmarkdown        2.14       2022-04-25 [1] CRAN (R 4.2.0)
#>  rstudioapi       0.13       2020-11-12 [1] CRAN (R 4.2.0)
#>  rvest            1.0.2      2021-10-16 [1] CRAN (R 4.2.0)
#>  scales           1.2.0      2022-04-13 [1] CRAN (R 4.2.0)
#>  sessioninfo      1.2.2      2021-12-06 [1] CRAN (R 4.2.0)
#>  stringi          1.7.8      2022-07-11 [1] CRAN (R 4.2.0)
#>  stringr        * 1.4.0      2019-02-10 [1] CRAN (R 4.2.0)
#>  styler           1.7.0      2022-03-13 [1] CRAN (R 4.2.0)
#>  tibble         * 3.1.8      2022-07-22 [1] CRAN (R 4.2.0)
#>  tidyr          * 1.2.0      2022-02-01 [1] CRAN (R 4.2.0)
#>  tidyselect       1.1.2      2022-02-21 [1] CRAN (R 4.2.0)
#>  tidyverse      * 1.3.2      2022-07-18 [1] CRAN (R 4.2.0)
#>  tzdb             0.3.0      2022-03-28 [1] CRAN (R 4.2.0)
#>  utf8             1.2.2      2021-07-24 [1] CRAN (R 4.2.0)
#>  vctrs            0.4.1      2022-04-13 [1] CRAN (R 4.2.0)
#>  visdat           0.5.3      2019-02-15 [1] CRAN (R 4.2.0)
#>  withr            2.5.0      2022-03-03 [1] CRAN (R 4.2.0)
#>  xfun             0.32.1     2022-08-11 [1] https://yihui.r-universe.dev (R 4.2.0)
#>  xml2             1.3.3      2021-11-30 [1] CRAN (R 4.2.0)
#>  yaml             2.3.5      2022-02-21 [1] CRAN (R 4.2.0)
#> 
#>  [1] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment