Skip to content

Instantly share code, notes, and snippets.

@AlbertRapp
Created August 27, 2022 08:26
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save AlbertRapp/043c7941d0fa6478cbea55f45525befe to your computer and use it in GitHub Desktop.
Save AlbertRapp/043c7941d0fa6478cbea55f45525befe to your computer and use it in GitHub Desktop.
across_and_tidyselect
library(tidyverse)
# GOAL: Center and standardize every numeric column but `year` (by species and island)
# Print the untransformed data so the results below can be compared against it.
palmerpenguins::penguins
# # A tibble: 344 x 8
# species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
# <fct> <fct> <dbl> <dbl> <int> <int> <fct> <int>
# 1 Adelie Torgersen 39.1 18.7 181 3750 male 2007
# 2 Adelie Torgersen 39.5 17.4 186 3800 female 2007
# # ... with 342 more rows
# Helper function
# Center a numeric vector on its mean and scale it by its standard deviation
# (i.e. compute z-scores).
#
# x: a numeric vector.
# Returns a vector of the same length as `x`. Missing values are ignored when
# computing the mean and standard deviation, but stay `NA` in the result.
# If `sd()` is zero or `NA` (constant or single-element input), the division
# yields `NaN`/`NA` rather than raising an error.
center_and_standardize <- function(x) {
  # Spell out TRUE: the shorthand `T` is an ordinary variable that can be
  # reassigned, which would silently disable the NA handling.
  (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
}
# THIS IS TEDIOUS
palmerpenguins::penguins |>
  # Means and standard deviations are computed within each species/island group.
  group_by(species, island) |>
  # One hand-written assignment per measurement column; `year` is left as-is.
  mutate(
    bill_length_mm    = center_and_standardize(bill_length_mm),
    bill_depth_mm     = center_and_standardize(bill_depth_mm),
    flipper_length_mm = center_and_standardize(flipper_length_mm),
    body_mass_g       = center_and_standardize(body_mass_g)
  )
# # A tibble: 344 x 8
# # Groups: species, island [5]
# species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
# <fct> <fct> <dbl> <dbl> <dbl> <dbl> <fct> <int>
# 1 Adelie Torgersen 0.0493 0.202 -1.64 0.0980 male 2007
# 2 Adelie Torgersen 0.181 -0.769 -0.834 0.210 female 2007
# # ... with 342 more rows
# Same result, less typing
palmerpenguins::penguins |>
  # Statistics are computed per species/island group; the result stays grouped.
  group_by(species, island) |>
  # across() applies the helper to each listed column and overwrites it in place.
  mutate(across(
    c(bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g),
    center_and_standardize
  ))
# Same result, even less typing (Alternative I)
palmerpenguins::penguins |>
  # Statistics are computed per species/island group; the result stays grouped.
  group_by(species, island) |>
  # Select the measurement columns by their unit suffix instead of by name.
  mutate(across(
    ends_with(c('_mm', '_g')),
    center_and_standardize
  ))
# Same result, even less typing (Alternative II)
palmerpenguins::penguins |>
  # Statistics are computed per species/island group; the result stays grouped.
  group_by(species, island) |>
  # Select every column whose name matches any of the given patterns.
  mutate(across(
    matches(c('bill', 'flipper', 'body')),
    center_and_standardize
  ))
# Same result, even less typing (Alternative III)
palmerpenguins::penguins |>
  # Statistics are computed per species/island group; the result stays grouped.
  group_by(species, island) |>
  # Select by type: every numeric column, then drop `year` from the selection.
  mutate(across(
    c(where(is.numeric), -year),
    center_and_standardize
  ))
# GOAL: Transform every character column to factor
# Print the untransformed data; the <chr> columns are the ones to convert.
mpg
# # A tibble: 234 x 11
# manufacturer model displ year cyl trans drv cty hwy fl class
# <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
# 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
# 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
# 3 audi a4 2 2008 4 manual(m6) f 20 31 p compact
# 4 audi a4 2 2008 4 auto(av) f 21 30 p compact
# 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
# 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compact
# 7 audi a4 3.1 2008 6 auto(av) f 18 27 p compact
# 8 audi a4 quattro 1.8 1999 4 manual(m5) 4 18 26 p compact
# 9 audi a4 quattro 1.8 1999 4 auto(l5) 4 16 25 p compact
# 10 audi a4 quattro 2 2008 4 manual(m6) 4 20 28 p compact
# # ... with 224 more rows
# Doing it the tedious repetitive way
# Convert each character column to a factor with one hand-written assignment
# per column. Each right-hand side must name the same column as its left-hand
# side: copy-pasting `factor(drv)` onto `fl` and `class` would silently replace
# their values with those of `drv`.
mpg |>
  mutate(
    manufacturer = factor(manufacturer),
    model        = factor(model),
    trans        = factor(trans),
    drv          = factor(drv),
    fl           = factor(fl),
    class        = factor(class)
  )
# # A tibble: 234 x 11
# manufacturer model displ year cyl trans drv cty hwy fl class
# <fct> <fct> <dbl> <int> <int> <fct> <fct> <int> <int> <fct> <fct>
# 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
# 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
# 3 audi a4 2 2008 4 manual(m6) f 20 31 p compact
# 4 audi a4 2 2008 4 auto(av) f 21 30 p compact
# 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
# 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compact
# 7 audi a4 3.1 2008 6 auto(av) f 18 27 p compact
# 8 audi a4 quattro 1.8 1999 4 manual(m5) 4 18 26 p compact
# 9 audi a4 quattro 1.8 1999 4 auto(l5) 4 16 25 p compact
# 10 audi a4 quattro 2 2008 4 manual(m6) 4 20 28 p compact
# # ... with 224 more rows
# Same result, less typing
mpg |>
  # across() applies factor() to each listed column and overwrites it in place.
  mutate(across(
    c(manufacturer, model, trans, drv, fl, class),
    factor
  ))
# Same result, even less typing
mpg |>
  # Select by type so no column names have to be spelled out at all.
  mutate(across(where(is.character), factor))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment