Created
August 27, 2022 08:26
-
-
Save AlbertRapp/043c7941d0fa6478cbea55f45525befe to your computer and use it in GitHub Desktop.
across_and_tidyselect
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# GOAL: Center and standardize every numeric column but `year` (by species and island)
# The dataset is evaluated on its own first so the untouched columns and their
# types can be compared with the transformed output further down.
# NOTE(review): the trailing `| |` on the next line looks like table residue
# from the web export and is not valid R -- remove it before running.
palmerpenguins::penguins | |
# # A tibble: 344 x 8
#   species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex     year
#   <fct>   <fct>              <dbl>         <dbl>             <int>       <int> <fct>  <int>
# 1 Adelie  Torgersen           39.1          18.7               181        3750 male    2007
# 2 Adelie  Torgersen           39.5          17.4               186        3800 female  2007
# # ... with 342 more rows
# Helper function: z-score a numeric vector.
# Subtracts the mean of `x` and divides by its standard deviation. Missing
# values are dropped when computing both statistics; NA inputs remain NA in
# the result.
#   x: numeric vector.
#   Returns: numeric vector of the same length as `x`.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently turn the NA handling off.
center_and_standardize <- function(x) {
  (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
}
# THIS IS TEDIOUS: one hand-written assignment per measurement column.
# Each column is overwritten in place with its centered and standardized
# version, computed within each species/island group. The result is still
# grouped (see the `Groups:` line in the output below).
palmerpenguins::penguins |>
  group_by(species, island) |>
  mutate(
    bill_length_mm    = center_and_standardize(bill_length_mm),
    bill_depth_mm     = center_and_standardize(bill_depth_mm),
    flipper_length_mm = center_and_standardize(flipper_length_mm),
    body_mass_g       = center_and_standardize(body_mass_g)
  )
# # A tibble: 344 x 8
# # Groups:   species, island [5]
#   species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex     year
#   <fct>   <fct>              <dbl>         <dbl>             <dbl>       <dbl> <fct>  <int>
# 1 Adelie  Torgersen         0.0493         0.202            -1.64       0.0980 male    2007
# 2 Adelie  Torgersen         0.181         -0.769            -0.834      0.210  female  2007
# # ... with 342 more rows
# Same result, less typing: across() takes the column list once and the
# function once (passed positionally here: columns first, function second).
palmerpenguins::penguins |>
  group_by(species, island) |>
  mutate(
    across(
      c(bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g),
      center_and_standardize
    )
  )
# Same result, even less typing (Alternative I):
# select the columns by name suffix instead of listing them one by one.
palmerpenguins::penguins |>
  group_by(species, island) |>
  mutate(
    across(ends_with(c('_mm', '_g')), center_and_standardize)
  )
# Same result, even less typing (Alternative II):
# select every column whose name matches one of the given patterns.
palmerpenguins::penguins |>
  group_by(species, island) |>
  mutate(
    across(matches(c('bill', 'flipper', 'body')), center_and_standardize)
  )
# Same result, even less typing (Alternative III):
# select by column type -- every numeric column that is not `year`.
palmerpenguins::penguins |>
  group_by(species, island) |>
  mutate(
    across(
      .cols = where(is.numeric) & !year,
      .fns = center_and_standardize
    )
  )
# GOAL: Transform every character column to factor
# The data is printed untouched first; in the output below the <chr> columns
# are manufacturer, model, trans, drv, fl and class.
# `mpg` is presumably the ggplot2 dataset made available by
# library(tidyverse) -- confirm.
# NOTE(review): the trailing `| |` on the next line looks like table residue
# from the web export and is not valid R -- remove it before running.
mpg | |
# # A tibble: 234 x 11
#    manufacturer model      displ  year   cyl trans      drv     cty   hwy fl    class
#    <chr>        <chr>      <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr>
#  1 audi         a4           1.8  1999     4 auto(l5)   f        18    29 p     compact
#  2 audi         a4           1.8  1999     4 manual(m5) f        21    29 p     compact
#  3 audi         a4           2    2008     4 manual(m6) f        20    31 p     compact
#  4 audi         a4           2    2008     4 auto(av)   f        21    30 p     compact
#  5 audi         a4           2.8  1999     6 auto(l5)   f        16    26 p     compact
#  6 audi         a4           2.8  1999     6 manual(m5) f        18    26 p     compact
#  7 audi         a4           3.1  2008     6 auto(av)   f        18    27 p     compact
#  8 audi         a4 quattro   1.8  1999     4 manual(m5) 4        18    26 p     compact
#  9 audi         a4 quattro   1.8  1999     4 auto(l5)   4        16    25 p     compact
# 10 audi         a4 quattro   2    2008     4 manual(m6) 4        20    28 p     compact
# # ... with 224 more rows
# Doing it the tedious repetitive way: one factor() call per character column.
# Each column must be converted from ITSELF. Hand-copied lines like these are
# easy to get wrong (e.g. leaving `drv` on the right-hand side of `fl` or
# `class`), which silently replaces the column's values instead of only
# changing its type.
mpg |>
  mutate(
    manufacturer = factor(manufacturer),
    model        = factor(model),
    trans        = factor(trans),
    drv          = factor(drv),
    fl           = factor(fl),
    class        = factor(class)
  )
# # A tibble: 234 x 11
#    manufacturer model      displ  year   cyl trans      drv     cty   hwy fl    class
#    <fct>        <fct>      <dbl> <int> <int> <fct>      <fct> <int> <int> <fct> <fct>
#  1 audi         a4           1.8  1999     4 auto(l5)   f        18    29 p     compact
#  2 audi         a4           1.8  1999     4 manual(m5) f        21    29 p     compact
#  3 audi         a4           2    2008     4 manual(m6) f        20    31 p     compact
#  4 audi         a4           2    2008     4 auto(av)   f        21    30 p     compact
#  5 audi         a4           2.8  1999     6 auto(l5)   f        16    26 p     compact
#  6 audi         a4           2.8  1999     6 manual(m5) f        18    26 p     compact
#  7 audi         a4           3.1  2008     6 auto(av)   f        18    27 p     compact
#  8 audi         a4 quattro   1.8  1999     4 manual(m5) 4        18    26 p     compact
#  9 audi         a4 quattro   1.8  1999     4 auto(l5)   4        16    25 p     compact
# 10 audi         a4 quattro   2    2008     4 manual(m6) 4        20    28 p     compact
# # ... with 224 more rows
# Less typing: name the six character columns once and let across() apply
# factor() to each of them (columns first, function second).
mpg |>
  mutate(
    across(c(manufacturer, model, trans, drv, fl, class), factor)
  )
# Even less typing: select by type, so every character column is converted
# with factor() without naming any of them.
mpg |>
  mutate(
    across(where(is.character), factor)
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment