Last active
April 15, 2023 19:19
-
-
Save avallecam/a3c08a0360af7b23b48cd75988bf924f to your computer and use it in GitHub Desktop.
Use across() and c_across() {dplyr} functions!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://dplyr.tidyverse.org/articles/rowwise.html | |
``` r | |
library(tidyverse) | |
# how to use across() ------------------------------------------------------- | |
# _ with summary functions ------------------------------------------------ | |
# from cheatsheet
# summarise(mtcars, across(everything(), mean)) | |
# traditionally, you can apply summary functions | |
# to vectors in a dataframe | |
mtcars %>% | |
as_tibble() %>% | |
summarise(mean(mpg)) | |
#> # A tibble: 1 × 1 | |
#> `mean(mpg)` | |
#> <dbl> | |
#> 1 20.1 | |
mtcars %>% | |
as_tibble() %>% | |
group_by(vs) %>% | |
summarise(mean(mpg)) | |
#> # A tibble: 2 × 2 | |
#> vs `mean(mpg)` | |
#> <dbl> <dbl> | |
#> 1 0 16.6 | |
#> 2 1 24.6 | |
# however, written this way you have to repeat
# the function call for each variable!
mtcars %>% | |
as_tibble() %>% | |
summarise(mean(mpg),mean(cyl)) | |
#> # A tibble: 1 × 2 | |
#> `mean(mpg)` `mean(cyl)` | |
#> <dbl> <dbl> | |
#> 1 20.1 6.19 | |
# for this, | |
# you can use across() to apply | |
# a summary function to multiple columns | |
mtcars %>% | |
as_tibble() %>% | |
summarise(across(.cols = c(mpg, cyl),.fns = mean)) | |
#> # A tibble: 1 × 2 | |
#> mpg cyl | |
#> <dbl> <dbl> | |
#> 1 20.1 6.19 | |
mtcars %>% | |
as_tibble() %>% | |
group_by(vs) %>% | |
summarise(across(.cols = c(mpg, cyl),.fns = mean)) | |
#> # A tibble: 2 × 3 | |
#> vs mpg cyl | |
#> <dbl> <dbl> <dbl> | |
#> 1 0 16.6 7.44 | |
#> 2 1 24.6 4.57 | |
# use select helpers for it! | |
mtcars %>% | |
as_tibble() %>% | |
group_by(vs) %>% | |
summarise(across(.cols = everything(),.fns = mean)) | |
#> # A tibble: 2 × 11 | |
#> vs mpg cyl disp hp drat wt qsec am gear carb | |
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> | |
#> 1 0 16.6 7.44 307. 190. 3.39 3.69 16.7 0.333 3.56 3.61 | |
#> 2 1 24.6 4.57 132. 91.4 3.86 2.61 19.3 0.5 3.86 1.79 | |
# _ with vectorised functions ---------------------------------------------
# additionally,
# if you want to apply the same
# vectorised function
# to multiple columns,
# you would need to apply it to each one separately
table4a %>% | |
janitor::clean_names() %>% | |
mutate(x1999 = as.character(x1999), | |
x2000 = as.character(x2000)) | |
#> # A tibble: 3 × 3 | |
#> country x1999 x2000 | |
#> <chr> <chr> <chr> | |
#> 1 Afghanistan 745 2666 | |
#> 2 Brazil 37737 80488 | |
#> 3 China 212258 213766 | |
# instead of repeating multiple lines
# for the same function,
# you could use:
table4a %>% | |
janitor::clean_names() %>% | |
mutate(across(.cols = -country,.fns = as.character)) | |
#> # A tibble: 3 × 3 | |
#> country x1999 x2000 | |
#> <chr> <chr> <chr> | |
#> 1 Afghanistan 745 2666 | |
#> 2 Brazil 37737 80488 | |
#> 3 China 212258 213766 | |
# also...
# for mathematical operations,
# without across() you must repeat the operation for each column:
table4a %>% | |
janitor::clean_names() %>% | |
mutate(x1999 = x1999*2, | |
x2000 = x2000*2) | |
#> # A tibble: 3 × 3 | |
#> country x1999 x2000 | |
#> <chr> <dbl> <dbl> | |
#> 1 Afghanistan 1490 5332 | |
#> 2 Brazil 75474 160976 | |
#> 3 China 424516 427532 | |
# alternatively,
# you can write less by using across()
# with the ~ .x lambda notation,
# also using select helpers,
# and avoid repeating variable names
table4a %>% | |
janitor::clean_names() %>% | |
mutate(across(.cols = -country,.fns = ~.x*2)) | |
#> # A tibble: 3 × 3 | |
#> country x1999 x2000 | |
#> <chr> <dbl> <dbl> | |
#> 1 Afghanistan 1490 5332 | |
#> 2 Brazil 75474 160976 | |
#> 3 China 424516 427532 | |
# how to use c_across() -----------------------------------------------------

# _ with summary functions ------------------------------------------------

# from cheatsheet
# transmute(rowwise(UKgas), total = sum(c_across(1:2)))

# rowwise() turns every row into its own group, and c_across() gathers the
# selected columns of that single row into one vector for sum().
# warning: add `na.rm = TRUE` to sum() to avoid missing results when a row
# contains NA values!
table4a %>%
  janitor::clean_names() %>%
  rowwise() %>%
  mutate(sum = sum(c_across(cols = -country)))
#> # A tibble: 3 × 4
#> # Rowwise:
#>   country      x1999  x2000    sum
#>   <chr>        <int>  <int>  <int>
#> 1 Afghanistan    745   2666   3411
#> 2 Brazil       37737  80488 118225
#> 3 China       212258 213766 426024

# and for applying rowwise
# multiple
# summary functions
#
# careful: mutate() creates its columns one after another, so a negative
# selection such as `-country` inside a later expression would also pick up
# the `sum` column created just before it, and sd() would then be computed
# over x1999, x2000 AND sum. Select the year columns explicitly instead.
# (as above, add `na.rm = TRUE` to avoid missing results)
table4a %>%
  janitor::clean_names() %>%
  rowwise() %>%
  mutate(
    sum = sum(c_across(cols = x1999:x2000)),
    sd = sd(c_across(cols = x1999:x2000))
  )
#> # A tibble: 3 × 5
#> # Rowwise:
#>   country      x1999  x2000    sum     sd
#>   <chr>        <int>  <int>  <int>  <dbl>
#> 1 Afghanistan    745   2666   3411  1358.
#> 2 Brazil       37737  80488 118225 30230.
#> 3 China       212258 213766 426024  1066.
``` | |
<sup>Created on 2022-03-29 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1)</sup> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment