avallecam/how_to_across_dplyr.R

## how_to_across_dplyr.R
# https://dplyr.tidyverse.org/articles/rowwise.html
``` r
library(tidyverse)

# how to use across() -------------------------------------------------------

# _ with summary functions ------------------------------------------------

# example taken from the dplyr cheatsheet:
# summarise(mtcars, across(everything(), mean))

# the classic approach: call a summary function
# on a single column of a data frame
as_tibble(mtcars) %>%
  summarise(mean(mpg))
#> # A tibble: 1 × 1
#>   `mean(mpg)`
#>         <dbl>
#> 1        20.1
# the same summary, computed once per level of `vs`
as_tibble(mtcars) %>%
  group_by(vs) %>%
  summarise(mean(mpg))
#> # A tibble: 2 × 2
#>      vs `mean(mpg)`
#>   <dbl>       <dbl>
#> 1     0        16.6
#> 2     1        24.6

# however, written this way every column needs its own call:
# you can only summarise one variable at a time!
as_tibble(mtcars) %>%
  summarise(mean(mpg), mean(cyl))
#> # A tibble: 1 × 2
#>   `mean(mpg)` `mean(cyl)`
#>         <dbl>       <dbl>
#> 1        20.1        6.19

# across() removes that repetition:
# it applies one summary function
# to several columns in a single call
as_tibble(mtcars) %>%
  summarise(across(c(mpg, cyl), mean))
#> # A tibble: 1 × 2
#>     mpg   cyl
#>   <dbl> <dbl>
#> 1  20.1  6.19
# it also respects groups
as_tibble(mtcars) %>%
  group_by(vs) %>%
  summarise(across(c(mpg, cyl), mean))
#> # A tibble: 2 × 3
#>      vs   mpg   cyl
#>   <dbl> <dbl> <dbl>
#> 1     0  16.6  7.44
#> 2     1  24.6  4.57
# select helpers can pick the columns for you!
as_tibble(mtcars) %>%
  group_by(vs) %>%
  summarise(across(everything(), mean))
#> # A tibble: 2 × 11
#>      vs   mpg   cyl  disp    hp  drat    wt  qsec    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1     0  16.6  7.44  307. 190.   3.39  3.69  16.7 0.333  3.56  3.61
#> 2     1  24.6  4.57  132.  91.4  3.86  2.61  19.3 0.5    3.86  1.79

# _ with vectorised functions ---------------------------------------------

# additionally:
# applying the same vectorised function
# to several columns normally means
# writing it out once for each column
janitor::clean_names(table4a) %>%
  mutate(
    x1999 = as.character(x1999),
    x2000 = as.character(x2000)
  )
#> # A tibble: 3 × 3
#>   country     x1999  x2000
#>   <chr>       <chr>  <chr>
#> 1 Afghanistan 745    2666
#> 2 Brazil      37737  80488
#> 3 China       212258 213766

# instead of repeating one line per column
# for the same function,
# you could use:
janitor::clean_names(table4a) %>%
  mutate(across(-country, as.character))
#> # A tibble: 3 × 3
#>   country     x1999  x2000
#>   <chr>       <chr>  <chr>
#> 1 Afghanistan 745    2666
#> 2 Brazil      37737  80488
#> 3 China       212258 213766

# also...
# mathematical operations have the same problem:
# written by hand, every column needs its own expression
janitor::clean_names(table4a) %>%
  mutate(
    x1999 = x1999 * 2,
    x2000 = x2000 * 2
  )
#> # A tibble: 3 × 3
#>   country      x1999  x2000
#>   <chr>        <dbl>  <dbl>
#> 1 Afghanistan   1490   5332
#> 2 Brazil       75474 160976
#> 3 China       424516 427532

# alternatively,
# you can write less by using across():
# pick the columns with a select helper,
# describe the operation once with the ~ .x notation,
# and avoid repeating variable names
janitor::clean_names(table4a) %>%
  mutate(across(-country, ~ .x * 2))
#> # A tibble: 3 × 3
#>   country      x1999  x2000
#>   <chr>        <dbl>  <dbl>
#> 1 Afghanistan   1490   5332
#> 2 Brazil       75474 160976
#> 3 China       424516 427532

# how to use c_across() -----------------------------------------------------

# _ with summary functions ------------------------------------------------

# from the cheatsheet:
# transmute(rowwise(UKgas), total = sum(c_across(1:2)))
#
# after rowwise(), sum() is evaluated once per row and c_across()
# collects the selected columns of that row into one vector.
# na.rm = TRUE is passed explicitly: without it, a single missing
# value in a row would make that row's total NA.
table4a %>%
  janitor::clean_names() %>%
  rowwise() %>%
  mutate(sum = sum(c_across(cols = -country), na.rm = TRUE))
#> # A tibble: 3 × 4
#> # Rowwise:
#>   country      x1999  x2000    sum
#>   <chr>        <int>  <int>  <int>
#> 1 Afghanistan    745   2666   3411
#> 2 Brazil       37737  80488 118225
#> 3 China       212258 213766 426024

# and for applying multiple summary functions rowwise:
# select the year columns explicitly with starts_with("x").
# expressions inside one mutate() are evaluated in order, so a
# selection like `-country` in the second expression would also pick up
# the freshly created `sum` column and distort the standard deviation.
# na.rm = TRUE avoids missing results when a row contains NA.
table4a %>%
  janitor::clean_names() %>%
  rowwise() %>%
  mutate(
    sum = sum(c_across(cols = starts_with("x")), na.rm = TRUE),
    sd = sd(c_across(cols = starts_with("x")), na.rm = TRUE)
  )
#> # A tibble: 3 × 5
#> # Rowwise:
#>   country      x1999  x2000    sum     sd
#>   <chr>        <int>  <int>  <int>  <dbl>
#> 1 Afghanistan    745   2666   3411  1358.
#> 2 Brazil       37737  80488 118225 30230.
#> 3 China       212258 213766 426024  1066.
```

<sup>Created on 2022-03-29 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1)</sup>
	# https://dplyr.tidyverse.org/articles/rowwise.html
	``` r
	library(tidyverse)

	# how to use across() -------------------------------------------------------

	# _ with summary functions ------------------------------------------------

	# example taken from the dplyr cheatsheet:
	# summarise(mtcars, across(everything(), mean))

	# the classic approach: call a summary function
	# on a single column of a data frame
	as_tibble(mtcars) %>%
	  summarise(mean(mpg))
	#> # A tibble: 1 × 1
	#>   `mean(mpg)`
	#>         <dbl>
	#> 1        20.1
	# the same summary, computed once per level of `vs`
	as_tibble(mtcars) %>%
	  group_by(vs) %>%
	  summarise(mean(mpg))
	#> # A tibble: 2 × 2
	#>      vs `mean(mpg)`
	#>   <dbl>       <dbl>
	#> 1     0        16.6
	#> 2     1        24.6

	# however, written this way every column needs its own call:
	# you can only summarise one variable at a time!
	as_tibble(mtcars) %>%
	  summarise(mean(mpg), mean(cyl))
	#> # A tibble: 1 × 2
	#>   `mean(mpg)` `mean(cyl)`
	#>         <dbl>       <dbl>
	#> 1        20.1        6.19

	# across() removes that repetition:
	# it applies one summary function
	# to several columns in a single call
	as_tibble(mtcars) %>%
	  summarise(across(c(mpg, cyl), mean))
	#> # A tibble: 1 × 2
	#>     mpg   cyl
	#>   <dbl> <dbl>
	#> 1  20.1  6.19
	# it also respects groups
	as_tibble(mtcars) %>%
	  group_by(vs) %>%
	  summarise(across(c(mpg, cyl), mean))
	#> # A tibble: 2 × 3
	#>      vs   mpg   cyl
	#>   <dbl> <dbl> <dbl>
	#> 1     0  16.6  7.44
	#> 2     1  24.6  4.57
	# select helpers can pick the columns for you!
	as_tibble(mtcars) %>%
	  group_by(vs) %>%
	  summarise(across(everything(), mean))
	#> # A tibble: 2 × 11
	#>      vs   mpg   cyl  disp    hp  drat    wt  qsec    am  gear  carb
	#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
	#> 1     0  16.6  7.44  307. 190.   3.39  3.69  16.7 0.333  3.56  3.61
	#> 2     1  24.6  4.57  132.  91.4  3.86  2.61  19.3 0.5    3.86  1.79

	# _ with vectorised functions ---------------------------------------------

	# additionally:
	# applying the same vectorised function
	# to several columns normally means
	# writing it out once for each column
	janitor::clean_names(table4a) %>%
	  mutate(
	    x1999 = as.character(x1999),
	    x2000 = as.character(x2000)
	  )
	#> # A tibble: 3 × 3
	#>   country     x1999  x2000
	#>   <chr>       <chr>  <chr>
	#> 1 Afghanistan 745    2666
	#> 2 Brazil      37737  80488
	#> 3 China       212258 213766

	# instead of repeating one line per column
	# for the same function,
	# you could use:
	janitor::clean_names(table4a) %>%
	  mutate(across(-country, as.character))
	#> # A tibble: 3 × 3
	#>   country     x1999  x2000
	#>   <chr>       <chr>  <chr>
	#> 1 Afghanistan 745    2666
	#> 2 Brazil      37737  80488
	#> 3 China       212258 213766

	# also...
	# mathematical operations have the same problem:
	# written by hand, every column needs its own expression
	janitor::clean_names(table4a) %>%
	  mutate(
	    x1999 = x1999 * 2,
	    x2000 = x2000 * 2
	  )
	#> # A tibble: 3 × 3
	#>   country      x1999  x2000
	#>   <chr>        <dbl>  <dbl>
	#> 1 Afghanistan   1490   5332
	#> 2 Brazil       75474 160976
	#> 3 China       424516 427532

	# alternatively,
	# you can write less by using across():
	# pick the columns with a select helper,
	# describe the operation once with the ~ .x notation,
	# and avoid repeating variable names
	janitor::clean_names(table4a) %>%
	  mutate(across(-country, ~ .x * 2))
	#> # A tibble: 3 × 3
	#>   country      x1999  x2000
	#>   <chr>        <dbl>  <dbl>
	#> 1 Afghanistan   1490   5332
	#> 2 Brazil       75474 160976
	#> 3 China       424516 427532

	# how to use c_across() -----------------------------------------------------

	# _ with summary functions ------------------------------------------------

	# from the cheatsheet:
	# transmute(rowwise(UKgas), total = sum(c_across(1:2)))
	#
	# after rowwise(), sum() is evaluated once per row and c_across()
	# collects the selected columns of that row into one vector.
	# na.rm = TRUE is passed explicitly: without it, a single missing
	# value in a row would make that row's total NA.
	table4a %>%
	  janitor::clean_names() %>%
	  rowwise() %>%
	  mutate(sum = sum(c_across(cols = -country), na.rm = TRUE))
	#> # A tibble: 3 × 4
	#> # Rowwise:
	#>   country      x1999  x2000    sum
	#>   <chr>        <int>  <int>  <int>
	#> 1 Afghanistan    745   2666   3411
	#> 2 Brazil       37737  80488 118225
	#> 3 China       212258 213766 426024

	# and for applying multiple summary functions rowwise:
	# select the year columns explicitly with starts_with("x").
	# expressions inside one mutate() are evaluated in order, so a
	# selection like `-country` in the second expression would also pick up
	# the freshly created `sum` column and distort the standard deviation.
	# na.rm = TRUE avoids missing results when a row contains NA.
	table4a %>%
	  janitor::clean_names() %>%
	  rowwise() %>%
	  mutate(
	    sum = sum(c_across(cols = starts_with("x")), na.rm = TRUE),
	    sd = sd(c_across(cols = starts_with("x")), na.rm = TRUE)
	  )
	#> # A tibble: 3 × 5
	#> # Rowwise:
	#>   country      x1999  x2000    sum     sd
	#>   <chr>        <int>  <int>  <int>  <dbl>
	#> 1 Afghanistan    745   2666   3411  1358.
	#> 2 Brazil       37737  80488 118225 30230.
	#> 3 China       212258 213766 426024  1066.
	```

	<sup>Created on 2022-03-29 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1)</sup>