library(tidyverse)
# Share of each drive type (drv) across the whole mpg dataset
mpg %>%
  group_by(drv) %>%
  # Collapse to one row per drv; n() counts the rows in each group
  summarise(total = n()) %>%
  # summarise() drops the last grouping variable. drv was the only one, so
  # the result is ungrouped and sum(total) spans all 3 summary rows.
  mutate(prop = total / sum(total))
#> `summarise()` ungrouping output (override with `.groups` argument)
#> # A tibble: 3 x 3
#> drv total prop
#> <chr> <int> <dbl>
#> 1 4 103 0.440
#> 2 f 106 0.453
#> 3 r 25 0.107
# Share of each model year within each drive type
mpg %>%
  group_by(drv, year) %>%
  summarise(total = n()) %>%
  # summarise() only drops the last grouping variable (year), so the result
  # is still grouped by drv. sum(total) is therefore computed per drv, and
  # prop sums to 1 within each drive type rather than over the whole table.
  mutate(prop = total / sum(total))
#> `summarise()` regrouping output by 'drv' (override with `.groups` argument)
#> # A tibble: 6 x 4
#> # Groups: drv [3]
#> drv year total prop
#> <chr> <int> <int> <dbl>
#> 1 4 1999 49 0.476
#> 2 4 2008 54 0.524
#> 3 f 1999 57 0.538
#> 4 f 2008 49 0.462
#> 5 r 1999 11 0.44
#> 6 r 2008 14 0.56
# Share of each drv/year combination across the whole dataset: ungroup
# explicitly so the denominator covers all 6 rows instead of one drv at a time
mpg %>%
  group_by(drv, year) %>%
  summarise(total = n()) %>%
  # Remove the drv grouping that summarise() leaves in place
  ungroup() %>%
  # With no groups left, sum(total) is the grand total and prop sums to 1
  mutate(prop = total / sum(total))
#> # A tibble: 6 x 4
#> drv year total prop
#> <chr> <int> <int> <dbl>
#> 1 4 1999 49 0.209
#> 2 4 2008 54 0.231
#> 3 f 1999 57 0.244
#> 4 f 2008 49 0.209
#> 5 r 1999 11 0.0470
#> 6 r 2008 14 0.0598
# Created on 2020-09-01 by the reprex package (v0.3.0)