Last active
November 14, 2022 22:18
-
-
Save grayskripko/ee994e01938c330fc27197a34ee94e27 to your computer and use it in GitHub Desktop.
tidyverse hints
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
?dplyr::context | |
# gives us | |
cur_column() - current column name! | |
cur_<press tab> | |
n() | |
# `.data` refers to columns of the data frame and `.env` to variables in the
# calling environment, so the first call multiplies the `disp` column by the
# outer `disp` (10).
disp <- 10 | |
mtcars %>% mutate(disp = .data$disp * .env$disp) | |
# Without the pronouns both `disp` resolve to the column (data masking looks
# in the data frame first), so this squares the column instead.
mtcars %>% mutate(disp = disp * disp) | |
?.data | |
?c_across | |
across(...., .names='{.col}_{.fn}') | |
# a pair of columns problem | |
# 1. there are pairs of columns like transaction_type _duration and _n | |
# we need to pick some of the pairs, not all of them | |
# then divide every selected _dur column by its corresponding _n column | |
# solution 1
# `stripped_col_names` holds the chosen pair prefixes (column names with the
# `_dur` / `_n` suffix removed); each reduce step adds one
# `<prefix>_dur_div_n` column to the accumulated data frame.
# A glue-style name on the left-hand side is only interpolated with `:=`;
# with `=` the new column would literally be called "{.y}_dur_div_n".
reduce(
  stripped_col_names,
  .init = df,
  .f = ~ mutate(
    .x,
    "{.y}_dur_div_n" := get(glue("{.y}_dur")) / get(glue("{.y}_n"))
  )
)
# solution 2 | |
mutate(across.. cur_column() | |
# name mae_nv n mae_gain | |
# <chr> <dbl> <dbl> <dbl> | |
# 1 adv 23.8 1123 0.036 | |
# 2 odds 23.8 1123 0.077 | |
# 3 adv_21+ 22.6 297 -0.022 | |
# 4 odds_21+ 22.6 297 0.028 | |
# group_by n, subtract mae_gain in each group | |
# (pipeline fragment: presumably the tibble printed above is piped in here)
group_by(n) %>% | |
# str_remove(name, '_.+') drops everything from the first underscore on, so
# 'adv_21+' / 'odds_21+' become 'adv' / 'odds' inside each n group;
# deframe() then turns the group into a named vector that is indexed by name.
# NOTE(review): deframe() takes names/values from the first two columns, so
# this presumably assumes only name and mae_gain remain per group - confirm.
group_map(~deframe(mutate(., name=str_remove(name, '_.+'))) %>% | |
{.['adv'] - .['odds']}) | |
# 2. head(x, -2) is a very useful feature for selecting all but the last 2
# values. It works even for whole tibbles, but it ignores grouping.
# For a grouped tibble, slice_head() with a negative n does the same thing
# separately in each group: n = -2 keeps (group size - 2) rows, i.e. no rows
# at all for groups of 2 rows or fewer.
tibble(gr = c(1, 1, 1, 2, 2, 2, 2), val = 1:7) %>%
  group_by(gr) %>%
  slice_head(n = -2) %>%
  ungroup()
# 3. fast time cv can be done with nesting and lags | |
# 4. group_by()/rowwise() %>% group_map() is bad,
# especially when you need the grouping value in the result.
# Nest instead: the key column stays next to the per-group result, and
# .keep = "unused" drops the nested `data` column once it has been consumed.
df %>%
  nest(data = -smth) %>%
  mutate(res = map(data, your_func), .keep = "unused")
# 5. to inject a vectorized lambda function in a long pipeline
# exec() calls the mapper with the piped value as its argument (here: 2 3 4 5)
1:4 %>% exec(as_mapper(~ .x + 1), .)
# 6. chr to tbl with delay, by batches | |
x %>% | |
enframe() %>% | |
group_by(batch=ceiling(name/10)) %>% | |
summarise(ids=list(value)) %>% | |
pluck('ids') %>% | |
imap_dfr(~... |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment