grayskripko/tidy_hint.R

## tidy_hint.R
?dplyr::context
# gives us
cur_column()  # current column name!
# type `cur_` and press <Tab> to browse the other cur_*() context helpers
n()

# `disp` exists both as a variable in the environment and as a column of mtcars.
disp <- 10
# .data$disp is the column, .env$disp is the variable defined above,
# so every value of the column is multiplied by 10.
mtcars %>% mutate(disp = .data$disp * .env$disp)
# Without the pronouns both names resolve to the column: the column is squared.
mtcars %>% mutate(disp = disp * disp)
?.data

?c_across

across(...., .names='{.col}_{.fn}')

# a pair of columns problem
# 1. There are pairs of columns like <transaction_type>_dur and
# <transaction_type>_n. We need to pick some of the pairs, not all of them,
# then divide each picked _dur column by its matching _n column.
# solution 1
# Fold over the stripped column-name prefixes, adding one ratio column per
# prefix to `df`. `stripped_col_names` is a character vector of the prefixes
# to process (e.g. "payment" for payment_dur / payment_n).
# A glue-style name on the left-hand side is only interpolated with `:=`;
# with `=` the new column is literally called "{.y}_dur_div_n" and every
# iteration overwrites it.
reduce(
  stripped_col_names,
  .init = df,
  .f = ~ mutate(
    .x,
    # .data[[<string>]] looks the column up by name inside the data frame
    "{.y}_dur_div_n" := .data[[paste0(.y, "_dur")]] / .data[[paste0(.y, "_n")]]
  )
)
# solution 2
mutate(across.. cur_column()

#   name     mae_nv     n mae_gain
#  <chr>     <dbl> <dbl>    <dbl>
# 1 adv        23.8  1123    0.036
# 2 odds       23.8  1123    0.077
# 3 adv_21+    22.6   297   -0.022
# 4 odds_21+   22.6   297    0.028
# group_by n, subtract mae_gain in each group
# For each group of rows sharing the same `n`: strip the suffix from `name`
# (everything from the first "_" onwards, so "adv_21+" becomes "adv"),
# turn the rows into a named vector with deframe(), and return adv - odds.
# NOTE(review): deframe() expects a one- or two-column data frame; presumably
# the input was narrowed to name, n and mae_gain before this step - confirm.
group_by(n) %>%
    group_map(~deframe(mutate(., name=str_remove(name, '_.+'))) %>%
                {.['adv'] - .['odds']})

# 2. head(x, -2) is a very useful feature allowing to select all but the 2 last
# values. It works even for whole tibbles.
# For a grouped tibble, slice_head() with a negative n does the same thing
# separately within each group (n is subtracted from the group size).
# Hand-rolled equivalent: filter(n() - row_number() + 1 > 2)
tibble(gr = c(1, 1, 1, 2, 2, 2, 2), val = 1:7) %>%
  group_by(gr) %>%
  slice_head(n = -2) %>%
  ungroup()

# 3. fast time cv can be done with nesting and lags

# 4. group_by()/rowwise() %>% group_map() is bad
# Especially when you need a grouping value in the result
# Replace it with nesting
# Nest every column except `smth` into a list-column `data`, apply your_func
# to each nested tibble, and let .keep='unused' drop the consumed `data`
# column so that only `smth` and `res` remain.
df %>%
  nest(data=-smth) %>%
  mutate(res=map(data, your_func), .keep='unused')

# 5. to inject a vectorized lambda function in a long pipeline
# `.` is passed explicitly as an argument, so the pipe does not also insert the
# left-hand side first: exec() calls the lambda once on the whole vector 1:4.
1:4 %>% exec(as_mapper(~. + 1), .)

# 6. chr to tbl with delay, by batches
x %>%
       enframe() %>%
       group_by(batch=ceiling(name/10)) %>%
       summarise(ids=list(value)) %>%
       pluck('ids') %>%
       imap_dfr(~...
	?dplyr::context
	# gives us
	cur_column()  # current column name!
	# type `cur_` and press <Tab> to browse the other cur_*() context helpers
	n()

	# `disp` exists both as a variable in the environment and as a column of mtcars.
	disp <- 10
	# .data$disp is the column, .env$disp is the variable defined above,
	# so every value of the column is multiplied by 10.
	mtcars %>% mutate(disp = .data$disp * .env$disp)
	# Without the pronouns both names resolve to the column: the column is squared.
	mtcars %>% mutate(disp = disp * disp)
	?.data

	?c_across

	across(...., .names='{.col}_{.fn}')

	# a pair of columns problem
	# 1. There are pairs of columns like <transaction_type>_dur and
	# <transaction_type>_n. We need to pick some of the pairs, not all of them,
	# then divide each picked _dur column by its matching _n column.
	# solution 1
	# Fold over the stripped column-name prefixes, adding one ratio column per
	# prefix to `df`. `stripped_col_names` is a character vector of the prefixes
	# to process (e.g. "payment" for payment_dur / payment_n).
	# A glue-style name on the left-hand side is only interpolated with `:=`;
	# with `=` the new column is literally called "{.y}_dur_div_n" and every
	# iteration overwrites it.
	reduce(
	  stripped_col_names,
	  .init = df,
	  .f = ~ mutate(
	    .x,
	    # .data[[<string>]] looks the column up by name inside the data frame
	    "{.y}_dur_div_n" := .data[[paste0(.y, "_dur")]] / .data[[paste0(.y, "_n")]]
	  )
	)
	# solution 2
	mutate(across.. cur_column()

	# name mae_nv n mae_gain
	# <chr> <dbl> <dbl> <dbl>
	# 1 adv 23.8 1123 0.036
	# 2 odds 23.8 1123 0.077
	# 3 adv_21+ 22.6 297 -0.022
	# 4 odds_21+ 22.6 297 0.028
	# group_by n, subtract mae_gain in each group
	# For each group of rows sharing the same `n`: strip the suffix from `name`
	# (everything from the first "_" onwards, so "adv_21+" becomes "adv"),
	# turn the rows into a named vector with deframe(), and return adv - odds.
	# NOTE(review): deframe() expects a one- or two-column data frame; presumably
	# the input was narrowed to name, n and mae_gain before this step - confirm.
	group_by(n) %>%
	group_map(~deframe(mutate(., name=str_remove(name, '_.+'))) %>%
	{.['adv'] - .['odds']})

	# 2. head(x, -2) is a very useful feature allowing to select all but the 2 last
	# values. It works even for whole tibbles.
	# For a grouped tibble, slice_head() with a negative n does the same thing
	# separately within each group (n is subtracted from the group size).
	# Hand-rolled equivalent: filter(n() - row_number() + 1 > 2)
	tibble(gr = c(1, 1, 1, 2, 2, 2, 2), val = 1:7) %>%
	  group_by(gr) %>%
	  slice_head(n = -2) %>%
	  ungroup()

	# 3. fast time cv can be done with nesting and lags

	# 4. group_by()/rowwise() %>% group_map() is bad
	# Especially when you need a grouping value in the result
	# Replace it with nesting
	# Nest every column except `smth` into a list-column `data`, apply your_func
	# to each nested tibble, and let .keep='unused' drop the consumed `data`
	# column so that only `smth` and `res` remain.
	df %>%
	nest(data=-smth) %>%
	mutate(res=map(data, your_func), .keep='unused')

	# 5. to inject a vectorized lambda function in a long pipeline
	# `.` is passed explicitly as an argument, so the pipe does not also insert the
	# left-hand side first: exec() calls the lambda once on the whole vector 1:4.
	1:4 %>% exec(as_mapper(~. + 1), .)

	# 6. chr to tbl with delay, by batches
	x %>%
	enframe() %>%
	group_by(batch=ceiling(name/10)) %>%
	summarise(ids=list(value)) %>%
	pluck('ids') %>%
	imap_dfr(~...