Last active
September 25, 2020 01:17
-
-
Save francisbarton/9b5515e3d23d73a6e7d79946a226abf6 to your computer and use it in GitHub Desktop.
Rectangularise a set of list columns of different lengths
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# originally written at
# https://community.rstudio.com/t/unlist-columns-with-lists-of-different-length/
# with grateful acknowledgement of those who worked it out before me :-)
## improved (well, single pipe) version, to summarise values in a df into a summary table
#' Summarise the distinct values of every column in a table
#'
#' Each column is reduced to its sorted unique values, then every column is
#' padded with `NA` up to the length of the longest one so that the results
#' can be bound side by side.
#'
#' NOTE: a function with the same name is defined again further down this
#' file; when the whole file is sourced, that later definition replaces this
#' one.
#'
#' @param df A data frame (or list of vectors) whose columns can be sorted.
#' @return A tibble with one column per input column, holding that column's
#'   sorted distinct non-missing values followed by `NA` padding.
rectangularise <- function(df) {
  # `sort()` drops `NA`s by default, so the lengths must be measured *after*
  # sorting; measuring after `unique()` alone can over-count by one and leave
  # a trailing row that is `NA` in every column.
  distinct_vals <- purrr::map(df, function(col) sort(unique(col)))

  # Computed once rather than once per column; the `0L` keeps `max()` quiet
  # when `df` has no columns at all.
  target_len <- max(lengths(distinct_vals), 0L)

  padded <- purrr::map(
    distinct_vals,
    function(col) `length<-`(col, target_len)
  )
  dplyr::bind_cols(padded)
}
## rectangularise with added count columns (sorted)
###################################################
# helper
#' Tally the non-missing entries of a `value` column
#'
#' Rows whose `value` is `NA` are discarded, a `count` column giving the
#' number of rows sharing each `value` is appended (largest counts first),
#' and duplicate rows are then collapsed.
#'
#' @param df A data frame containing a column named `value`.
#' @return `df` without missing `value` rows, with an added `count` column,
#'   sorted by descending count and de-duplicated.
add_counts <- function(df) {
  non_missing <- dplyr::filter(df, !is.na(value))
  tallied <- dplyr::add_count(
    non_missing,
    value,
    name = "count",
    sort = TRUE
  )
  dplyr::distinct(tallied)
}
#' Summarise each column's distinct values together with their frequencies
#'
#' For every column of `df` this produces two output columns: the distinct
#' non-missing values (most frequent first) and, next to them, how often each
#' value occurs. Shorter columns are padded with `NA` so everything fits in
#' one rectangular table.
#'
#' @param df A data frame.
#' @return A tibble with columns `<name>`, `count_<name>` for each input
#'   column, in the original column order.
rectangularise_with_counts <- function(df) {
  # Output names: each input column followed by its "count_" partner.
  col_names <- unlist(
    purrr::map(colnames(df), function(nm) c(nm, paste0("count_", nm)))
  )

  # One (value, count) tibble per input column. The column has to be enframed
  # first so that add_counts() has a `value` column to work on.
  counted <- purrr::map(
    df,
    function(col) add_counts(tibble::enframe(col, name = NULL))
  )

  # Flatten exactly one level: list of tibbles -> list of their columns.
  cols <- unlist(unname(counted), recursive = FALSE, use.names = FALSE)

  # Hoisted out of the padding step; `0L` covers a zero-column `df`.
  target_len <- max(lengths(cols), 0L)
  padded <- purrr::map(cols, function(col) `length<-`(col, target_len))

  # Name the columns before binding so bind_cols() never sees the repeated
  # value/count names; "minimal" repair leaves the supplied names untouched.
  dplyr::bind_cols(
    stats::setNames(padded, col_names),
    .name_repair = "minimal"
  )
}
## original version (designed to deal with a tbl with nested lists)
###################################################################
#' Flatten list-columns of differing lengths into a rectangular table
#'
#' Every column is flattened with `unlist()`, then padded with `NA` to the
#' length of the longest flattened column so the results can be bound side
#' by side.
#'
#' NOTE: this shares its name with a function defined earlier in this file;
#' when the whole file is sourced, this later definition is the one that
#' remains bound to `rectangularise`.
#'
#' @param df A data frame (or list) whose columns may be nested lists.
#' @return A tibble with one column per input column, each holding the
#'   flattened values followed by `NA` padding.
rectangularise <- function(df) {
  # Flatten once and reuse the result for both measuring and padding.
  flat_cols <- purrr::map(df, unlist)

  # `0L` avoids the -Inf-with-warning result of max() on a zero-column input.
  max_len <- max(lengths(flat_cols), 0L)

  padded <- purrr::map(flat_cols, function(col) `length<-`(col, max_len))
  dplyr::bind_cols(padded)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment