Last active
July 13, 2020 21:25
-
-
Save francisbarton/89da88d3a199df701b7203985970e86b to your computer and use it in GitHub Desktop.
Using if_else, mutate, map and reduce to build a data frame with one conditionally filled column per input file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
``` r | |
library(dplyr, warn.conflicts = FALSE) | |
library(purrr) | |
library(rlang, warn.conflicts = FALSE) | |
library(stringr) | |
# Example inputs: three daily export files and the case count tied to each.
filenames <- c(
  "coronavirus_cases_202007061134.csv",
  "coronavirus_cases_202007071134.csv",
  "coronavirus_cases_202007081134.csv"
)
cases <- c(1000, 1500, 2000)
# couple of functions | |
#' Extract the 8-digit date stamp from a file name
#'
#' Finds the first run of at least eight consecutive digits in each element
#' of `x` and keeps its first eight digits, so `"..._202007061134.csv"` gives
#' `20200706`. Shorter digit runs that occur earlier in the name (such as the
#' "19" in "covid19") are skipped instead of being mistaken for the date.
#'
#' @param x A character vector of file names.
#' @return A numeric vector the same length as `x`, with `NA` wherever no run
#'   of eight digits is found.
pull_date <- function(x) {
  # Matching exactly eight digits both locates the date and trims any trailing
  # time digits in one step; a regex match is leftmost, so this is the start of
  # the first digit run that is long enough to hold a full date.
  date_stamp <- stringr::str_extract(x, "\\d{8}")
  as.numeric(date_stamp)
}
#' Add a conditionally filled column named after a file
#'
#' Appends one column to `df`, named by the string held in `filename`. In each
#' row the new column takes the value of `fill_var` when the date parsed from
#' `filename` (via `pull_date()`) is on or after that row's `check_var`, and
#' is `NA` otherwise.
#'
#' @param df A data frame.
#' @param filename A single string. It is parsed by `pull_date()` to get the
#'   cut-off date and is also used as the name of the new column.
#' @param check_var Unquoted column of `df` to compare against the cut-off.
#' @param fill_var Unquoted column of `df` supplying the values to copy. It is
#'   paired with `NA_real_`, so it is expected to be a double column.
#' @return `df` with the additional column.
add_column <- function(df, filename, check_var, fill_var) {
  # Parse the date once, outside mutate()'s data mask: inside the mask a column
  # of `df` that happened to be called `filename` would shadow the argument.
  cutoff <- pull_date(filename)

  dplyr::mutate(
    df,
    # '!!' (not '{{ }}') is used on the LHS because `filename` holds a string
    # value rather than a column reference.
    # See https://rlang.r-lib.org/reference/nse-force.html
    # `!!cutoff` injects the computed value, so no column can mask it either.
    !!filename := dplyr::if_else(
      {{ check_var }} <= !!cutoff,
      {{ fill_var }},
      NA_real_
    )
  )
}
# Base table: one row per file, plus that file's date stamp as a number.
df_base <- dplyr::tibble(
  filenames,
  cases,
  file_date = pull_date(filenames)
)
# Build one augmented copy of df_base per filename (each copy gains a single
# column named after that file), then fold the copies back into one data
# frame by joining on the columns they share.
filenames %>%
  purrr::map(function(fname) add_column(df_base, fname, file_date, cases)) %>%
  purrr::reduce(dplyr::left_join)
#> Joining, by = c("filenames", "cases", "file_date") | |
#> Joining, by = c("filenames", "cases", "file_date") | |
#> # A tibble: 3 x 6 | |
#> filenames cases file_date coronavirus_case~ coronavirus_cas~ coronavirus_cas~ | |
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> | |
#> 1 coronavir~ 1000 20200706 1000 1000 1000 | |
#> 2 coronavir~ 1500 20200707 NA 1500 1500 | |
#> 3 coronavir~ 2000 20200708 NA NA 2000 | |
``` | |
<sup>Created on 2020-07-13 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment