Created
March 21, 2019 17:51
-
-
Save tylerlittlefield/85fa285836063c74c83c0b56b5e56654 to your computer and use it in GitHub Desktop.
Hypothetical re-write of RVerbalExpressions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attach dplyr; the code below uses the `%>%` pipe it makes available.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Utilities
#' Start a new regular-expression chain.
#'
#' @return An empty character vector. The `rx_*()` builders `paste0()` their
#'   fragment onto it, so the first fragment appended becomes the pattern.
rx <- function() {
  character(0L)
}
#' Evaluate bare `rx_*()` calls as sub-chains and join their fragments.
#'
#' Each argument is captured unevaluated, prefixed with `rx() %>%` so that it
#' starts from an empty chain, evaluated, and the resulting fragments are
#' concatenated. This lets a builder call be written without its `.data`
#' argument, e.g. `rx_group(~ rx_literal("a"))`.
#'
#' NOTE(review): defining `~` masks base R's formula operator wherever this
#' definition is visible, so formulas (`y ~ x`) stop working there. `vals()`
#' is the same function under a non-clashing name.
#'
#' @param ... Unevaluated calls such as `rx_literal("a")`. Each one is spliced
#'   after `rx() %>%`, so `%>%` and `rx()` must be visible from the caller.
#' @return A single string with all fragments concatenated, or `""` when
#'   called with no arguments.
`~` <- function(...) {
  # Evaluate in the caller's frame, not in here: otherwise a caller variable
  # named like one of this function's locals would resolve to the local, and
  # variables local to a calling function would not be found at all.
  caller <- parent.frame()
  steps <- as.list(sys.call())[-1L]
  if (length(steps) == 0L) {
    return("")
  }
  # Pasting the call objects deparses them, so a piped argument such as
  # `a() %>% b()` stays a flat left-to-right chain: rx() %>% a() %>% b().
  chains <- paste0("rx() %>% ", steps)
  fragments <- lapply(
    chains,
    function(src) eval(parse(text = src), envir = caller)
  )
  paste0(unlist(fragments, use.names = FALSE), collapse = "")
}
vals <- `~`
#' Backslash-escape regex metacharacters so text matches literally.
#'
#' @param x Character vector to escape.
#' @return `x` with each of `. | * ? + ( ) { } ^ $ \ : = [ ]` preceded by a
#'   backslash. All other characters are left unchanged.
sanitize <- function(x) {
  if (missing(x)) {
    stop("The 'value' argument is missing. Did you forget to start the rx chain with rx()?")
  }
  metachars <- c(
    ".", "|", "*", "?", "+", "(", ")", "{", "}",
    "^", "$", "\\", ":", "=", "[", "]"
  )
  # Escape every member inside the character class so that `]`, `^` and `\`
  # are taken as class members rather than as class syntax.
  char_class <- paste0("\\", metachars, collapse = "")
  pattern <- paste0("([", char_class, "])")
  # The replacement is a literal backslash followed by the captured character.
  gsub(pattern, "\\\\\\1", x, perl = TRUE)
}
# Literals
#' Append text that should be matched literally.
#'
#' @param .data The regex built so far (start a chain with `rx()`).
#' @param string Text to match as-is. It is passed through `sanitize()` so
#'   regex metacharacters in it are escaped.
#' @return `.data` with the escaped `string` appended.
rx_literal <- function(.data, string) {
  escaped <- sanitize(string)
  paste0(.data, escaped)
}
# Character classes/sets
#' Append a character class `[...]`.
#'
#' @param .data The regex built so far (start a chain with `rx()`).
#' @param ... Members of the class. Every value, including each element of a
#'   vector argument, is concatenated into the same class. Values are inserted
#'   verbatim (no escaping is applied here).
#' @return `.data` with `[<members>]` appended.
rx_one_of <- function(.data, ...) {
  # Flatten first so that c("a", "b") yields "[ab]" rather than being
  # recycled into two separate patterns "[a]" and "[b]".
  members <- unlist(lapply(list(...), as.character))
  paste0(.data, "[", paste0(members, collapse = ""), "]")
}
#' Append a negated character class `[^...]`.
#'
#' @param .data The regex built so far (start a chain with `rx()`).
#' @param ... Members to exclude. Every value, including each element of a
#'   vector argument, is concatenated into the same class. Values are inserted
#'   verbatim (no escaping is applied here).
#' @return `.data` with `[^<members>]` appended.
rx_none_of <- function(.data, ...) {
  # Flatten first so that c("a", "b") yields "[^ab]" rather than being
  # recycled into two separate patterns "[^a]" and "[^b]".
  members <- unlist(lapply(list(...), as.character))
  paste0(.data, "[^", paste0(members, collapse = ""), "]")
}
# The dot
#' Append the regex wildcard `.`.
#'
#' @param .data The regex built so far (start a chain with `rx()`).
#' @return `.data` with an unescaped `.` appended.
rx_anything <- function(.data) {
  wildcard <- "."
  paste0(.data, wildcard)
}
# Anchors
#' Append the start anchor `^`.
#'
#' The anchor is appended at the current position in the chain, not moved to
#' the front of the pattern.
#'
#' @param .data The regex built so far (start a chain with `rx()`).
#' @return `.data` with `^` appended.
rx_start_of_line <- function(.data) {
  anchor <- "^"
  paste0(.data, anchor)
}
#' Append the end anchor `$`.
#'
#' @param .data The regex built so far (start a chain with `rx()`).
#' @return `.data` with `$` appended.
rx_end_of_line <- function(.data) {
  anchor <- "$"
  paste0(.data, anchor)
}
#' Append a capturing group `(...)`.
#'
#' @param .data The regex built so far (start a chain with `rx()`).
#' @param ... Regex fragments to place inside the parentheses. They are
#'   concatenated in order and inserted verbatim (no escaping is applied).
#' @return `.data` with `(<fragments>)` appended.
rx_group <- function(.data, ...) {
  paste0(
    .data,
    "(",
    ...,
    ")"
  )
}
#' Append alternatives separated by `|`.
#'
#' The alternation is not wrapped in a group, so `|` splits everything around
#' it in the enclosing pattern. Wrap the call in `rx_group()` to limit its
#' scope.
#'
#' @param .data The regex built so far (start a chain with `rx()`).
#' @param ... Alternatives. Every value, including each element of a vector
#'   argument, becomes one alternative. Values are inserted verbatim (no
#'   escaping is applied here).
#' @return `.data` with `<alt1>|<alt2>|...` appended.
rx_either_of <- function(.data, ...) {
  # Flatten first so that c("cat", "dog") yields "cat|dog"; pasting with
  # sep = "|" would leave a single vector argument unjoined.
  alternatives <- unlist(lapply(list(...), as.character))
  paste0(.data, paste(alternatives, collapse = "|"))
}
# Chain every builder once to show the fragment each one appends.
# Note that the final `a|b` is a top-level alternation: it is not enclosed
# in a group, so it splits the whole pattern.
rx() %>%
  rx_literal("a") %>%
  rx_one_of("b") %>%
  rx_none_of("c") %>%
  rx_anything() %>%
  rx_start_of_line() %>%
  rx_end_of_line() %>%
  rx_group(
    ~ rx_literal("a"),
    ~ rx_none_of("b")
  ) %>%
  rx_either_of("a", "b")
#> [1] "a[b][^c].^$(a[^b])a|b"
# with tilde: `~` turns the bare builder call into a nested sub-chain, and
# the finished pattern is passed to str_extract() through the `.` placeholder.
rx() %>%
  rx_group(
    ~ rx_either_of("cat", "dog")
  ) %>%
  rx_literal(" food") %>%
  stringr::str_extract(c("cat food", "dog food", "fish food"), .)
#> [1] "cat food" "dog food" NA
# with vals: the same pattern, using the `vals()` alias instead of `~`.
rx() %>%
  rx_group(
    vals(rx_either_of("cat", "dog"))
  ) %>%
  rx_literal(" food") %>%
  stringr::str_extract(c("cat food", "dog food", "fish food"), .)
#> [1] "cat food" "dog food" NA
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment