Skip to content

Instantly share code, notes, and snippets.

@oganm
Created January 12, 2019 01:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oganm/e731cfc9bee93405ddff3feedf8f188b to your computer and use it in GitHub Desktop.
Save oganm/e731cfc9bee93405ddff3feedf8f188b to your computer and use it in GitHub Desktop.
miniparser
library(dplyr)
#' Expand glue-style placeholders in a set of pattern templates
#'
#' Every template is interpolated with `glue::glue()`. A placeholder is looked
#' up among the named entries positioned at or before the template in its own
#' list; templates in `patterns` can additionally refer to every entry of
#' `helperPatterns`, which have already been expanded by then.
#'
#' @param patterns Templates whose expanded forms are returned.
#' @param helperPatterns Optional intermediate templates. They may be
#'   referenced by name from later helpers and from `patterns`, but are not
#'   part of the return value.
#' @param .open,.close Placeholder delimiters, forwarded to `glue::glue()`.
#' @return `patterns`, with each element replaced by the result of
#'   `glue::glue()` on it.
generatePatterns = function(patterns,
                            helperPatterns = NULL,
                            .open = '{',
                            .close = '}') {
  # Pass 1: expand the helpers in order. eval() turns the list of entries
  # 1..i into the innermost lookup scope and falls back to this function's
  # frame, so glue() resolves placeholders against the earlier helpers.
  for (i in seq_along(helperPatterns)) {
    helperPatterns[[i]] = eval(
      quote(glue::glue(helperPatterns[[i]], .open = .open, .close = .close)),
      helperPatterns[seq_len(i)],
      environment()
    )
  }

  # Pass 2: expand the output patterns. Their lookup scope is all helpers
  # followed by the output patterns up to and including the current one.
  for (i in seq_along(patterns)) {
    patterns[[i]] = eval(
      quote(glue::glue(patterns[[i]], .open = .open, .close = .close)),
      c(helperPatterns, patterns[seq_len(i)]),
      environment()
    )
  }

  patterns
}
#' Split a string into tokens, labelling each token with the pattern it matched
#'
#' Every pattern is searched for in `text` with `gregexpr(perl = TRUE)`. Each
#' match becomes one token named after its pattern; every character that no
#' pattern covers becomes a single-character token named `"missing"`. Tokens
#' are returned in the order they occur in `text`.
#'
#' @param text The string to tokenise. The position bookkeeping below assumes
#'   a single string (length-one character vector).
#' @param patterns A list (or character vector) of Perl-compatible regular
#'   expressions, normally named; the names are used as token labels.
#' @return A character vector of the matched substrings in text order, named
#'   by pattern (or `"missing"`). Unnamed `patterns` with full coverage give an
#'   unnamed result. The result is `character(0)` when there is nothing to
#'   return.
#' @section Errors:
#'   Stops if any character is covered by more than one match; the message
#'   lists the offending character positions.
parser = function(text, patterns) {
  # For each pattern, collect one integer vector of character positions per
  # match.
  matches = lapply(patterns, function(pattern) {
    found = gregexpr(pattern, text, perl = TRUE)
    spans = lapply(found, function(hit) {
      starts = as.integer(hit)
      lens = attr(hit, "match.length")
      # gregexpr() signals "no match" with start -1 / length -1, and a
      # zero-length match covers no characters. Neither is a real span, so
      # both are dropped instead of being turned into bogus positions.
      keep = which(starts > 0 & lens > 0)
      lapply(keep, function(k) {
        seq.int(from = starts[k], length.out = lens[k])
      })
    })
    unlist(spans, recursive = FALSE)
  })

  # A character claimed by two matches makes the tokenisation ambiguous.
  covered = unlist(matches, use.names = FALSE)
  clashing = sort(unique(covered[duplicated(covered)]))
  if (length(clashing) > 0) {
    # Report the character positions themselves, not indices into `covered`.
    stop(paste(clashing, collapse = ', '), ' characters match multiple patterns')
  }

  # Characters that no pattern claimed become one-character "missing" tokens.
  uncovered = setdiff(seq_len(nchar(text)), covered)
  if (length(uncovered) > 0) {
    matches[["missing"]] = as.list(uncovered)
  }

  # Flatten to one entry per token and label each with its pattern name.
  # rep() by match count stays correct for unnamed or duplicate names.
  flatMatches = unlist(matches, recursive = FALSE, use.names = FALSE)
  if (!is.null(names(matches)) && length(flatMatches) > 0) {
    names(flatMatches) = rep(names(matches), lengths(matches))
  }

  # Restore text order using each token's first character position.
  firstPos = vapply(flatMatches, min, numeric(1))
  flatMatches = flatMatches[order(firstPos)]

  # vapply() keeps the return type character even when there are no tokens.
  charSubsets = vapply(flatMatches, function(span) {
    substr(text, span[1], span[length(span)])
  }, character(1))
  charSubsets
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment