Skip to content

Instantly share code, notes, and snippets.

@abicky
Last active August 21, 2016 14:42
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abicky/58ea79b01d9e394d5076 to your computer and use it in GitHub Desktop.
Save abicky/58ea79b01d9e394d5076 to your computer and use it in GitHub Desktop.
# Copyright 2015- Takeshi Arabiki
# License: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
# Rcpp provides sourceCpp(), which is used below to compile the embedded C++ code.
library(Rcpp)
# Ask the build to link against the PCRE C++ wrapper library (pcrecpp).
Sys.setenv(PKG_LIBS = "-lpcrecpp")
sourceCpp(code = '
#include <Rcpp.h> // Rcpp 0.11.3
#include <pcrecpp.h> // pcrecpp 8.36
// Returns a copy of pattern with regular-expression metacharacters escaped by
// pcrecpp::RE::QuoteMeta, so the result can be embedded in a larger pattern
// and match the original text literally.
// [[Rcpp::export]]
Rcpp::String regexquote(Rcpp::String pattern) {
  const pcrecpp::StringPiece unquoted(pattern.get_cstring());
  const std::string quoted = pcrecpp::RE::QuoteMeta(unquoted);
  return quoted;
}
// Compiled PCRE regular expression exposed to R through an Rcpp module.
//
// The pattern is compiled in UTF-8 mode and wrapped in one extra outer
// capturing group, so capture group 1 always holds the whole match and the
// groups written in the pattern follow it.
class Regex {
public:
  pcrecpp::RE re;

  // Compiles pattern. Raises an R error through Rcpp::stop when PCRE reports
  // a compilation error.
  Regex(Rcpp::String pattern)
      : re("(" + std::string(pattern) + ")", pcrecpp::UTF8()) {
    if (!re.error().empty()) {
      Rcpp::stop("Invalid regular expression: " + re.error());
    }
  }

  // Collects every successive match of the pattern in str.
  //
  // Returns a list with one character vector per match. The first element of
  // each vector is the whole match; the remaining elements are the capture
  // groups of the pattern, in order. Empty matches are reported once per
  // position (including the end of the string), after which the search
  // resumes one character further on.
  //
  // NOTE(review): each search restarts on the unconsumed tail of str, so
  // anchors and lookbehinds see the tail as a fresh subject.
  Rcpp::List scan(Rcpp::String str) {
    std::vector<std::vector<std::string> > all_matches;

    const int n = re.NumberOfCapturingGroups();
    if (n <= 0) {
      // Cannot happen for a successfully compiled, wrapped pattern, but it
      // keeps the buffer setup below well defined.
      return Rcpp::wrap(all_matches);
    }
    const std::size_t group_count = static_cast<std::size_t>(n);

    // Vectors own the capture buffers, so nothing leaks if push_back throws.
    std::vector<std::string> groups(group_count);
    std::vector<pcrecpp::Arg> parsers(group_count);
    std::vector<const pcrecpp::Arg*> parser_ptrs(group_count);
    for (std::size_t i = 0; i < group_count; ++i) {
      parsers[i] = pcrecpp::Arg(&groups[i]);
      parser_ptrs[i] = &parsers[i];
    }

    pcrecpp::StringPiece input(str.get_cstring());
    int consumed = 0;
    while (re.DoMatch(input, pcrecpp::RE::UNANCHORED, &consumed,
                      &parser_ptrs[0], n)) {
      all_matches.push_back(groups);
      input.remove_prefix(consumed);

      // An empty match does not move the search position by itself. Without
      // an explicit step the same empty match would be found again forever.
      if (consumed == 0 || groups[0].empty()) {
        if (input.empty()) {
          break;
        }
        // Step over one whole UTF-8 character: the lead byte plus any
        // continuation bytes (bit pattern 10xxxxxx), so the next search
        // never starts in the middle of a multi-byte sequence.
        int step = 1;
        while (step < input.size() &&
               (static_cast<unsigned char>(input[step]) & 0xC0) == 0x80) {
          ++step;
        }
        input.remove_prefix(step);
      }
    }
    return Rcpp::wrap(all_matches);
  }
};
// Registers the Regex class as an Rcpp module named Regex, exposing its
// single-string constructor and the scan method to R.
RCPP_MODULE(Regex) {
  using namespace Rcpp;

  class_<Regex>("Regex")
      .constructor<String>()
      .method("scan", &Regex::scan);
}
')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment