Last active
April 6, 2021 21:03
-
-
Save ArtemGr/ce53c3a0f378585f162c to your computer and use it in GitHub Desktop.
Example using JIT-compiled PCRE expressions from Rust.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pub struct PcreJit { | |
pub re: Mutex<pcre::Pcre>, | |
/// Map from a capturing group name to its number. | |
/// `None` if no named capturing groups found or if the `auto_capture` option is given to `PcreJit::new`. | |
pub names: Option<BTreeMap<String, Vec<usize>>>} | |
unsafe impl Sync for PcreJit {} | |
unsafe impl Send for PcreJit {} | |
impl PcreJit { | |
/// JIT-compiles the regular expression. | |
/// | |
/// Reserves the space for the MARK if the pattern seems to use it. | |
/// | |
/// * `auto_capture` - If `true` then `PcreJit::names` will remain `None`. | |
/// If `false` then only the named capturing groups work and `PcreJit::names` is filled with their names. | |
pub fn new (auto_capture: bool, pattern: &str) -> PcreJit { | |
let mut compile_options: enum_set::EnumSet<pcre::CompileOption> = enum_set::EnumSet::new(); | |
if !auto_capture { | |
compile_options.insert (pcre::CompileOption::NoAutoCapture);} // Capture only the explicitly named groups. | |
compile_options.insert (pcre::CompileOption::DupNames); // Allow the same name to appear twice. | |
if pattern.contains ("(*MARK:") { | |
compile_options.insert (pcre::CompileOption::Extra);} // Reserve space for the "mark". | |
let mut re = pcre::Pcre::compile_with_options (pattern, &compile_options) .expect ("!PCRE"); | |
let mut study_options: enum_set::EnumSet<pcre::StudyOption> = enum_set::EnumSet::new(); | |
study_options.insert (pcre::StudyOption::StudyJitCompile); | |
if !re.study_with_options (&study_options) {panic! ("!PCRE study for {}", pattern)} | |
let names = if !auto_capture && re.name_count() > 0 {Some (re.name_table())} else {None}; | |
PcreJit {re: Mutex::new (re), names: names}} | |
/// Run the compiled regular expression against the `subject`. | |
pub fn exec<'a> (&self, subject: &'a str) -> Result<Option<pcre::Match<'a>>, String> { | |
let mut re = try_s! (self.re.lock()); | |
Ok (re.exec (subject))} | |
/// Run the compiled regular expression against the `subject` and capture the MARK too. | |
/// | |
/// * `mark_map` - Must convert the short-living slice with the MARK into a less transient representation. | |
pub fn exec_with_mark<'a, MM, M> (&self, subject: &'a str, _: &Debug, mark_map: MM) -> Result<Option<(pcre::Match<'a>, M)>, String> | |
where MM: Fn (Option<&[u8]>) -> M { | |
let mut re = try_s! (self.re.lock()); | |
if !re.enable_mark() {return ERR! ("!enable_mark")} | |
let cap = re.exec (subject); | |
Ok (match cap { | |
Some (cap) => Some ((cap, mark_map (re.mark_bytes()))), | |
None => None})}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment