Skip to content

Instantly share code, notes, and snippets.

@ArtemGr
Last active April 6, 2021 21:03
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ArtemGr/ce53c3a0f378585f162c to your computer and use it in GitHub Desktop.
Save ArtemGr/ce53c3a0f378585f162c to your computer and use it in GitHub Desktop.
Example using JIT-compiled PCRE expressions from Rust.
/// A PCRE regular expression prepared by `PcreJit::new`, bundled with its table of named groups.
pub struct PcreJit {
    /// The compiled expression. It sits behind a `Mutex` and the methods of `PcreJit` lock it before use.
    pub re: Mutex<pcre::Pcre>,
    /// Map from a capturing group name to its number(s).
    /// `None` if no named capturing groups were found
    /// or if the `auto_capture` option was given to `PcreJit::new`.
    pub names: Option<BTreeMap<String, Vec<usize>>>,
}
// SAFETY: these manual impls assert that a `PcreJit` may be shared between threads and moved across them.
// After construction the wrapped `pcre::Pcre` is only reachable through the `Mutex` in `PcreJit::re`.
// NOTE(review): this presumably relies on the underlying PCRE handle not being tied to the thread
// that created it - confirm against the `pcre` crate before depending on it.
unsafe impl Sync for PcreJit {}
unsafe impl Send for PcreJit {}
impl PcreJit {
    /// Compiles `pattern` with PCRE and studies it with the JIT-compile study option.
    ///
    /// Reserves the space for the MARK if the pattern seems to use it
    /// (that is, if the pattern text contains `(*MARK:`).
    ///
    /// * `auto_capture` - If `true` then `PcreJit::names` will remain `None`.
    ///   If `false` then only the named capturing groups work and `PcreJit::names`
    ///   is filled with their names (it still stays `None` when the pattern has no named groups).
    ///
    /// # Panics
    ///
    /// Panics if the pattern fails to compile or if studying the compiled expression fails.
    pub fn new(auto_capture: bool, pattern: &str) -> PcreJit {
        let mut flags: enum_set::EnumSet<pcre::CompileOption> = enum_set::EnumSet::new();
        // Allow the same name to appear twice.
        flags.insert(pcre::CompileOption::DupNames);
        if !auto_capture {
            // Capture only the explicitly named groups.
            flags.insert(pcre::CompileOption::NoAutoCapture);
        }
        if pattern.contains("(*MARK:") {
            // Reserve space for the "mark".
            flags.insert(pcre::CompileOption::Extra);
        }
        let mut compiled = pcre::Pcre::compile_with_options(pattern, &flags).expect("!PCRE");

        let mut study_flags: enum_set::EnumSet<pcre::StudyOption> = enum_set::EnumSet::new();
        study_flags.insert(pcre::StudyOption::StudyJitCompile);
        let studied = compiled.study_with_options(&study_flags);
        if !studied {
            panic!("!PCRE study for {}", pattern)
        }

        // The name table is only collected in the named-groups-only mode.
        let names = if auto_capture {
            None
        } else if compiled.name_count() > 0 {
            Some(compiled.name_table())
        } else {
            None
        };
        PcreJit { re: Mutex::new(compiled), names: names }
    }

    /// Run the compiled regular expression against the `subject`.
    ///
    /// Returns `Ok` with whatever the underlying `exec` call yields for this subject.
    /// An `Err` is produced by `try_s!` if locking `PcreJit::re` fails.
    pub fn exec<'a>(&self, subject: &'a str) -> Result<Option<pcre::Match<'a>>, String> {
        let mut guard = try_s!(self.re.lock());
        let found = guard.exec(subject);
        Ok(found)
    }

    /// Run the compiled regular expression against the `subject` and capture the MARK too.
    ///
    /// * `subject` - The text to match against.
    /// * `_` - Unused by this method.
    /// * `mark_map` - Must convert the short-living slice with the MARK into a less transient representation.
    ///   It is invoked only when the expression matched.
    ///
    /// Returns `Ok (None)` when there is no match, otherwise the match paired with the `mark_map` result.
    /// Returns an error if `enable_mark` reports failure; `try_s!` produces an `Err` if the lock fails.
    pub fn exec_with_mark<'a, MM, M>(&self, subject: &'a str, _: &Debug, mark_map: MM) -> Result<Option<(pcre::Match<'a>, M)>, String>
    where
        MM: Fn(Option<&[u8]>) -> M,
    {
        let mut guard = try_s!(self.re.lock());
        if !guard.enable_mark() {
            return ERR!("!enable_mark")
        }
        let found = match guard.exec(subject) {
            Some(found) => found,
            None => return Ok(None),
        };
        // The mark is read while the lock is still held, right after the match that set it.
        let mark = mark_map(guard.mark_bytes());
        Ok(Some((found, mark)))
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment