Skip to content

Instantly share code, notes, and snippets.

@mo-xiaoming
Created October 29, 2022 02:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mo-xiaoming/9fb87da16d6ef459e1b94c16055b9978 to your computer and use it in GitHub Desktop.
Save mo-xiaoming/9fb87da16d6ef459e1b94c16055b9978 to your computer and use it in GitHub Desktop.
wildcards matching
//! https://www.drdobbs.com/architecture-and-design/matching-wildcards-an-algorithm/210200888
#![allow(dead_code)]
fn wildcard_match(pattern_text: &str, plain_text: &str) -> bool {
struct AfterWildcard {
plain_idx: usize,
pattern_idx: usize,
}
// it is None if there are no prev wildcard
// if there were, then `plain_idx` stores *next* index in plain text that wildcard supposes to match
// `pattern_idx` stores *next* index right after the wildcard
// when they first assigned
// latter they become to be the next possible non matches
// anything pre these two considered match
let mut after_wildcard: Option<AfterWildcard> = None;
// current indices moving in two strings
let mut cur_pos_plain_text = 0_usize;
let mut cur_pos_pattern_text = 0_usize;
loop {
// current chars in two strings
let plain_c = plain_text.chars().nth(cur_pos_plain_text);
let pattern_c = pattern_text.chars().nth(cur_pos_pattern_text);
if plain_c.is_none() {
// plain text ends
match pattern_c {
None => return true, // pattern text ends as well, happy ending
Some('*') => {
// since we make wildcard matches non-eager
// go back to use `after_wildcard` only make it less possible to match
//
// matches iff pattern only have '*' till the end
return pattern_text[cur_pos_pattern_text..]
.chars()
.all(|e| e == '*');
}
Some(w) => {
// go back to last wildcard and try next possible char in plain text
if let Some(AfterWildcard {
ref mut plain_idx,
pattern_idx,
}) = after_wildcard
{
// move `plain_idx` in `after_wildcard` to the next position of `w` in plain text
// any positions before that is impossible to match the pattern text
if let Some(i) = plain_text[*plain_idx..].chars().position(|c| c == w) {
*plain_idx = i;
cur_pos_plain_text = *plain_idx;
cur_pos_pattern_text = pattern_idx;
continue;
}
// if `w` doesn't even exists in plain text, then give up
return false;
}
// if no prev wildcard exists, then that's it, no match
return false;
}
}
} else if plain_c != pattern_c {
if pattern_c == Some('*') {
// skip '*'s, one is as good as many
if let Some(i) = pattern_text[cur_pos_pattern_text..]
.chars()
.position(|e| e != '*')
{
cur_pos_pattern_text += i;
} else {
// even better, pattern text ends with a '*', which matches everything
return true;
}
// pattern text doesn't end with this '*', then find next non '*' char
let w = pattern_text.chars().nth(cur_pos_pattern_text).unwrap();
// char in pattern text does exist in plain text
if let Some(i) = plain_text[cur_pos_plain_text..]
.chars()
.position(|c| c == w)
{
// update both positions
after_wildcard.replace(AfterWildcard {
plain_idx: i,
pattern_idx: cur_pos_pattern_text,
});
continue;
}
// otherwise, we cannot match
return false;
}
if let Some(AfterWildcard { pattern_idx, .. }) = after_wildcard {
// go back to last wildcard
if pattern_idx != cur_pos_pattern_text {
cur_pos_pattern_text = pattern_idx;
// matches this char, move pattern idx forward
if pattern_text.chars().nth(cur_pos_pattern_text) == plain_c {
cur_pos_pattern_text += 1;
}
}
// try next plain text char anyway, current one gets swallowed by '*'
// or by a matching char in pattern text
cur_pos_plain_text += 1;
continue;
}
return false;
} else {
cur_pos_plain_text += 1;
cur_pos_pattern_text += 1;
}
}
}
#[cfg(test)]
mod test {
use super::wildcard_match;
#[test]
fn mississippi() {
assert!(wildcard_match("abc", "abc"));
assert!(!wildcard_match("abc*", "a0c"));
assert!(wildcard_match("mississipp**", "mississippi"));
assert!(wildcard_match("mississippi*", "mississippi"));
assert!(wildcard_match("*.*", "mississippi.river"));
assert!(wildcard_match("*ip*", "mississippi.river"));
assert!(wildcard_match("m*i*.*r", "mississippi.river"));
assert!(!wildcard_match("m*x*.*r", "mississippi.river"));
}
}
@ArashPartow
Copy link

A detailed explanation of wildcard pattern matching and general globbing can be found here:

https://www.partow.net/programming/wildcardmatching/index.html

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment