Last active
November 2, 2023 13:37
-
-
Save tivrfoa/9c984fd4eb61fdf1d91ac95f8091d80a to your computer and use it in GitHub Desktop.
Book Beautiful Code - A Regular Expression Matcher
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
After reading the description: | |
Character Meaning | |
c Matches any literal character c. | |
. (period) Matches any single character. | |
^ Matches the beginning of the input string. | |
$ Matches the end of the input string. | |
* Matches zero or more occurrences of the previous character. | |
... I tried to implement in Rust. I got some things, but I had | |
to look at Rob Pike's implementation. The main idea is to have | |
a separate function for handling matching at a specific position, which | |
makes things a lot easier. You end up with a little of code repetition, | |
but it's much easier to read and understand, and actually fewer lines | |
of code than if you would write everything in one function. | |
He also has a function `matchstar` | |
*/ | |
/// Special chars: ., ^, $, * | |
fn matches(text: &str, pattern: &str) -> bool { | |
let s: Vec<char> = text.chars().collect(); | |
let p: Vec<char> = pattern.chars().collect(); | |
if p.is_empty() { return true; } | |
if s.is_empty() { | |
return p.len() == 1 && p[0] == '$'; | |
} | |
if p[0] == '^' && p[p.len() - 1] == '$' { | |
return text == &pattern[1..p.len() - 1]; | |
} | |
if p[0] == '^' { | |
return match_here(text, &pattern[1..]); | |
} | |
let mut tp = 0; | |
while tp <= text.len() && !match_here(&text[tp..], pattern) { | |
tp += 1; | |
} | |
tp <= text.len() | |
} | |
fn match_here(text: &str, pattern: &str) -> bool { | |
let s: Vec<char> = text.chars().collect(); | |
let p: Vec<char> = pattern.chars().collect(); | |
if p.is_empty() { return true; } | |
if s.is_empty() { return p.len() == 1 && p[0] == '$'; } | |
if p[0] == '$' { return false; } | |
if p.len() > 1 && p[1] == '*' { | |
let mut np = 1; | |
// handle continuous *, eg: .*** | |
while np < p.len() && p[np] == '*' { | |
np += 1; | |
} | |
return match_here(text, &pattern[np..]) || | |
match_here(&text[1..], &pattern[np..]) || | |
match_here(&text[1..], pattern); | |
} else { | |
if p[0] != '.' && s[0] != p[0] { return false; } | |
return match_here(&text[1..], &pattern[1..]); | |
} | |
} | |
fn main() { | |
assert!(matches("xyz*abc", "*")); | |
assert!(matches("aade", "a*c***d")); | |
assert!(matches("aade", "a*c***de")); | |
assert!(!matches("aade", "a*c***def")); | |
assert!(matches("aabb", "a*b*c*")); | |
assert!(matches("aabbd", "a*b*c*d")); | |
assert!(matches("aabbd", "a*b*c***d")); | |
assert!(matches("aabbd", "a*b*c**.*")); | |
assert!(matches("aabbde", "a*b*c**.*")); | |
assert!(matches("aabbd", "a*b*c**.*d")); | |
assert!(matches(" Brazil Country Banana", "B")); | |
assert!(matches("aaaaabbbbb", "a*b")); | |
assert!(matches("aaaaabbbbb", "a*bb")); | |
assert!(matches("aaaaabbbbb", "a*b*")); | |
assert!(matches(" Brazil Country Banana", " Brazil Country Banana")); | |
assert!(matches(" Brazil Country Banana", "^ Brazil Country Banana$")); | |
assert!(matches(" Brazil Country Banana", ".")); | |
assert!(matches(" Brazil Country Banana", "...")); | |
assert!(matches(" Brazil Country Banana", ".*")); | |
assert!(matches(" Brazil Country Banana", ".*****")); | |
assert!(matches(" Brazil Country Banana", "^ Bra")); | |
assert!(matches(" Brazil Country Banana", "ana$")); | |
assert!(!matches(" Brazil Country Banana", "ania$")); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
His Solution