Skip to content

Instantly share code, notes, and snippets.

@tivrfoa
Last active November 2, 2023 13:37
Show Gist options
  • Save tivrfoa/9c984fd4eb61fdf1d91ac95f8091d80a to your computer and use it in GitHub Desktop.
Save tivrfoa/9c984fd4eb61fdf1d91ac95f8091d80a to your computer and use it in GitHub Desktop.
Book Beautiful Code - A Regular Expression Matcher
/*
After reading the description:
Character Meaning
c Matches any literal character c.
. (period) Matches any single character.
^ Matches the beginning of the input string.
$ Matches the end of the input string.
* Matches zero or more occurrences of the previous character.
... I tried to implement in Rust. I got some things, but I had
to look at Rob Pike's implementation. The main idea is to have
a separate function for handling matching at a specific position, which
makes things a lot easier. You end up with a little of code repetition,
but it's much easier to read and understand, and actually fewer lines
of code than if you would write everything in one function.
He also has a function `matchstar`
*/
/// Special chars: ., ^, $, *
fn matches(text: &str, pattern: &str) -> bool {
let s: Vec<char> = text.chars().collect();
let p: Vec<char> = pattern.chars().collect();
if p.is_empty() { return true; }
if s.is_empty() {
return p.len() == 1 && p[0] == '$';
}
if p[0] == '^' && p[p.len() - 1] == '$' {
return text == &pattern[1..p.len() - 1];
}
if p[0] == '^' {
return match_here(text, &pattern[1..]);
}
let mut tp = 0;
while tp <= text.len() && !match_here(&text[tp..], pattern) {
tp += 1;
}
tp <= text.len()
}
fn match_here(text: &str, pattern: &str) -> bool {
let s: Vec<char> = text.chars().collect();
let p: Vec<char> = pattern.chars().collect();
if p.is_empty() { return true; }
if s.is_empty() { return p.len() == 1 && p[0] == '$'; }
if p[0] == '$' { return false; }
if p.len() > 1 && p[1] == '*' {
let mut np = 1;
// handle continuous *, eg: .***
while np < p.len() && p[np] == '*' {
np += 1;
}
return match_here(text, &pattern[np..]) ||
match_here(&text[1..], &pattern[np..]) ||
match_here(&text[1..], pattern);
} else {
if p[0] != '.' && s[0] != p[0] { return false; }
return match_here(&text[1..], &pattern[1..]);
}
}
fn main() {
assert!(matches("xyz*abc", "*"));
assert!(matches("aade", "a*c***d"));
assert!(matches("aade", "a*c***de"));
assert!(!matches("aade", "a*c***def"));
assert!(matches("aabb", "a*b*c*"));
assert!(matches("aabbd", "a*b*c*d"));
assert!(matches("aabbd", "a*b*c***d"));
assert!(matches("aabbd", "a*b*c**.*"));
assert!(matches("aabbde", "a*b*c**.*"));
assert!(matches("aabbd", "a*b*c**.*d"));
assert!(matches(" Brazil Country Banana", "B"));
assert!(matches("aaaaabbbbb", "a*b"));
assert!(matches("aaaaabbbbb", "a*bb"));
assert!(matches("aaaaabbbbb", "a*b*"));
assert!(matches(" Brazil Country Banana", " Brazil Country Banana"));
assert!(matches(" Brazil Country Banana", "^ Brazil Country Banana$"));
assert!(matches(" Brazil Country Banana", "."));
assert!(matches(" Brazil Country Banana", "..."));
assert!(matches(" Brazil Country Banana", ".*"));
assert!(matches(" Brazil Country Banana", ".*****"));
assert!(matches(" Brazil Country Banana", "^ Bra"));
assert!(matches(" Brazil Country Banana", "ana$"));
assert!(!matches(" Brazil Country Banana", "ania$"));
}
@tivrfoa
Copy link
Author

tivrfoa commented Nov 2, 2023

His Solution

/* match: search for regexp anywhere in text */
int match(char *regexp, char *text)
{
	if (regexp[0] == '^')
		return matchhere(regexp+1, text);
	do {
		/* must look even if string is empty */
		if (matchhere(regexp, text))
			return 1;
	} while (*text++ != '\0');
	return 0;
}
/* matchhere: search for regexp at beginning of text */
int matchhere(char *regexp, char *text)
{
	if (regexp[0] == '\0')
		return 1;
	if (regexp[1] == '*')
		return matchstar(regexp[0], regexp+2, text);
	if (regexp[0] == '$' && regexp[1] == '\0')
		return *text == '\0';
	if (*text!='\0' && (regexp[0]=='.' || regexp[0]==*text))
		return matchhere(regexp+1, text+1);
	return 0;
}
/* matchstar: search for c*regexp at beginning of text */
int matchstar(int c, char *regexp, char *text)
{
	do {
		/* a * matches zero or more instances */
		if (matchhere(regexp, text))
			return 1;
	} while (*text != '\0' && (*text++ == c || c == '.'));
	return 0;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment