Last active
February 3, 2018 23:23
-
-
Save airspeedswift/0ce7b7659120c2faff88 to your computer and use it in GitHub Desktop.
Minimalist Regex Matcher in Swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Brian Kernighan's article in Beautiful Code talks about the minimalist regex matcher written by | |
/// Rob Pike for their Practice of Programming book as "one of the best examples of recursion that I | |
/// have ever seen, and it shows the power of C pointers". | |
/// | |
/// Swift strings don't use pointers, and the original code relied heavily on the last character of a | |
/// C string being `\0`, but you can reproduce many of the nice aspects of the original C code using a | |
/// combination of slicing and `dropFirst`, the `first` function that returns an optional you can then | |
/// compare to a non-optional character, and the Swift 1.2 `if...let...where` | |
/// | |
/// In theory no string copying should be happening since the slices are just a subrange view on the | |
/// text and never modified? | |
/* match: search for regexp anywhere in text */ | |
func match(regexp: String, text: String) -> Bool { | |
if first(regexp) == "^" { | |
return matchHere(dropFirst(regexp), text) | |
} | |
for var idx = text.startIndex;;++idx { // must look even if string is empty | |
if matchHere(regexp, text[idx..<text.endIndex]) { | |
return true | |
} | |
// would love to do this with for idx in startIndex...endIndex instead of | |
// for;; and break, but String.Index can't advance beyond the endIndex | |
// unlike a C pointer so you can't use ... like that | |
if idx == text.endIndex { break } | |
// do while sufficed in the original C version... | |
} // while idx++ != string.endIndex | |
return false | |
} | |
/* matchhere: search for regexp at beginning of text */ | |
func matchHere(regexp: String, text: String) -> Bool { | |
if regexp.isEmpty { | |
return true | |
} | |
if let c = first(regexp) where first(dropFirst(regexp)) == "*" { | |
return matchStar(c, dropFirst(dropFirst(regexp)), text) | |
} | |
if first(regexp) == "$" && dropFirst(regexp).isEmpty { | |
return text.isEmpty | |
} | |
if let tc = first(text), let rc = first(regexp) where rc == "." || tc == rc { | |
return matchHere(dropFirst(regexp), dropFirst(text)) | |
} | |
return false | |
} | |
/* matchstar: search for c*regexp at beginning of text */ | |
func matchStar(c: Character, regexp: String, text: String) -> Bool { | |
var idx = text.startIndex | |
do { /* a * matches zero or more instances */ | |
if matchHere(regexp, text[idx..<text.endIndex]) { | |
return true | |
} | |
} while idx != text.endIndex && (text[idx++] == c || c == ".") | |
return false | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Alternative matchStar that does longest rather than shortest run of c* | |
/* matchstar: leftmost longest search for c*regexp */ | |
func matchStar(c: Character, regexp: String, text: String) -> Bool { | |
var idx = text.startIndex | |
for (; idx != text.endIndex && (text[idx] == c || c == "."); idx++) { } | |
do { /* * matches zero or more */ | |
if (matchHere(regexp, text[idx..<text.endIndex])) { | |
return true | |
} | |
if idx == text.startIndex { break } | |
} while idx-- != text.startIndex | |
return false | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment