Skip to content

Instantly share code, notes, and snippets.

@airspeedswift
Last active February 3, 2018 23:23
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save airspeedswift/0ce7b7659120c2faff88 to your computer and use it in GitHub Desktop.
Save airspeedswift/0ce7b7659120c2faff88 to your computer and use it in GitHub Desktop.
Minimalist Regex Matcher in Swift
/// Brian Kernighan's article in Beautiful Code talks about the minimalist regex matcher written by
/// Rob Pike for their Practice of Programming book as "one of the best examples of recursion that I
/// have ever seen, and it shows the power of C pointers".
///
/// Swift strings don't use pointers, and the original code relied heavily on the last character of a
/// C string being `\0`, but you can reproduce many of the nice aspects of the original C code using a
/// combination of slicing and `dropFirst`, the `first` function that returns an optional you can then
/// compare to a non-optional character, and the Swift 1.2 `if...let...where`
///
/// In theory no string copying should be happening since the slices are just a subrange view on the
/// text and never modified?
/* match: search for regexp anywhere in text */
func match(regexp: String, text: String) -> Bool {
if first(regexp) == "^" {
return matchHere(dropFirst(regexp), text)
}
for var idx = text.startIndex;;++idx { // must look even if string is empty
if matchHere(regexp, text[idx..<text.endIndex]) {
return true
}
// would love to do this with for idx in startIndex...endIndex instead of
// for;; and break, but String.Index can't advance beyond the endIndex
// unlike a C pointer so you can't use ... like that
if idx == text.endIndex { break }
// do while sufficed in the original C version...
} // while idx++ != string.endIndex
return false
}
/* matchhere: search for regexp at beginning of text */
func matchHere(regexp: String, text: String) -> Bool {
if regexp.isEmpty {
return true
}
if let c = first(regexp) where first(dropFirst(regexp)) == "*" {
return matchStar(c, dropFirst(dropFirst(regexp)), text)
}
if first(regexp) == "$" && dropFirst(regexp).isEmpty {
return text.isEmpty
}
if let tc = first(text), let rc = first(regexp) where rc == "." || tc == rc {
return matchHere(dropFirst(regexp), dropFirst(text))
}
return false
}
/* matchstar: search for c*regexp at beginning of text */
func matchStar(c: Character, regexp: String, text: String) -> Bool {
var idx = text.startIndex
do { /* a * matches zero or more instances */
if matchHere(regexp, text[idx..<text.endIndex]) {
return true
}
} while idx != text.endIndex && (text[idx++] == c || c == ".")
return false
}
/// Alternative matchStar that does longest rather than shortest run of c*
/* matchstar: leftmost longest search for c*regexp */
func matchStar(c: Character, regexp: String, text: String) -> Bool {
var idx = text.startIndex
for (; idx != text.endIndex && (text[idx] == c || c == "."); idx++) { }
do { /* * matches zero or more */
if (matchHere(regexp, text[idx..<text.endIndex])) {
return true
}
if idx == text.startIndex { break }
} while idx-- != text.startIndex
return false
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment