Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jjrscott/14c076f8065292c1e1f05272adf0ba65 to your computer and use it in GitHub Desktop.
Save jjrscott/14c076f8065292c1e1f05272adf0ba65 to your computer and use it in GitHub Desktop.
Reimplementation of Brian Kernighan's Regular Expression Matcher on BidirectionalCollection
//
// RegularExpression.swift
// RegularExpressions
//
// Created by John Scott on 15/07/2020.
//
// Original matching implementation Brian Kernighan : https://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html
// Swift port Ben Cohen : https://github.com/apple/swift/blob/a9eee38e109e3d99f103a9bee71bed4422fbb6fd/benchmark/single-source/StringMatch.swift
import Foundation
enum RegularExpressionElement<Element> : Equatable where Element : Equatable {
case any // .
case zeroOrMore // *
case begin // ^
case end // $
case element(Element) // a character
}
extension BidirectionalCollection where Element : Equatable {
typealias RegularExpression = ArraySlice<RegularExpressionElement<Self.Element>>
/* match: search for regexp anywhere in text */
func match(regexp: RegularExpression) -> Bool {
if regexp.first == .begin {
return matchHere(regexp.dropFirst(), self[...])
}
var idx = startIndex
while true { // must look even if string is empty
if matchHere(regexp[...], self[idx..<endIndex]) {
return true
}
guard idx != endIndex else { break }
// do while sufficed in the original C version...
formIndex(after: &idx)
} // while idx++ != string.endIndex
return false
}
/* matchhere: search for regexp at beginning of text */
func matchHere(_ regexp: RegularExpression, _ text: Self.SubSequence) -> Bool {
if regexp.isEmpty {
return true
}
if let c = regexp.first, regexp.dropFirst().first == .zeroOrMore {
return matchStar(c, regexp.dropFirst(2), text)
}
if regexp.first == .end && regexp.dropFirst().isEmpty {
return text.isEmpty
}
if let tc = text.first, let rc = regexp.first, rc == .any || .element(tc) == rc {
return matchHere(regexp.dropFirst(), text.dropFirst())
}
return false
}
/* matchstar: search for c*regexp at beginning of text */
func matchStar(_ c: RegularExpression.Element, _ regexp: RegularExpression, _ text: Self.SubSequence) -> Bool {
var idx = text.startIndex
while true { /* a * matches zero or more instances */
if matchHere(regexp, text[idx..<text.endIndex]) {
return true
}
if idx == text.endIndex || (.element(text[idx]) != c && c != .any) {
return false
}
text.formIndex(after: &idx)
}
}
}
func runTests() {
let tests: KeyValuePairs<String.RegularExpression, String> = [
[.begin, .element("h"), .any, .any, .element("l"), .element("o"), .zeroOrMore, .element("!"), .end] : "hellooooo!",
[.begin, .element("h"), .any, .any, .element("l"), .element("o"), .zeroOrMore, .element("!"), .end] : "hella noms",
[.any, .element("a"), .element("b")] : "abracadabra!",
[.element("s"), .any, .zeroOrMore] : "saaaad!",
[.any, .any, .any, .element("e"), .any, .end] : "\"Ganymede,\" he continued, \"is the largest moon in the Solar System\"",
[.element("🤠"), .zeroOrMore] : "even 🤠🤠🤠 are supported",
]
for (regex, text) in tests {
print("\(regex) : \(text) : \(text.match(regexp: regex))")
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment