Created
July 15, 2020 21:01
-
-
Save jjrscott/14c076f8065292c1e1f05272adf0ba65 to your computer and use it in GitHub Desktop.
Reimplementation of Brian Kernighan's Regular Expression Matcher on BidirectionalCollection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// RegularExpression.swift | |
// RegularExpressions | |
// | |
// Created by John Scott on 15/07/2020. | |
// | |
// Original matching implementation Brian Kernighan : https://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html | |
// Swift port Ben Cohen : https://github.com/apple/swift/blob/a9eee38e109e3d99f103a9bee71bed4422fbb6fd/benchmark/single-source/StringMatch.swift | |
import Foundation | |
enum RegularExpressionElement<Element> : Equatable where Element : Equatable { | |
case any // . | |
case zeroOrMore // * | |
case begin // ^ | |
case end // $ | |
case element(Element) // a character | |
} | |
extension BidirectionalCollection where Element : Equatable { | |
typealias RegularExpression = ArraySlice<RegularExpressionElement<Self.Element>> | |
/* match: search for regexp anywhere in text */ | |
func match(regexp: RegularExpression) -> Bool { | |
if regexp.first == .begin { | |
return matchHere(regexp.dropFirst(), self[...]) | |
} | |
var idx = startIndex | |
while true { // must look even if string is empty | |
if matchHere(regexp[...], self[idx..<endIndex]) { | |
return true | |
} | |
guard idx != endIndex else { break } | |
// do while sufficed in the original C version... | |
formIndex(after: &idx) | |
} // while idx++ != string.endIndex | |
return false | |
} | |
/* matchhere: search for regexp at beginning of text */ | |
func matchHere(_ regexp: RegularExpression, _ text: Self.SubSequence) -> Bool { | |
if regexp.isEmpty { | |
return true | |
} | |
if let c = regexp.first, regexp.dropFirst().first == .zeroOrMore { | |
return matchStar(c, regexp.dropFirst(2), text) | |
} | |
if regexp.first == .end && regexp.dropFirst().isEmpty { | |
return text.isEmpty | |
} | |
if let tc = text.first, let rc = regexp.first, rc == .any || .element(tc) == rc { | |
return matchHere(regexp.dropFirst(), text.dropFirst()) | |
} | |
return false | |
} | |
/* matchstar: search for c*regexp at beginning of text */ | |
func matchStar(_ c: RegularExpression.Element, _ regexp: RegularExpression, _ text: Self.SubSequence) -> Bool { | |
var idx = text.startIndex | |
while true { /* a * matches zero or more instances */ | |
if matchHere(regexp, text[idx..<text.endIndex]) { | |
return true | |
} | |
if idx == text.endIndex || (.element(text[idx]) != c && c != .any) { | |
return false | |
} | |
text.formIndex(after: &idx) | |
} | |
} | |
} | |
func runTests() { | |
let tests: KeyValuePairs<String.RegularExpression, String> = [ | |
[.begin, .element("h"), .any, .any, .element("l"), .element("o"), .zeroOrMore, .element("!"), .end] : "hellooooo!", | |
[.begin, .element("h"), .any, .any, .element("l"), .element("o"), .zeroOrMore, .element("!"), .end] : "hella noms", | |
[.any, .element("a"), .element("b")] : "abracadabra!", | |
[.element("s"), .any, .zeroOrMore] : "saaaad!", | |
[.any, .any, .any, .element("e"), .any, .end] : "\"Ganymede,\" he continued, \"is the largest moon in the Solar System\"", | |
[.element("🤠"), .zeroOrMore] : "even 🤠🤠🤠 are supported", | |
] | |
for (regex, text) in tests { | |
print("\(regex) : \(text) : \(text.match(regexp: regex))") | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment