Created
March 16, 2020 15:01
-
-
Save AliSoftware/5d9fa5ca47cafb661f80acf4cb9ec197 to your computer and use it in GitHub Desktop.
Attempt at a nicer API for Swift RegEx (WIP)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Simple wrapper around NSRegularExpression that provides a Swiftier API and exposes matches as Range<String.Index> instead of NSRange
import Foundation | |
/// A thin generic wrapper around `NSRegularExpression`.
///
/// `Names` is not used by the struct itself; constrained extensions use it to
/// offer typed access to named capture groups.
struct RegEx<Names> {
    /// The compiled expression every match request is forwarded to.
    let regex: NSRegularExpression

    /// Compiles `pattern` with the given options.
    /// - Throws: Whatever `NSRegularExpression(pattern:options:)` throws.
    init(pattern: String, options: NSRegularExpression.Options = []) throws {
        regex = try NSRegularExpression(pattern: pattern, options: options)
    }

    /// One match result, paired with the string it was found in so that
    /// `NSRange` values can be converted to `Range<String.Index>`.
    struct Match {
        /// The string that was searched.
        let fullString: String
        /// The raw Foundation result backing this match.
        let result: NSTextCheckingResult

        /// Range of the whole match (group 0).
        var range: Range<String.Index> { self.range(at: 0) }

        /// Text of the whole match.
        var text: Substring { fullString[range] }

        /// Number of ranges reported by the underlying result.
        var numberOfRanges: Int { result.numberOfRanges }

        /// Range of the group at `index`, expressed in `fullString` indices.
        ///
        /// The conversion is force-unwrapped, so this traps when the `NSRange`
        /// cannot be converted to a `Range<String.Index>` of `fullString`.
        func range(at index: Int) -> Range<String.Index> {
            let nsRange = result.range(at: index)
            return Range(nsRange, in: fullString)!
        }

        /// Text of the group at `index`.
        subscript(index: Int) -> Substring {
            let bounds = self.range(at: index)
            return fullString[bounds]
        }
    }

    /// Returns every match of the expression in `string`, searching the whole string.
    func matches(in string: String, options: NSRegularExpression.MatchingOptions = []) -> [Match] {
        let wholeString = NSRange(string.startIndex..<string.endIndex, in: string)
        var found: [Match] = []
        for checkingResult in regex.matches(in: string, options: options, range: wholeString) {
            found.append(Match(fullString: string, result: checkingResult))
        }
        return found
    }
}
// Named-capture accessors, available when `Names` is a String-backed type
// (typically an enum whose raw values are the capture group names).
extension RegEx.Match where Names: RawRepresentable, Names.RawValue == String {
    /// Range of the capture group whose name is `name.rawValue`.
    ///
    /// The conversion is force-unwrapped, so this traps when the range cannot
    /// be expressed in `fullString` indices.
    func range(named name: Names) -> Range<String.Index> {
        let nsRange = result.range(withName: name.rawValue)
        return Range(nsRange, in: fullString)!
    }

    /// Text of the capture group whose name is `name.rawValue`.
    subscript(name: Names) -> Substring {
        let bounds = self.range(named: name)
        return fullString[bounds]
    }
}
// Lets a `RegEx` be written directly as a string literal.
extension RegEx: ExpressibleByStringLiteral {
    /// Compiles the literal as a pattern with no options.
    ///
    /// Uses `try!`, so a literal that is not a valid pattern traps at runtime.
    init(stringLiteral value: String) {
        self = try! RegEx(pattern: value, options: [])
    }
}
extension RegEx.Match: CustomStringConvertible {
    /// The text of the whole match, copied into a standalone `String`.
    var description: String {
        String(fullString[range])
    }
}
extension RegEx.Match: CustomDebugStringConvertible {
    /// A multi-line dump of the match: the full matched text followed by one
    /// line per range, showing its character offsets (half-open) and its text.
    ///
    /// Groups that did not participate in the match (for example the unused
    /// side of an alternation) are listed as `<no match>` instead of being
    /// force-unwrapped, so producing a debug description never traps.
    var debugDescription: String {
        let submatches: [String] = (0..<numberOfRanges).map { index in
            let nsRange = result.range(at: index)
            // NSTextCheckingResult reports {NSNotFound, 0} for a group that did not participate.
            guard nsRange.location != NSNotFound,
                  let groupRange = Range(nsRange, in: fullString) else {
                return " - [\(index)]: <no match>"
            }
            let start = fullString.distance(from: fullString.startIndex, to: groupRange.lowerBound)
            let end = fullString.distance(from: fullString.startIndex, to: groupRange.upperBound)
            // `end` is an exclusive bound, hence `..<` rather than `...`.
            return " - [\(index)]: [\(start)..<\(end)] = \"\(fullString[groupRange])\""
        }
        return "RegEx.Match \"\(text)\"\n" + submatches.joined(separator: "\n")
    }
}
//: ## Builder API | |
extension RegEx {
    /// A fragment of regular-expression pattern text. Components are
    /// concatenated by `compose(_:)` to build a full pattern.
    struct Component: CustomStringConvertible {
        /// The raw pattern text of this fragment.
        var pattern: String
        var description: String { pattern }

        /// Escapes a character so it is taken literally inside a `[...]` class.
        private static func escapedForCharacterClass(_ character: Character) -> String {
            switch character {
            case "\\", "[", "]", "^", "-", "&":
                // These have a special meaning inside a character class.
                return "\\\(character)"
            default:
                return String(character)
            }
        }

        /// Matches `text` literally; pattern metacharacters in it are escaped.
        static func literal(_ text: String) -> Component {
            .init(pattern: NSRegularExpression.escapedPattern(for: text))
        }

        /// Matches `comp` zero or more times.
        /// - Parameter greedy: `true` (the default) emits the greedy `*`;
        ///   `false` emits the lazy `*?`. The flag used to be inverted; the
        ///   default was changed along with the fix so that calls which omit
        ///   `greedy` still produce the same pattern as before.
        static func zeroOrMore(_ comp: Component, greedy: Bool = true) -> Component {
            .init(pattern: "(?:\(comp))*" + (greedy ? "" : "?"))
        }

        /// Matches `comp` one or more times.
        /// - Parameter greedy: `true` (the default) emits the greedy `+`;
        ///   `false` emits the lazy `+?`. See `zeroOrMore(_:greedy:)` for why
        ///   the default is `true`.
        static func oneOrMore(_ comp: Component, greedy: Bool = true) -> Component {
            .init(pattern: "(?:\(comp))+" + (greedy ? "" : "?"))
        }

        /// Matches any single character contained in one of the given sets.
        ///
        /// Characters are emitted in sorted order, so the pattern is the same
        /// on every run, and class metacharacters are escaped.
        static func oneOf(_ chars: Set<Character>...) -> Component {
            let merged = chars.reduce(into: Set<Character>()) { $0.formUnion($1) }
            // Join the characters themselves: interpolating the Set would emit
            // its debug form (`["a", "b"]`), adding quotes, commas and spaces to the class.
            let body = merged.sorted().map { escapedForCharacterClass($0) }.joined()
            return .init(pattern: "[\(body)]")
        }

        /// Matches any single character falling in one of the given ranges,
        /// e.g. `oneOf("A"..."Z", "a"..."z")` produces `[A-Za-z]`.
        static func oneOf(_ chars: ClosedRange<Character>...) -> Component {
            let body = chars
                .map { escapedForCharacterClass($0.lowerBound) + "-" + escapedForCharacterClass($0.upperBound) }
                .joined()
            return .init(pattern: "[\(body)]")
        }

        /// Matches any single character that appears in `string`.
        static func oneOf(_ string: String) -> Component {
            oneOf(Set<Character>(string))
        }

        /// Matches any single character NOT described by `chars`.
        ///
        /// `chars` is inserted into the class verbatim, without escaping.
        static func noneOf(_ chars: String) -> Component {
            // FIXME: Expect a Set<Character> instead
            // FIXME: if chars contains ] put at start, if contains ^ put at end
            // FIXME: provide a nice API to provide ranges of characters like A-Z
            .init(pattern: "[^\(chars)]")
        }

        /// Wraps the concatenation of `comps` in an unnamed capture group.
        static func capture(_ comps: RegEx.Component...) -> RegEx.Component {
            let joined = comps.map { $0.pattern }.joined()
            return .init(pattern: "(\(joined))")
        }
    }

    /// Concatenates the components' patterns and compiles the result.
    /// - Throws: Whatever `RegEx(pattern:)` throws for the assembled pattern.
    static func compose(_ components: Component...) throws -> RegEx {
        let pattern = components.map { $0.pattern }.joined()
        return try RegEx(pattern: pattern)
    }
}
// Named capture groups, available when `Names` is a String-backed type.
extension RegEx.Component where Names: RawRepresentable, Names.RawValue == String {
    /// Wraps the concatenation of `comps` in a capture group named `name.rawValue`.
    ///
    /// The raw value is inserted into the pattern as-is.
    static func capture(name: Names, _ comps: RegEx.Component...) -> RegEx.Component {
        var body = ""
        for comp in comps {
            body += comp.description
        }
        return .init(pattern: "(?<" + name.rawValue + ">" + body + ")")
    }
}
//: ## Example usage | |
/// Capture group names used by the example below; each raw value is the
/// name of the group as it appears in the pattern.
enum Fields: String {
    case firstname = "firstname"
}
// Build "Hello <name>!" where <name> is one or more ASCII letters, captured
// under the `firstname` group.
let re = try RegEx<Fields>.compose(
    .literal("Hello "),
    .capture(
        name: .firstname,
        .oneOrMore(.oneOf("A"..."Z", "a"..."z"))
    ),
    .literal("!")
)
print(re.regex.pattern) // Hello (?<firstname>(?:[A-Za-z])+)!

// Only the first match (if any) is reported.
for greeting in re.matches(in: "Hello Bob!").prefix(1) {
    print("firstname:", greeting[.firstname]) // firstname: Bob
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment