Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Attempt at a nicer API for Swift RegEx (WIP)
// Simple wrapper around NSRegularExpression that provides a Swiftier API and exposes matches as Range<String.Index> instead of NSRange
import Foundation
/// A thin generic wrapper around `NSRegularExpression` whose matches expose
/// `Range<String.Index>` values instead of `NSRange`.
///
/// `Names` is a phantom type parameter: no value of it is stored. It only lets
/// constrained extensions address named capture groups through a typed key.
struct RegEx<Names> {
    /// The underlying compiled regular expression.
    let regex: NSRegularExpression

    /// Compiles `pattern` into a regular expression.
    ///
    /// - Parameters:
    ///   - pattern: The regular-expression pattern text.
    ///   - options: Options forwarded to `NSRegularExpression`.
    /// - Throws: The error raised by `NSRegularExpression` when the pattern cannot be compiled.
    init(pattern: String, options: NSRegularExpression.Options = []) throws {
        self.regex = try NSRegularExpression(pattern: pattern, options: options)
    }

    /// A single match of the expression inside `fullString`.
    struct Match {
        /// The string that was searched.
        let fullString: String
        /// The raw Foundation result describing the match and its capture groups.
        let result: NSTextCheckingResult

        /// The range of the whole match (capture group 0).
        var range: Range<String.Index> {
            return self.range(at: 0)
        }

        /// The text of the whole match.
        var text: Substring {
            return fullString[range]
        }

        /// The number of ranges in the result: the whole match plus every capture group.
        var numberOfRanges: Int {
            return result.numberOfRanges
        }

        /// Returns the range of the capture group at `index`, or `nil` when that
        /// group did not participate in the match (for example an optional group
        /// such as `(b)?` that matched nothing).
        func rangeIfMatched(at index: Int) -> Range<String.Index>? {
            return Range(result.range(at: index), in: fullString)
        }

        /// Returns the range of the capture group at `index`.
        ///
        /// - Precondition: The group participated in the match. Use
        ///   `rangeIfMatched(at:)` when the group may be absent.
        func range(at index: Int) -> Range<String.Index> {
            guard let matched = rangeIfMatched(at: index) else {
                // Trap with an explanation instead of an anonymous force-unwrap failure.
                preconditionFailure("Capture group \(index) did not participate in the match")
            }
            return matched
        }

        /// Returns the text captured by the group at `index`.
        ///
        /// - Precondition: The group participated in the match.
        subscript(index: Int) -> Substring {
            return fullString[self.range(at: index)]
        }
    }

    /// Returns every match of the expression in `string`, searching the whole string.
    ///
    /// - Parameters:
    ///   - string: The string to search.
    ///   - options: Matching options forwarded to `NSRegularExpression`.
    /// - Returns: One `Match` per result, in the order Foundation reports them.
    func matches(in string: String, options: NSRegularExpression.MatchingOptions = []) -> [Match] {
        let wholeString = NSRange(string.startIndex..., in: string)
        return regex
            .matches(in: string, options: options, range: wholeString)
            .map { Match(fullString: string, result: $0) }
    }
}
/// Typed access to named capture groups, available when `Names` is a
/// `RawRepresentable` whose raw values are the group names used in the pattern.
extension RegEx.Match where Names: RawRepresentable, Names.RawValue == String {
    /// Returns the range of the capture group called `name`, or `nil` when the
    /// pattern has no such group or the group did not participate in the match.
    func rangeIfMatched(named name: Names) -> Range<String.Index>? {
        return Range(result.range(withName: name.rawValue), in: fullString)
    }

    /// Returns the range of the capture group called `name`.
    ///
    /// - Precondition: The group exists in the pattern and participated in the
    ///   match. Use `rangeIfMatched(named:)` when it may be absent.
    func range(named name: Names) -> Range<String.Index> {
        guard let matched = rangeIfMatched(named: name) else {
            // Trap with an explanation instead of an anonymous force-unwrap failure.
            preconditionFailure("Capture group \"\(name.rawValue)\" is missing or did not participate in the match")
        }
        return matched
    }

    /// Returns the text captured by the group called `name`.
    ///
    /// - Precondition: The group exists in the pattern and participated in the match.
    subscript(name: Names) -> Substring {
        return fullString[self.range(named: name)]
    }
}
/// Lets a `RegEx` be written directly as a string literal.
extension RegEx: ExpressibleByStringLiteral {
    /// Compiles the literal as a pattern with no options.
    ///
    /// A literal initializer cannot throw, so an invalid pattern traps at runtime.
    init(stringLiteral value: String) {
        try! self.init(pattern: value, options: [])
    }
}
/// Describes a match by the text it matched.
extension RegEx.Match: CustomStringConvertible {
    /// The text of the whole match, copied into a standalone `String`.
    var description: String {
        String(fullString[range])
    }
}
/// Multi-line debug output: the matched text followed by one line per capture group.
extension RegEx.Match: CustomDebugStringConvertible {
    /// Lists every group with its character offsets (half-open, relative to the
    /// start of `fullString`) and its captured text. Groups that did not
    /// participate in the match are reported as such instead of crashing.
    var debugDescription: String {
        let submatches: [String] = (0..<numberOfRanges).map { index in
            // An optional group that matched nothing has no range in the string;
            // converting its NSRange yields nil, so it must not be force-unwrapped.
            guard let range = Range(result.range(at: index), in: fullString) else {
                return " - [\(index)]: <no match>"
            }
            let start = fullString.distance(from: fullString.startIndex, to: range.lowerBound)
            let end = fullString.distance(from: fullString.startIndex, to: range.upperBound)
            // `end` is the exclusive upper bound, hence `..<` rather than `...`.
            return " - [\(index)]: [\(start)..<\(end)] = \"\(fullString[range])\""
        }
        return "RegEx.Match \"\(text)\"\n" + submatches.joined(separator: "\n")
    }
}
//: ## Builder API
extension RegEx {
    /// A fragment of regular-expression pattern text. Fragments are concatenated
    /// by `compose(_:)` to form a complete pattern.
    struct Component: CustomStringConvertible {
        /// The raw pattern text of this fragment.
        var pattern: String

        /// The fragment's pattern text.
        var description: String { pattern }

        /// Returns `character` written so that it is a literal member of a `[...]` set.
        ///
        /// Brackets and backslash must be quoted inside a set; `^`, `-` and `&`
        /// are quoted as well because they are operators depending on position.
        private static func escapedForSet(_ character: Character) -> String {
            switch character {
            case "\\", "[", "]", "^", "-", "&":
                return "\\\(character)"
            default:
                return String(character)
            }
        }

        /// Matches `text` literally; every pattern metacharacter in it is escaped.
        static func literal(_ text: String) -> Component {
            .init(pattern: NSRegularExpression.escapedPattern(for: text))
        }

        /// Matches `comp` zero or more times.
        ///
        /// - Parameter greedy: `true` (the default) matches as many repetitions as
        ///   possible; `false` appends `?` so as few as possible are matched.
        static func zeroOrMore(_ comp: Component, greedy: Bool = true) -> Component {
            // A trailing `?` makes a quantifier lazy, so it belongs to the non-greedy case.
            .init(pattern: "(?:\(comp))*" + (greedy ? "" : "?"))
        }

        /// Matches `comp` one or more times.
        ///
        /// - Parameter greedy: `true` (the default) matches as many repetitions as
        ///   possible; `false` appends `?` so as few as possible are matched.
        static func oneOrMore(_ comp: Component, greedy: Bool = true) -> Component {
            // A trailing `?` makes a quantifier lazy, so it belongs to the non-greedy case.
            .init(pattern: "(?:\(comp))+" + (greedy ? "" : "?"))
        }

        /// Matches any single character contained in any of the given sets.
        static func oneOf(_ chars: Set<Character>...) -> Component {
            let merged = chars.reduce(into: Set<Character>()) { $0.formUnion($1) }
            // Interpolating the Set itself would emit its debug description
            // (`["a", "b"]`); build the member list explicitly instead. Sorting
            // makes the generated pattern deterministic.
            let members = merged.sorted().map(escapedForSet).joined()
            return .init(pattern: "[\(members)]")
        }

        /// Matches any single character that falls in one of the given closed ranges.
        static func oneOf(_ chars: ClosedRange<Character>...) -> Component {
            let merged = chars
                .map { "\(escapedForSet($0.lowerBound))-\(escapedForSet($0.upperBound))" }
                .joined()
            return .init(pattern: "[\(merged)]")
        }

        /// Matches any single character that occurs in `string`.
        static func oneOf(_ string: String) -> Component {
            oneOf(Set<Character>(string))
        }

        /// Matches any single character that does not occur in `chars`.
        ///
        /// Every character of `chars` is taken literally, so `"a-z"` excludes the
        /// three characters `a`, `-` and `z`, not a range.
        static func noneOf(_ chars: String) -> Component {
            // FIXME: Expect a Set<Character> instead
            // FIXME: provide a nice API to provide ranges of characters like A-Z
            .init(pattern: "[^\(chars.map(escapedForSet).joined())]")
        }

        /// Wraps the concatenation of `comps` in an unnamed capture group.
        static func capture(_ comps: RegEx.Component...) -> RegEx.Component {
            let joined = comps.map(\.pattern).joined()
            return .init(pattern: "(\(joined))")
        }
    }

    /// Concatenates `components` into one pattern and compiles it.
    ///
    /// - Throws: The error raised by `RegEx.init(pattern:options:)` when the
    ///   resulting pattern is invalid.
    static func compose(_ components: Component...) throws -> RegEx {
        let pattern = components.map(\.pattern).joined()
        return try RegEx(pattern: pattern)
    }
}
/// Named capture groups, available when `Names` supplies string raw values.
extension RegEx.Component where Names: RawRepresentable, Names.RawValue == String {
    /// Wraps the concatenation of `comps` in a capture group named after `name`'s raw value.
    static func capture(name: Names, _ comps: RegEx.Component...) -> RegEx.Component {
        let body = comps.reduce(into: "") { partial, component in
            partial += component.description
        }
        return RegEx.Component(pattern: "(?<" + name.rawValue + ">" + body + ")")
    }
}
//: ## Example usage
/// Capture-group names used by the example pattern; each raw value is the group name.
enum Fields: String {
    /// The group capturing the greeted person's first name.
    case firstname = "firstname"
}
// Build "Hello <firstname>!" where the first name is one or more ASCII letters.
let re = try RegEx<Fields>.compose(
    .literal("Hello "),
    .capture(
        name: .firstname,
        .oneOrMore(.oneOf("A"..."Z", "a"..."z"))
    ),
    .literal("!")
)
print(re.regex.pattern) // Hello (?<firstname>(?:[A-Za-z])+)!

// Only the first match is of interest here.
for firstMatch in re.matches(in: "Hello Bob!").prefix(1) {
    print("firstname:", firstMatch[.firstname]) // firstname: Bob
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment