Created
November 29, 2017 18:27
-
-
Save alinebee/ba18bf80e8217f11c8895783e63403c0 to your computer and use it in GitHub Desktop.
A Swift 2-era extension library to make NSRegularExpression less obnoxious
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// RegExp.swift | |
// | |
// Created by Alun Bestor on 18/11/14. | |
// Copyright (c) 2014 Alun Bestor. All rights reserved. | |
// | |
import Foundation | |
/// Alias to make `NSRegularExpression` less odious to type.
/// This allows `NSRegularExpression` instances to be created with just `RegExp(...)`.
public typealias RegExp = NSRegularExpression | |
//FIXME: This dependent type should be entirely nested inside RegExp below, but the compiler can be
//excruciatingly brittle when it comes to extending nested type definitions, so we define it separately
//and import it via typealias.
/// A regular expression match returned by `RegExp.firstMatch(in:)`, `RegExp.allMatches(in:)`
/// and `RegExp.enumerate(_:enumerator:)`.
/// It represents a part of the string matched by the whole regular expression, along with any individual
/// parts of the string matched by parenthesized subexpressions within the expression.
///
/// The match and any submatches can be retrieved by subscripting:
///
/// - `match[0]` returns the entire matched string,
/// - `match[1]` returns the substring matched by the first subexpression, and so on.
///
/// Optional subexpressions that did not participate in the match will be represented as `nil`.
public struct _RegExpMatch: CustomDebugStringConvertible {
    /// The entire string that was searched by the regular expression.
    public let string: String

    /// The NSTextCheckingResult instance returned by the regular expression.
    public let result: NSTextCheckingResult

    /// The total number of components in this match, which will be 1 + the number
    /// of subexpressions within the original regular expression.
    public var count: Int { return result.numberOfRanges }

    /// A human-readable, comma-separated list of the match and all its submatches.
    ///
    /// Each participating component is printed as its matched text in double quotes;
    /// a component that did not participate in the match is printed as `nil`.
    public var debugDescription: String {
        // Each element is an optional (string, range) tuple. Unwrap it and print only the
        // matched text, rather than interpolating the raw optional tuple.
        let formattedMatches = self.map { submatch -> String in
            guard let submatch = submatch else { return "nil" }
            return "\"\(submatch.string)\""
        }
        return formattedMatches.joined(separator: ", ")
    }
}
/// Exposes the match and its submatches as an integer-indexed collection,
/// so they can be iterated or looked up by position.
extension _RegExpMatch: RandomAccessCollection {
    // Plain integer ranges serve as the collection's indices.
    public typealias Indices = CountableRange<Int>

    /// The position of the whole-expression match.
    public var startIndex: Int { return 0 }

    /// One past the position of the last component.
    public var endIndex: Int { return count }

    /// Looks up the component of the match at `position`.
    ///
    /// - Parameter position: 0 for the whole match, 1 for the first subexpression, and so on.
    /// - Returns: The matched substring together with its range in the searched string,
    ///   or `nil` if the component's range is reported as not found (an optional
    ///   subexpression that did not participate in this match).
    ///
    /// It is a runtime error to access an index beyond the number of subexpressions
    /// in the original regular expression.
    public subscript(position: Int) -> (string: String, range: Range<String.Index>)? {
        let foundationRange = result.rangeAt(position)
        guard foundationRange.location != NSNotFound else { return nil }

        let matchedRange = string.rangeFromNSRange(foundationRange)
        let matchedText = string.substring(with: matchedRange)
        return (matchedText, matchedRange)
    }
}
/// Extends NSRegularExpression with concise helper methods and an easily introspectable result type for matches.
public extension RegExp {
    /// The result type produced by `firstMatch(in:)`, `allMatches(in:)` and `enumerate(_:enumerator:)`.
    public typealias Match = _RegExpMatch

    /// The enumeration closure used by `enumerate(_:enumerator:)`.
    ///
    /// - parameter match: A `Match` struct representing the next match in the string.
    /// - returns: `true` if enumeration should continue, or `false` if it should stop at this match.
    public typealias MatchEnumerator = (Match) -> Bool

    /// Shorthand initializer for general-case regular expressions, which makes all parameters optional
    /// except the pattern itself.
    ///
    /// - parameter pattern: The string pattern that should be compiled into a regular expression.
    /// - parameter options: Options affecting the matching behaviour of the regular expression.
    ///   Defaults to `.dotMatchesLineSeparators` (case-sensitive matching, with `.` allowed to match newlines).
    /// - throws: The error raised by `init(pattern:options:)` if the pattern could not be compiled
    ///   into a valid regular expression.
    public convenience init(_ pattern: String, options: Options = .dotMatchesLineSeparators) throws {
        try self.init(pattern: pattern, options: options)
    }

    /// Counts how many times the regular expression matches the specified string.
    ///
    /// - parameter string: The string against which to match the regular expression.
    /// - returns: The number of matches found in the string, or 0 if there are none.
    public func numMatches(in string: String) -> Int {
        // NSRange is measured in UTF-16 code units. Using the UTF-8 length would produce a range
        // that runs past the end of any string containing non-ASCII characters.
        let wholeStringRange = NSRange(location: 0, length: string.utf16.count)
        return numberOfMatches(in: string, options: [], range: wholeStringRange)
    }

    /// Searches the specified string for every match of the regular expression and returns an array of the resulting matches.
    ///
    /// - parameter string: The string against which to match the regular expression.
    /// - returns: An array of matches extracted from the specified string, or an empty array
    ///   if the string did not match this regular expression.
    public func allMatches(in string: String) -> [Match] {
        var matches: [Match] = []
        enumerate(string) { match in
            matches.append(match)
            return true // keep going until every match has been collected
        }
        return matches
    }

    /// Searches the specified string for the first match of the regular expression and returns the resulting match.
    ///
    /// - parameter string: The string against which to match the regular expression.
    /// - returns: The first match in the string, or `nil` if the string did not match this regular expression.
    public func firstMatch(in string: String) -> Match? {
        var firstMatch: Match? = nil
        enumerate(string) { match in
            firstMatch = match
            return false // stop after the first match
        }
        return firstMatch
    }

    /// Finds all instances of this regular expression within a specified string and replaces them with a substitution template.
    ///
    /// - parameter string: The string against which to match this regular expression.
    /// - parameter template: The substitution pattern with which to replace all matching substrings.
    /// - returns: A string constructed by replacing all matches within the specified string using the specified template.
    public func replaceMatches(in string: String, withTemplate template: String) -> String {
        // UTF-16 length: NSRange counts UTF-16 code units, not UTF-8 bytes.
        let wholeStringRange = NSRange(location: 0, length: string.utf16.count)
        return stringByReplacingMatches(in: string, options: [], range: wholeStringRange, withTemplate: template)
    }

    /// Enumerates sets of components matched by the regular expression, calling an enumeration closure for each.
    /// The closure can return `true` to continue to the next match, or `false` to stop the enumeration.
    ///
    /// - parameter string: The string against which to match the regular expression.
    /// - parameter enumerator: The closure called with each successive match.
    //TODO: it would be nice to do this with an iterator instead, but unfortunately the underlying (NS)RegularExpression
    //API doesn't support it yet.
    public func enumerate(_ string: String, enumerator: MatchEnumerator) {
        // UTF-16 length: NSRange counts UTF-16 code units, not UTF-8 bytes.
        let wholeStringRange = NSRange(location: 0, length: string.utf16.count)

        //Wrap Foundation's NSTextCheckingResults in our own Match structs.
        enumerateMatches(in: string, options: [], range: wholeStringRange) { result, _, stop in
            // Callbacks that carry no result are ignored.
            guard let result = result else { return }

            let shouldContinue = enumerator(Match(string: string, result: result))
            if !shouldContinue {
                stop.pointee = true
            }
        }
    }
}
/**
 Allows strings to be compared against regular expressions using Swift's builtin `~=` expression operator.
 `RegExp(...)` throws, so the expression is compiled with `try` first:

     let pattern = try RegExp("pattern to match")
     if stringToCompare ~= pattern { }
     if pattern ~= stringToCompare { }

 The `~=` operator is also used internally by Swift's `switch` comparisons, which allows constructions like this:

     let specificString = try RegExp("^matches a specific string$")
     let number = try RegExp("[0-9]+") //Matches a number

     switch stringToCompare {
     case specificString:
         return "\(stringToCompare) is a specific string"
     case number:
         return "\(stringToCompare) is a number"
     default:
         return "I have no idea"
     }

 - Returns: `true` if `regex` matches `string` at least once, or `false` otherwise.
 */
public func ~= (string: String, regex: RegExp) -> Bool {
    // Only the existence of a match matters, so stop at the first one instead of counting them all.
    return regex.firstMatch(in: string) != nil
}
/// Pattern-first overload of `~=`, with the regular expression on the left-hand side.
///
/// - Returns: `true` if `regex` matches `string` at least once, or `false` otherwise.
public func ~= (regex: RegExp, string: String) -> Bool {
    // Only the existence of a match matters, so stop at the first one instead of counting them all.
    return regex.firstMatch(in: string) != nil
}
// MARK: - String literal support | |
/// A helper subclass that lets regular expressions be constructed unconditionally from compile-time string literals.
/// **Note**: This must be a final subclass in order to conform to the initializer requirements of `ExpressibleByStringLiteral`.
/// (Not that you'd want to subclass it anyway, but this is also why it can't just be implemented as an extension to RegExp.)
public final class RegExpLiteral: RegExp, ExpressibleByStringLiteral {
    /// Creates a new regular expression from the specified string literal,
    /// compiled with the `.dotMatchesLineSeparators` option.
    /// Fails with a fatal runtime error if the string was not a valid regular expression pattern.
    public convenience init(stringLiteral value: String) {
        do {
            try self.init(pattern: value, options: .dotMatchesLineSeparators)
        } catch {
            //An unparsable literal is a mistake in the source code itself, so crash
            //with the compilation error's description rather than attempt recovery.
            let failure = error as NSError
            fatalError(failure.description)
        }
    }

    /// Forwards extended-grapheme-cluster literals to `init(stringLiteral:)`.
    public convenience init(extendedGraphemeClusterLiteral value: String) {
        self.init(stringLiteral: value)
    }

    /// Forwards unicode-scalar literals to `init(stringLiteral:)`.
    public convenience init(unicodeScalarLiteral value: String) {
        self.init(stringLiteral: value)
    }
}
//MARK: - String extensions | |
/// Gives `String` a more readable API for matching against and replacing with regular expressions.
public extension String {
    /// Produces a copy of the string with every occurrence of `regexp` substituted.
    ///
    /// - Parameter regexp: The regular expression to search for.
    /// - Parameter template: The substitution template applied to each match.
    /// - Returns: The result of replacing all occurrences of `regexp` in the string with the specified `template`.
    public func replacing(_ regexp: RegExp, with template: String) -> String {
        let replaced = regexp.replaceMatches(in: self, withTemplate: template)
        return replaced
    }

    /// Modifies the string in place, replacing all occurrences of `regexp` with the specified `template`.
    ///
    /// - Parameter regexp: The regular expression to search for.
    /// - Parameter template: The substitution template applied to each match.
    public mutating func replace(_ regexp: RegExp, with template: String) {
        let replaced = regexp.replaceMatches(in: self, withTemplate: template)
        self = replaced
    }

    /// Tests whether the string contains at least one match of `regexp`.
    ///
    /// - Parameter regexp: The regular expression to test against.
    /// - Returns: `true` if this string matches one or more times, or `false` otherwise.
    public func matches(_ regexp: RegExp) -> Bool {
        guard regexp.firstMatch(in: self) != nil else { return false }
        return true
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment