Skip to content

Instantly share code, notes, and snippets.

@alinebee
Created November 29, 2017 18:27
Show Gist options
  • Save alinebee/ba18bf80e8217f11c8895783e63403c0 to your computer and use it in GitHub Desktop.
Save alinebee/ba18bf80e8217f11c8895783e63403c0 to your computer and use it in GitHub Desktop.
A Swift 2-era extension library to make NSRegularExpression less obnoxious
//
// RegExp.swift
//
// Created by Alun Bestor on 18/11/14.
// Copyright (c) 2014 Alun Bestor. All rights reserved.
//
import Foundation
/// Shorthand alias that makes `NSRegularExpression` less odious to type.
/// With this alias in scope, an `NSRegularExpression` can be referred to (and created) as just `RegExp(...)`.
public typealias RegExp = NSRegularExpression
//FIXME: This dependent type should be entirely nested inside RegExp below, but the compiler can be
//excruciatingly brittle when it comes to extending nested type definitions, so we define it separately
//and import it via typealias.
/// A regular expression match returned by `RegExp.firstMatch(in:)`, `RegExp.allMatches(in:)`
/// and `RegExp.enumerate(_:enumerator:)`.
/// It represents a part of the string matched by the whole regular expression, along with any individual
/// parts of the string matched by parenthesized subexpressions within the expression.
///
/// The match and any submatches can be retrieved by subscripting; each element is a
/// `(string, range)` tuple:
///
/// - `match[0]` describes the part of the string matched by the entire expression,
/// - `match[1]` describes the part matched by the first subexpression, and so on.
///
/// Optional subexpressions that did not participate in the match will be represented as `nil`.
public struct _RegExpMatch: CustomDebugStringConvertible {
    /// The entire string that was searched by the regular expression.
    public let string: String

    /// The NSTextCheckingResult instance returned by the regular expression.
    public let result: NSTextCheckingResult

    /// The total number of components in this match, as reported by `result.numberOfRanges`:
    /// 1 + the number of subexpressions within the original regular expression.
    public var count: Int { return result.numberOfRanges }

    /// A human-readable, comma-separated list of the match and all its submatches.
    /// Each matched substring is wrapped in double quotes; subexpressions that did not
    /// participate in the match are printed as `nil`.
    public var debugDescription: String {
        let formattedMatches: [String] = self.map { (component) -> String in
            // Elements are optional (string, range) tuples. Unwrap them explicitly so the
            // output shows the matched text rather than "Optional((string: ..., range: ...))".
            guard let component = component else { return "nil" }
            return "\"\(component.string)\""
        }
        return formattedMatches.joined(separator: ", ")
    }
}
/// Makes the components of a match iterable and addressable by integer index.
extension _RegExpMatch: RandomAccessCollection {
    // Use the default integer range as the collection's index set.
    public typealias Indices = CountableRange<Int>

    /// Index of the whole-expression match.
    public var startIndex: Int { return 0 }

    /// One past the index of the last subexpression.
    public var endIndex: Int { return count }

    /// - Returns: The substring and range matched by the subexpression at the specified index,
    ///   or `nil` if that subexpression was optional and did not participate in this match.
    ///
    /// It is a runtime error to access an index beyond the number of subexpressions
    /// in the original regular expression.
    public subscript(position: Int) -> (string: String, range: Range<String.Index>)? {
        let nsRange = result.rangeAt(position)

        // A location of NSNotFound marks a subexpression that took no part in the match.
        guard nsRange.location != NSNotFound else { return nil }

        let matchedRange = string.rangeFromNSRange(nsRange)
        return (string.substring(with: matchedRange), matchedRange)
    }
}
/// Extends NSRegularExpression with concise helper methods and an easily introspectable result type for matches.
public extension RegExp {
    public typealias Match = _RegExpMatch

    /// The enumeration closure used by `RegExp.enumerate`.
    ///
    /// - parameter match: A Match struct representing the next match in the string.
    /// - returns: `true` if enumeration should continue, or `false` if it should stop at this match.
    public typealias MatchEnumerator = (Match) -> Bool

    /// Shorthand initializer for general-case regular expressions, which makes all parameters optional
    /// except the pattern itself.
    ///
    /// - parameter pattern: The string pattern that should be compiled into a regular expression.
    /// - parameter options: Options affecting the matching behaviour of the regular expression.
    ///   Defaults to `.dotMatchesLineSeparators`: case-sensitive matching, with `.` allowed to match newlines.
    /// - throws: The error raised by `init(pattern:options:)` if the string could not be compiled
    ///   into a valid regular expression.
    public convenience init(_ pattern: String, options: Options = .dotMatchesLineSeparators) throws {
        try self.init(pattern: pattern, options: options)
    }

    /// Counts how many times the regular expression matches the specified string.
    ///
    /// - parameter string: The string against which to match the regular expression.
    /// - returns: The number of matches found in the string, or 0 if there are no matches.
    public func numMatches(in string: String) -> Int {
        // NSRange is measured in UTF-16 code units, so the length must come from the UTF-16 view.
        // (The UTF-8 count is larger for non-ASCII text and yields an out-of-bounds range.)
        let wholeStringRange = NSRange(location: 0, length: string.utf16.count)
        return self.numberOfMatches(in: string, options: [], range: wholeStringRange)
    }

    /// Searches the specified string for every match of the regular expression and returns an array of the resulting matches.
    ///
    /// - parameter string: The string against which to match the regular expression.
    /// - returns: An array of matches extracted from the specified string, or an empty array if the string did not match this regular expression.
    public func allMatches(in string: String) -> [Match] {
        var matches: [Match] = []
        // Always return true so that enumeration runs through to the last match.
        self.enumerate(string) { matches.append($0); return true }
        return matches
    }

    /// Searches the specified string for the first match of the regular expression and returns the resulting match.
    ///
    /// - parameter string: The string against which to match the regular expression.
    /// - returns: The first match in the string, or `nil` if the string did not match this regular expression.
    public func firstMatch(in string: String) -> Match? {
        var firstMatch: Match? = nil
        // Return false to stop enumerating as soon as one match has been captured.
        self.enumerate(string) { firstMatch = $0; return false }
        return firstMatch
    }

    /// Finds all instances of this regular expression within a specified string and replaces them with a substitution template.
    ///
    /// - parameter string: The string against which to match this regular expression.
    /// - parameter template: The substitution pattern with which to replace all matching substrings.
    /// - returns: A string constructed by replacing all matches within the specified string using the specified template.
    public func replaceMatches(in string: String, withTemplate template: String) -> String {
        // NSRange is measured in UTF-16 code units, not UTF-8 bytes.
        let wholeStringRange = NSRange(location: 0, length: string.utf16.count)
        return self.stringByReplacingMatches(in: string, options: [], range: wholeStringRange, withTemplate: template)
    }

    /// Enumerates sets of components matched by the regular expression, calling an enumeration closure for each.
    /// The closure can return true to continue to the next match, or false to stop the enumeration.
    ///
    /// - parameter string: The string against which to match the regular expression.
    /// - parameter enumerator: The closure called with each match, in the order Foundation reports them.
    //TODO: it would be nice to do this with an iterator instead, but unfortunately the underlying (NS)RegularExpression
    //API doesn't support it yet.
    public func enumerate(_ string: String, enumerator: MatchEnumerator) {
        // NSRange is measured in UTF-16 code units, not UTF-8 bytes.
        let wholeStringRange = NSRange(location: 0, length: string.utf16.count)

        //Wrap Foundation's NSTextCheckingResults in our own Match structs.
        self.enumerateMatches(in: string, options: [], range: wholeStringRange) { result, _, stop in
            // The result is optional in Foundation's callback signature; skip callbacks that carry no match.
            guard let result = result else { return }

            let shouldContinue = enumerator(Match(string: string, result: result))
            if !shouldContinue {
                stop.pointee = true
            }
        }
    }
}
/**
 Allows strings to be compared against regular expressions using Swift's builtin `~=` expression operator, such as:

     if try stringToCompare ~= RegExp("pattern to match") { }
     if try RegExp("pattern to match") ~= stringToCompare { }

 (`RegExp(_:options:)` throws if the pattern cannot be compiled, hence the `try`.)

 The `~=` operator is also used internally by Swift's switch comparisons, which allows constructions like this:

     let specificString = try RegExp("^matches a specific string$")
     let number = try RegExp("[0-9]+") //Matches a number

     switch stringToCompare {
     case specificString:
         return "\(stringToCompare) is a specific string"
     case number:
         return "\(stringToCompare) is a number"
     default:
         return "I have no idea"
     }

 - parameter string: The string to test.
 - parameter regex: The regular expression to test the string against.
 - returns: `true` if the regular expression matches the string at least once, `false` otherwise.
 */
public func ~= (string: String, regex: RegExp) -> Bool {
    // NSRange is measured in UTF-16 code units, so take the length from the UTF-16 view.
    let wholeStringRange = NSRange(location: 0, length: string.utf16.count)
    // Only existence matters here, so ask Foundation for the first match instead of counting them all.
    return regex.firstMatch(in: string, options: [], range: wholeStringRange) != nil
}
/// Pattern-first overload of `~=`: this is the form Swift uses when a regular expression
/// appears as a `case` pattern in a `switch` over a string.
///
/// - parameter regex: The regular expression to test the string against.
/// - parameter string: The string to test.
/// - returns: `true` if the regular expression matches the string at least once, `false` otherwise.
public func ~= (regex: RegExp, string: String) -> Bool {
    // NSRange is measured in UTF-16 code units, so take the length from the UTF-16 view.
    let wholeStringRange = NSRange(location: 0, length: string.utf16.count)
    // Only existence matters here, so ask Foundation for the first match instead of counting them all.
    return regex.firstMatch(in: string, options: [], range: wholeStringRange) != nil
}
// MARK: - String literal support
/// A helper subclass that lets regular expressions be constructed unconditionally from compile-time string literals.
/// **Note**: This must be a final subclass in order to satisfy the initializer requirements of `ExpressibleByStringLiteral`.
/// (Not that you'd want to subclass it anyway, but this is also why it can't just be implemented as an extension to RegExp.)
public final class RegExpLiteral: RegExp, ExpressibleByStringLiteral {

    /// Creates a new regular expression from the specified string literal,
    /// compiled with the `.dotMatchesLineSeparators` option.
    /// Fails with a fatal runtime error if the string was not a valid regular expression pattern.
    public convenience init(stringLiteral value: String) {
        do {
            try self.init(pattern: value, options: .dotMatchesLineSeparators)
        } catch let compileError as NSError {
            // A literal that cannot be parsed is treated as a programmer error:
            // abort with the error's description rather than propagating it.
            fatalError(compileError.description)
        }
    }

    /// Forwards to `init(stringLiteral:)`.
    public convenience init(extendedGraphemeClusterLiteral value: String) {
        self.init(stringLiteral: value)
    }

    /// Forwards to `init(stringLiteral:)`.
    public convenience init(unicodeScalarLiteral value: String) {
        self.init(stringLiteral: value)
    }
}
//MARK: - String extensions
/// Gives `String` a more readable API for matching and replacing regular expressions.
public extension String {

    /// - Returns: A copy of the string in which every occurrence of `regexp` has been
    ///   replaced using the specified `template`.
    public func replacing(_ regexp: RegExp, with template: String) -> String {
        let replaced = regexp.replaceMatches(in: self, withTemplate: template)
        return replaced
    }

    /// Replaces every occurrence of `regexp` in the string, in place, using the specified `template`.
    public mutating func replace(_ regexp: RegExp, with template: String) {
        let replaced = regexp.replaceMatches(in: self, withTemplate: template)
        self = replaced
    }

    /// - Returns: `true` if `regexp` matches this string one or more times, or `false` otherwise.
    public func matches(_ regexp: RegExp) -> Bool {
        guard regexp.firstMatch(in: self) != nil else { return false }
        return true
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment