Skip to content

Instantly share code, notes, and snippets.

@harlanhaskins
Last active August 4, 2020 23:45
Show Gist options
  • Star 11 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save harlanhaskins/4331dc15961ab4a71c1356a2ed569fe0 to your computer and use it in GitHub Desktop.
Save harlanhaskins/4331dc15961ab4a71c1356a2ed569fe0 to your computer and use it in GitHub Desktop.
Swift String Scanner
import Foundation
/// `StringScanner` is a fast scanner for Strings and String-like objects.
/// It's used to extract structured bits from unstructured strings, while
/// avoiding making extra copies of string bits until absolutely necessary.
/// You can build Scanners over Substrings, allowing you to scan
/// parts of strings and use smaller, more specialized scanners to extract bits
/// of that String without needing to reuse another scanner.
public struct StringScanner<Input: StringProtocol> {
let input: Input
var index: Input.Index
/// Creates a new scanner over the provided input.
/// - Parameter input: The string or substring to scan.
public init(_ input: Input) {
self.input = input
self.index = input.startIndex
}
var currentChar: Character? {
return index < input.endIndex ? input[index] : nil
}
mutating func advance() {
_ = input.formIndex(&index, offsetBy: 1, limitedBy: input.endIndex)
}
/// Whether this scanner has exhausted all of its input string.
public var hasInput: Bool {
return currentChar != nil
}
/// Attempts to scan an integer from the current position in the input string.
/// If the cursor is not pointing to an integer, this function returns `nil`.
public mutating func scanInt() -> Int? {
guard isAt(.decimalDigits) else {
return nil
}
let int = scan(.decimalDigits)
return Int(int)
}
/// Scans multiple integers separated by the provided separator set.
/// It stops scanning as soon as the first non-integer character is found
/// that is not in the separator set.
///
/// - Parameter separator: The character set that separates each integer
/// in the input.
/// - Returns: All integers from the current cursor position, separated by
/// the separator character.
public mutating func scanInts(separatedBy separator: CharacterSet) -> [Int] {
let rawInts = scan(.decimalDigits, separatedBy: separator)
return rawInts.map { Int($0)! }
}
/// Scans the provided characters in the provided character set, each time
/// skipping characters in a separator character set.
/// You can use this to extract similar data separated by a given separator.
///
/// - Parameters:
/// - chars: The set of characters you're trying to keep.
/// - separator: The set of separator characters between each entry
/// you're scanning.
/// - Returns: All substrings that matched the initial character set with the
/// `separator` set between them.
public mutating func scan(
_ chars: CharacterSet,
separatedBy separator: CharacterSet
) -> [Input.SubSequence] {
var results = [Input.SubSequence]()
repeat {
results.append(scan(chars))
guard isAt(separator) else {
break
}
skip(separator)
} while isAt(chars)
return results
}
/// All remaining input that has yet to be consumed. Useful for debugging.
public var remainingInput: Input.SubSequence {
return input[index...]
}
/// Determines if the scanner is currently pointing to a member of a character
/// set.
/// - Parameter chars: The character set you're testing the current character
/// against.
public func isAt(_ chars: CharacterSet) -> Bool {
guard let c = currentChar else { return false }
for scalar in c.unicodeScalars {
if !chars.contains(scalar) { return false }
}
return true
}
/// Scans and saves all characters up to, but not including, the first
/// character that is contained within the provided character set.
///
/// - Parameter chars: The character set that signals the end of the scanned
/// region.
/// - Returns: The sequence of characters up to, but not including, the first
/// character that appears in the provided character set.
public mutating func scanUpTo(_ chars: CharacterSet) -> Input.SubSequence {
let start = index
skip(to: chars)
return input[start..<index]
}
/// Scans and saves all characters that are contained within the provided
/// character set.
///
/// - Parameter chars: The character set that each scanned character must
/// belong to.
/// - Returns: The sequence of characters that all are contained within the
/// character set, starting at the current character.
public mutating func scan(_ chars: CharacterSet) -> Input.SubSequence {
let start = index
skip(chars)
return input[start..<index]
}
/// Scans the exact string or substring provided, and returns its range in the
/// input string, or `nil` if it did not match.
///
/// - Parameter string: The string to match.
/// - Returns: The range in the input string where this string first
/// appears after the current cursor, or `nil` if the string
/// does not appear.
public mutating func scan<Str: StringProtocol>(
_ string: Str
) -> Input.SubSequence? {
let start = index
guard skip(string) else { return nil }
return input[start..<index]
}
/// Skips the exact string or substring provided, if it is currently at the
/// Scanner's cursor.
///
/// - Parameter string: The string to skip.
public mutating func skip<Str: StringProtocol>(_ string: Str) -> Bool {
let start = index
var scanner = StringScanner<Str>(string)
while let char = currentChar, let strChar = scanner.currentChar {
if char == strChar {
advance()
scanner.advance()
} else {
index = start
return false
}
}
return !scanner.hasInput
}
/// Skips all characters up to, but not including, the first character in the
/// provided character set.
///
/// - Parameter chars: The character set that signals the end of the skipped
/// region.
public mutating func skip(to chars: CharacterSet) {
while currentChar != nil && !isAt(chars) {
advance()
}
}
/// Skips all characters in the provided character set.
///
/// - Parameter chars: The character set that should be skipped.
public mutating func skip(_ chars: CharacterSet) {
while isAt(chars) {
advance()
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment