Skip to content

Instantly share code, notes, and snippets.

@gribozavr
Created April 13, 2016 01:29
Show Gist options
  • Save gribozavr/ed95f71b762d25bee2991dd9c0191b34 to your computer and use it in GitHub Desktop.
Save gribozavr/ed95f71b762d25bee2991dd9c0191b34 to your computer and use it in GitHub Desktop.
// ---------------------------------------------------------------------------
// Approach #1: use algorithms available in the standard library today.
//
// Disadvantage: the identifier characters are scanned twice, first time to
// advance the index, second time to check if we got the number of characters
// we requested.
extension Character {
var isASCIIDigit: Bool {
switch self {
case "0", "1", "2", "3", "4", "5", "6", "7", "8", "9":
return true
default:
return false
}
}
}
func scanIdentifier1(partialMangled: String) -> (identifier: String, remainder: String)? {
let chars = partialMangled.characters
// Extract the encoded length, followed by the identifier itself.
guard
// Find start of the identifier.
let start = chars.index(where: { !$0.isASCIIDigit }),
// Decode the length.
let length = Int(String(chars.prefix(upTo: start)))
else {
return nil
}
let identifier = chars[start..<chars.index(length, stepsFrom: start, limitedBy: chars.endIndex)]
if identifier.count != length {
// The string was too short.
return nil
}
let remainder = chars.suffix(from: identifier.endIndex)
return (String(identifier), String(remainder))
}
print(scanIdentifier1("1azzz"))
print(scanIdentifier1("10aaaaaaaazzz"))
print(scanIdentifier1("123"))
print(scanIdentifier1("aaa"))
print(scanIdentifier1("aaa123"))
print(scanIdentifier1("aaa123aaa"))
// ---------------------------------------------------------------------------
// Approach #2: define a new algorithm, Sequence.find().
//
// This implementation does not perform any redundant operations, but the
// computation of `identifierEnd` is rather clumsy.
extension Sequence {
@warn_unused_result
func find(where predicate: @noescape (Iterator.Element) throws -> Bool) rethrows -> Iterator.Element? {
for e in self {
if try predicate(e) {
return e
}
}
return nil
}
}
func scanIdentifier2(partialMangled: String) -> (identifier: String, remainder: String)? {
var chars = partialMangled.characters
// Extract the encoded length, followed by the identifier itself.
guard
// Find start of the identifier.
let identifierStart = chars.index(where: { (c: Character) -> Bool in !c.isASCIIDigit }),
// Decode the length.
let length = Int(String(chars.prefix(upTo: identifierStart)))
else {
return nil
}
// Slice away the number that we processed.
chars = chars.suffix(from: identifierStart)
// Find the end of the identifier, failing if the string does not contain enough characters.
guard let identifierEnd = chars.indices.enumerated().find(where: { $0.0 == length})?.1 else {
return nil
}
let identifier = chars.prefix(upTo: identifierEnd)
let remainder = chars.suffix(from: identifierEnd)
return (String(identifier), String(remainder))
}
print(scanIdentifier2("1azzz"))
print(scanIdentifier2("10aaaaaaaazzz"))
print(scanIdentifier2("123"))
print(scanIdentifier2("aaa"))
print(scanIdentifier2("aaa123"))
print(scanIdentifier2("aaa123aaa"))
// ---------------------------------------------------------------------------
// Approach #3: change Collection.index(_:stepsFrom:limitedBy:) to return an
// optional index.
//
// This method has to perform the range check to stop advancing the index when
// it reaches the limit. Currently it just discards the information about
// whether it reached the limit or not. Instead, it can cheaply return it to
// the caller.
//
// Note that the same logic does not apply to other
// Collection.index(_:stepsFrom:) overloads.
extension Collection {
@warn_unused_result
public func index_(
n: IndexDistance, stepsFrom i: Index, limitedBy limit: Index
) -> Index? {
_precondition(n >= 0,
"Only BidirectionalCollections can be advanced by a negative amount")
var i = i
for _ in stride(from: 0, to: n, by: 1) {
if limit == i {
return nil
}
formSuccessor(&i)
}
return i
}
}
func scanIdentifier3(partialMangled: String) -> (identifier: String, remainder: String)? {
let chars = partialMangled.characters
// Extract the encoded length, followed by the identifier itself.
guard
// Find start of the identifier.
let start = chars.index(where: { !$0.isASCIIDigit }),
// Decode the length.
let length = Int(String(chars.prefix(upTo: start))),
// Find the end of the identifier, failing if there are not enough characters.
let end = chars.index_(length, stepsFrom: start, limitedBy: chars.endIndex)
else {
return nil
}
let identifier = chars[start..<end]
let remainder = chars.suffix(from: end)
return (String(identifier), String(remainder))
}
print(scanIdentifier3("1azzz"))
print(scanIdentifier3("10aaaaaaaazzz"))
print(scanIdentifier3("123"))
print(scanIdentifier3("aaa"))
print(scanIdentifier3("aaa123"))
print(scanIdentifier3("aaa123aaa"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment