Skip to content

Instantly share code, notes, and snippets.

@davidbalbert
Created November 28, 2023 21:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidbalbert/fc107e91d507cd0836bb02bcc936f2b3 to your computer and use it in GitHub Desktop.
Save davidbalbert/fc107e91d507cd0836bb02bcc936f2b3 to your computer and use it in GitHub Desktop.
Original fixup(withPrevious: Chunk)
extension Chunk {
    /// Re-synchronises this chunk's break bookkeeping with the chunk that now
    /// precedes it.
    ///
    /// The chunk's `startBreakState` is replaced with `prev.endBreakState`, and
    /// the chunk's unicode scalars are replayed through two break states side by
    /// side: the one the chunk used to start with, and the one inherited from
    /// `prev`. The replay stops as soon as both agree (they report the same
    /// break, or neither reports a break and the two states compare equal), or
    /// when the scalars run out. `prefixCount` is then updated if the position
    /// of the first break moved.
    ///
    /// - Parameter prev: The chunk whose `endBreakState` seeds this chunk.
    /// - Returns: `true` when the two states synced up before the end of the
    ///   chunk; `false` when the scan ran to the end, in which case
    ///   `endBreakState` is also overwritten with the inherited state.
    mutating func fixup(withPrevious prev: Chunk) -> Bool {
        let scalars = string.unicodeScalars

        // `staleState` replays the chunk the way it was segmented before;
        // `freshState` replays it starting from where `prev` now ends.
        var staleState = startBreakState
        var freshState = prev.endBreakState
        startBreakState = freshState

        var cursor = string.startIndex
        var newFirstBreak: String.Index? = nil

        scan: while cursor < scalars.endIndex {
            let scalar = scalars[cursor]
            // Both states must consume every scalar, so evaluate both
            // unconditionally before looking at the results.
            let brokeBefore = staleState.hasBreak(before: scalar)
            let breaksNow = freshState.hasBreak(before: scalar)

            // Remember only the earliest break reported by the fresh state.
            if breaksNow, newFirstBreak == nil {
                newFirstBreak = cursor
            }

            switch (brokeBefore, breaksNow) {
            case (true, true):
                // Both replays landed on the same break: back in sync.
                break scan
            case (false, false) where staleState == freshState:
                // No break either way and the states are identical: in sync.
                break scan
            default:
                cursor = scalars.index(after: cursor)
            }
        }

        if let newFirstBreak {
            // The fresh replay produced a first break; record its UTF-8 offset.
            prefixCount = string.utf8.distance(from: string.startIndex, to: newFirstBreak)
        } else if cursor >= lastBreak {
            // We got at least as far as lastBreak with no break reported, and
            // nothing lies beyond it, so the chunk contains no breaks at all.
            //
            // Special case: a chunk that previously had no breaks has
            // lastBreak == startIndex and firstBreak == endIndex, so
            // lastBreak < firstBreak. That lands here too, and since such a
            // chunk still has no breaks, this assignment changes nothing.
            prefixCount = string.utf8.count
        } else if cursor >= firstBreak {
            // We passed the old firstBreak without a break, but synced before
            // lastBreak. Continue from the scalar after the sync point, using a
            // copy of the synced state, to locate the new first break.
            let resumeAt = scalars.index(after: cursor)
            var probe = freshState
            // Force-unwrapped: a break is expected to remain at or before
            // lastBreak, so a nil result here indicates a logic error.
            let located = probe.firstBreak(in: string[resumeAt...])!.lowerBound
            prefixCount = string.utf8.distance(from: string.startIndex, to: located)
            // If this fails there is a bug here, or the assumptions above are wrong.
            assert(firstBreak <= lastBreak)
        }
        // Otherwise: we synced before even reaching the old firstBreak, so no
        // break moved and there is nothing to update.

        // Synced before the end of the chunk: nothing after this chunk needs fixing.
        guard cursor == string.endIndex else {
            return true
        }

        // We ran to the end, either still out of sync or syncing exactly at the
        // end of the chunk. Save the state so the following chunk can pick it up.
        endBreakState = freshState
        return false
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment