Skip to content

Instantly share code, notes, and snippets.

@davidbalbert
Created December 11, 2023 17:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidbalbert/f07c865defe6e070d4c2afdd1454f343 to your computer and use it in GitHub Desktop.
Save davidbalbert/f07c865defe6e070d4c2afdd1454f343 to your computer and use it in GitHub Desktop.
Partially working BigIndexSet + LineCache
//
// BigIndexSet.swift
// Watt
//
// Created by David Albert on 12/9/23.
//
import Foundation
struct BigIndexSet: BTree {
var root: BTreeNode<BigIndexSetSummary>
init(_ root: BTreeNode<BigIndexSetSummary>) {
self.root = root
}
}
struct BigIndexSetSummary: BTreeSummary {
var indexCount: Int
static var zero: BigIndexSetSummary {
self.init(indexCount: 0)
}
static func += (lhs: inout BigIndexSetSummary, rhs: BigIndexSetSummary) {
lhs.indexCount += rhs.indexCount
}
init(summarizing leaf: BigIndexSetLeaf) {
indexCount = leaf.data.count
}
init(indexCount: Int) {
self.indexCount = indexCount
}
}
extension BigIndexSetSummary: BTreeDefaultMetric {
static var defaultMetric: BigIndexSet.IndicesBaseMetric { BigIndexSet.IndicesBaseMetric() }
}
struct BigIndexSetLeaf: BTreeLeaf {
static let minSize = 32
static let maxSize = 64
var count: Int
var data: [Int]
static var zero: BigIndexSetLeaf {
self.init(count: 0, data: [])
}
var isUndersized: Bool {
data.count < Self.minSize
}
mutating func pushMaybeSplitting(other: BigIndexSetLeaf) -> BigIndexSetLeaf? {
for i in other.data {
data.append(i + count)
}
count += other.count
if data.count <= Self.maxSize {
return nil
} else {
let splitIndex = data.count / 2
let splitCount = data[splitIndex - 1]
var new = Array(data[splitIndex...])
for i in 0..<new.count {
new[i] -= splitCount
}
let newCount = count - splitCount
data.removeLast(new.count)
count = splitCount
return BigIndexSetLeaf(count: newCount, data: Array(new))
}
}
subscript(bounds: Range<Int>) -> BigIndexSetLeaf {
var d: [Int] = []
for i in data {
if bounds.contains(i) {
d.append(i - bounds.lowerBound)
}
}
return BigIndexSetLeaf(count: bounds.count, data: d)
}
}
extension BigIndexSet {
var upperBound: Int {
root.count
}
}
extension BigIndexSet: BidirectionalCollection {
typealias Index = BTreeNode<BigIndexSetSummary>.Index
var count: Int {
root.measure(using: .indices)
}
var startIndex: Index {
return root.startIndex
}
var endIndex: Index {
return root.endIndex
}
subscript(position: Index) -> Int {
position.validate(for: root)
precondition(position >= startIndex && position < endIndex, "Index out of bounds")
let i = index(roundingDown: position)
let (_, offset) = i.read()!
return offset
}
func index(before i: Index) -> Index {
return root.index(before: i, using: .indices)
}
func index(after i: Index) -> Index {
return root.index(after: i, using: .indices)
}
func index(_ i: Index, offsetBy distance: Int) -> Index {
root.index(i, offsetBy: distance, using: .indices)
}
func index(_ i: Index, offsetBy distance: Int, limitedBy limit: Index) -> Index? {
root.index(i, offsetBy: distance, limitedBy: limit, using: .indices)
}
func distance(from start: Index, to end: Index) -> Int {
root.distance(from: start, to: end, using: .indices)
}
}
extension BigIndexSet {
func index(roundingDown i: Index) -> Index {
return root.index(roundingDown: i, using: .indices)
}
func isBoundary(_ i: Index) -> Bool {
i.validate(for: root)
return i.isBoundary(in: .indices)
}
}
extension BigIndexSet {
struct IndicesBaseMetric: BTreeMetric {
func measure(summary: BigIndexSetSummary, count: Int) -> Int {
count
}
func convertToBaseUnits(_ measuredUnits: Int, in leaf: BigIndexSetLeaf) -> Int {
measuredUnits
}
func convertFromBaseUnits(_ baseUnits: Int, in leaf: BigIndexSetLeaf) -> Int {
baseUnits
}
func isBoundary(_ offset: Int, in leaf: BigIndexSetLeaf) -> Bool {
IndicesMetric().isBoundary(offset, in: leaf)
}
func prev(_ offset: Int, in leaf: BigIndexSetLeaf) -> Int? {
IndicesMetric().prev(offset, in: leaf)
}
func next(_ offset: Int, in leaf: BigIndexSetLeaf) -> Int? {
IndicesMetric().next(offset, in: leaf)
}
var canFragment: Bool {
false
}
var type: BTreeMetricType {
.trailing
}
}
}
extension BTreeMetric<BigIndexSetSummary> where Self == BigIndexSet.IndicesBaseMetric {
static var indicesBaseMetric: BigIndexSet.IndicesBaseMetric { BigIndexSet.IndicesBaseMetric() }
}
extension BigIndexSet {
struct IndicesMetric: BTreeMetric {
func measure(summary: BigIndexSetSummary, count: Int) -> Int {
summary.indexCount
}
func convertToBaseUnits(_ measuredUnits: Int, in leaf: BigIndexSetLeaf) -> Int {
if measuredUnits >= leaf.data.count {
return leaf.count + 1
} else {
return leaf.data[measuredUnits]
}
}
func convertFromBaseUnits(_ baseUnits: Int, in leaf: BigIndexSetLeaf) -> Int {
let (i, found) = leaf.data.binarySearch(for: baseUnits)
if found {
return i
} else {
assert(i > 0)
return i - 1
}
}
func isBoundary(_ offset: Int, in leaf: BigIndexSetLeaf) -> Bool {
let (_, found) = leaf.data.binarySearch(for: offset)
return found
}
func prev(_ offset: Int, in leaf: BigIndexSetLeaf) -> Int? {
assert(offset > 0 && offset <= leaf.count)
var (i, found) = leaf.data.binarySearch(for: offset)
if found {
i -= 1
}
if i < 0 {
return nil
}
return leaf.data[i - 1]
}
func next(_ offset: Int, in leaf: BigIndexSetLeaf) -> Int? {
assert(offset >= 0 && offset < leaf.count)
var (i, found) = leaf.data.binarySearch(for: offset)
if found {
i += 1
}
if i == leaf.data.count {
return nil
}
return leaf.data[i]
}
var canFragment: Bool {
false
}
var type: BTreeMetricType {
.trailing
}
}
}
extension BTreeMetric<BigIndexSetSummary> where Self == BigIndexSet.IndicesBaseMetric {
static var indices: BigIndexSet.IndicesMetric { BigIndexSet.IndicesMetric() }
}
struct BigIndexSetBuilder {
var b: BTreeBuilder<BigIndexSet>
var leaf: BigIndexSetLeaf
var offsetOfLeaf: Int
var totalCount: Int
init(totalCount: Int) {
self.b = BTreeBuilder<BigIndexSet>()
self.leaf = .zero
self.offsetOfLeaf = 0
self.totalCount = totalCount
}
mutating func append(_ index: Int) {
precondition((leaf.data.isEmpty && index >= offsetOfLeaf) || (index > offsetOfLeaf + leaf.data.last!), "Indices must be inserted in order")
if leaf.data.count == BigIndexSetLeaf.maxSize {
leaf.count = index - offsetOfLeaf
self.offsetOfLeaf = index
b.push(leaf: leaf)
leaf = .zero
}
leaf.data.append(index - offsetOfLeaf)
totalCount = max(totalCount, index + 1)
}
mutating func push(_ indices: inout BigIndexSet) {
push(&indices, slicedBy: indices.startIndex..<indices.endIndex)
}
mutating func push(_ indices: inout BigIndexSet, slicedBy range: Range<BigIndexSet.Index>) {
precondition(range.lowerBound >= indices.startIndex && range.upperBound <= indices.endIndex, "Index out of bounds")
if !leaf.data.isEmpty {
leaf.count = totalCount - offsetOfLeaf
b.push(leaf: leaf)
leaf = .zero
}
let r = Range(range, in: indices.root)
totalCount += r.count
b.push(&indices.root, slicedBy: r)
}
consuming func build() -> BigIndexSet {
assert(totalCount >= offsetOfLeaf)
leaf.count = totalCount - offsetOfLeaf
b.push(leaf: leaf)
return b.build()
}
}
extension BigIndexSet {
mutating func insert(_ integer: Int) {
if contains(integer) {
return
}
if isEmpty || integer > last! {
// If we're inserting at the end, and we're uniquely referenced
// it's ok to mutate self.
var b = BTreeBuilder<Self>()
b.push(&self.root)
b.push(leaf: BigIndexSetLeaf(count: 0, data: [0]))
self = b.build()
return
}
// We're not inserting at the end, so we need to make sure not to mutate
// self while pushing integer. Otherwise when we push the second slice,
// our indices will be off by 1.
var dup = self
var b = BTreeBuilder<Self>()
b.push(&dup.root, slicedBy: 0..<integer)
b.push(leaf: BigIndexSetLeaf(count: 0, data: [0]))
b.push(&dup.root, slicedBy: (integer+1)..<upperBound)
self = b.build()
}
func contains(_ integer: Int) -> Bool {
if integer == 128 {
print("?")
}
if integer >= upperBound || integer < 0 {
return false
}
let i = root.index(startIndex, offsetBy: integer, using: .indicesBaseMetric)
return isBoundary(i)
}
}
extension BigIndexSet: ExpressibleByArrayLiteral {
init(arrayLiteral elements: Int...) {
self.init(elements)
}
}
extension BigIndexSet {
init(_ sequence: some Sequence<Int>) {
var b = BigIndexSetBuilder(totalCount: 0)
for i in sequence {
b.append(i)
}
self = b.build()
}
}
//
// BigIndexSetTests.swift
// WattTests
//
// Created by David Albert on 12/9/23.
//
import XCTest
@testable import Watt
final class BigIndexSetTests: XCTestCase {
func testInit() {
let indices = BigIndexSet(stride(from: 0, to: 200, by: 2))
XCTAssertEqual(indices.upperBound, 199)
for i in 0..<200 {
if i % 2 == 0 {
XCTAssert(indices.contains(i), "should contain \(i)")
} else {
XCTAssert(!indices.contains(i), "should not contain \(i)")
}
}
}
}
extension Range where Bound == Int {
init<Summary>(_ range: Range<BTreeNode<Summary>.Index>, in root: BTreeNode<Summary>) where Summary: BTreeSummary {
range.lowerBound.validate(for: root)
range.upperBound.validate(for: root)
self = range.lowerBound.position..<range.upperBound.position
}
}
//
// LineCache.swift
// Watt
//
// Created by David Albert on 12/9/23.
//
import Foundation
struct LineCache {
var cache: SortedCache<Line>
subscript(offset: Int) -> Line? {
get {
cache[offset]
}
set {
cache[offset] = newValue
}
}
mutating func invalidate(range: Range<Int>) {
if cache.isEmpty {
return
}
// Invalidate the cache, but treat range as closed (e.g. invalidate range.lowerBound..<(range.upperBound + 1)).
// Also invalidate the first index below range.lowerBound
let i = cache.key(before: range.lowerBound) ?? 0
let j = range.upperBound + 1
cache.invalidate(range: i..<j)
}
// This isn't right. I believe the logic should be the same as IntervalCache.invalidate(delta:)
mutating func invalidate<Tree>(delta: BTreeDelta<Tree>) where Tree: BTree {
var count = 0
for element in delta.elements {
switch element {
case .copy(let lowerBound, let upperBound):
if lowerBound > count {
// delete the gap
invalidate(range: count..<lowerBound)
cache.shiftKeys(startingAt: lowerBound, by: count - lowerBound)
}
count = upperBound
case .insert(let node):
// TODO: invalidate something here
cache.shiftKeys(startingAt: count, by: node.count)
count += node.count
}
}
if count < delta.baseCount {
let rangeToDelete = count..<delta.baseCount
cache.invalidate(range: count..<delta.baseCount)
cache.shiftKeys(startingAt: count, by: delta.baseCount - count)
}
}
}
//
// SortedCache.swift
// Watt
//
// Created by David Albert on 12/8/23.
//
import Foundation
struct SortedCache<Element> {
typealias Index = IndexSet.Index
var dictionary: Dictionary<Int, Element>
var keys: IndexSet
subscript(key: Int) -> Element? {
get {
return dictionary[key]
}
set {
if let value = newValue {
if dictionary.updateValue(value, forKey: key) == nil {
assert(!keys.contains(key))
keys.insert(key)
}
} else {
assert(dictionary.keys.contains(key) == keys.contains(key))
if keys.contains(key) {
dictionary.removeValue(forKey: key)
keys.remove(key)
}
}
}
}
mutating func removeAll(keepingCapacity keepCapacity: Bool = false) {
dictionary.removeAll(keepingCapacity: keepCapacity)
keys.removeAll()
}
mutating func invalidate(range: Range<Int>) {
for key in keys[keys.indexRange(in: range)] {
dictionary.removeValue(forKey: key)
}
keys.remove(integersIn: range)
}
func key(before key: Int) -> Int? {
keys.integerLessThan(key)
}
mutating func shiftKeys(startingAt index: Int, by delta: Int) {
guard delta != 0 else { return }
let ks: any Collection<Int> = delta > 0 ? keys.reversed() : keys
for key in ks where key >= index {
let newKey = key + delta
dictionary[newKey] = dictionary[key]!
dictionary.removeValue(forKey: key)
}
keys.shift(startingAt: index, by: delta)
}
}
extension SortedCache: BidirectionalCollection {
var count: Int {
assert(dictionary.count == keys.count)
return dictionary.count
}
var startIndex: Index {
keys.startIndex
}
var endIndex: Index {
keys.endIndex
}
func index(before i: Index) -> Index {
keys.index(before: i)
}
func index(after i: Index) -> Index {
keys.index(after: i)
}
subscript(position: Index) -> (key: Int, element: Element) {
(keys[position], dictionary[keys[position]]!)
}
}
extension SortedCache: ExpressibleByDictionaryLiteral {
init(dictionaryLiteral elements: (Int, Element)...) {
self.init(elements)
}
}
extension SortedCache {
init<S: Sequence>(_ sequence: S) where S.Element == (Int, Element) {
let d = Dictionary(sequence) { x, y in y }
self.dictionary = d
self.keys = IndexSet(d.keys)
}
}
extension SortedCache: Equatable where Element: Equatable {
static func ==(lhs: SortedCache, rhs: SortedCache) -> Bool {
return lhs.dictionary == rhs.dictionary
}
}
extension SortedCache: CustomStringConvertible {
var description: String {
dictionary.description
}
}
extension SortedCache {
static func + (lhs: SortedCache, rhs: SortedCache) -> SortedCache {
var result = lhs
for (key, value) in rhs {
result[key] = value
}
return result
}
}
//
// SortedCache.swift
// WattTests
//
// Created by David Albert on 12/8/23.
//
import XCTest
@testable import Watt
final class SortedCacheTests: XCTestCase {
func testSubscript() {
var dict: SortedCache = [1: "One", 2: "Two"]
XCTAssertEqual(dict[1], "One")
XCTAssertEqual(dict[2], "Two")
dict[1] = "New One"
XCTAssertEqual(dict[1], "New One")
dict[1] = nil
XCTAssertNil(dict[1])
dict[3] = "Three"
XCTAssertEqual(dict[3], "Three")
}
func testRemoveAll() {
var dict: SortedCache = [1: "One", 2: "Two"]
dict.removeAll()
XCTAssertNil(dict[1])
XCTAssertNil(dict[2])
}
func testKeyBefore() {
var cache: SortedCache = [1: "One", 3: "Three", 5: "Five"]
XCTAssertEqual(cache.key(before: 6), 5)
XCTAssertEqual(cache.key(before: 5), 3)
XCTAssertEqual(cache.key(before: 4), 3)
XCTAssertEqual(cache.key(before: 3), 1)
XCTAssertEqual(cache.key(before: 2), 1)
XCTAssertNil(cache.key(before: 1))
XCTAssertNil(cache.key(before: 0))
cache = [:]
XCTAssertNil(cache.key(before: 1))
}
func testInvalidate() {
func t<V>(_ range: Range<Int>, _ cache: SortedCache<V>, _ expected: SortedCache<V>, file: StaticString = #file, line: UInt = #line) where V: Equatable {
var c = cache
c.invalidate(range: range)
XCTAssertEqual(c, expected, file: file, line: line)
}
// unchanged
func u<V>(_ range: Range<Int>, _ cache: SortedCache<V>, file: StaticString = #file, line: UInt = #line) where V: Equatable {
var c = cache
c.invalidate(range: range)
XCTAssertEqual(c, cache, file: file, line: line)
}
// remove middle
t(2..<4, c(1...5, "a"..."e"), [1: "a", 4: "d", 5: "e"])
// doesn't have to be contiguous
t(2..<4, c(1...5, "a"..."e", without: 4), [1: "a", 5: "e"])
// empty ranges are unchanged
u(0..<0, c(1...4))
u(1..<1, c(1...4))
u(3..<3, c(1...4))
u(4..<4, c(1...4))
u(5..<5, c(1...4))
// empty prefix
u(0..<2, c(2...5))
// empty suffix
u(6..<10, c(2...5))
// overlapping prefix
t(0..<2, c(1...5), c(2...5))
// overlapping suffix
t(5..<10, c(1...5), c(1...4))
// covers fully
t(5..<10, c(5..<10), [:])
// subsumes
t(0..<15, c(5..<10), [:])
// inside
t(5..<10, c(0..<20), c(0..<5) + c(10..<20))
}
}
extension Unicode.Scalar: Strideable {
public func distance(to other: Unicode.Scalar) -> Int {
Int(other.value - value)
}
public func advanced(by n: Int) -> Unicode.Scalar {
Unicode.Scalar(Int(value) + n)!
}
}
fileprivate func c(_ kr: Range<Int>, without: Int...) -> SortedCache<Int> {
cn(kr, kr, 1, without: without)
}
fileprivate func c(_ kr: ClosedRange<Int>, without: Int...) -> SortedCache<Int> {
cn(kr, kr, 1, without: without)
}
fileprivate func c(_ kr: Range<Int>, _ vr: Range<Unicode.Scalar>, without: Int...) -> SortedCache<Unicode.Scalar> {
cn(kr, vr, 1, without: without)
}
fileprivate func c(_ kr: ClosedRange<Int>, _ vr: ClosedRange<Unicode.Scalar>, without: Int...) -> SortedCache<Unicode.Scalar> {
cn(kr, vr, 1, without: without)
}
fileprivate func c2(_ kr: Range<Int>, without: Int...) -> SortedCache<Int> {
cn(kr, kr, 2, without: without)
}
fileprivate func c2(_ kr: ClosedRange<Int>, without: Int...) -> SortedCache<Int> {
cn(kr, kr, 2, without: without)
}
fileprivate func c2(_ kr: Range<Int>, _ vr: Range<Unicode.Scalar>) -> SortedCache<Unicode.Scalar> {
cn(kr, vr, 2)
}
fileprivate func c2(_ kr: ClosedRange<Int>, _ vr: ClosedRange<Unicode.Scalar>) -> SortedCache<Unicode.Scalar> {
cn(kr, vr, 2)
}
fileprivate func cn<V>(_ kr: Range<Int>, _ vr: Range<V>, _ stride: Int, without: [Int] = []) -> SortedCache<V> where V: Hashable & Strideable, V.Stride == Int {
assert(kr.count == vr.count)
let keys = Swift.stride(from: kr.lowerBound, to: kr.upperBound, by: stride)
let vals = Swift.stride(from: vr.lowerBound, to: vr.upperBound, by: stride)
var dict = SortedCache(zip(keys, vals))
for k in without {
dict[k] = nil
}
return dict
}
fileprivate func cn<V>(_ kr: ClosedRange<Int>, _ vr: ClosedRange<V>, _ stride: Int, without: [Int] = []) -> SortedCache<V> where V: Hashable & Strideable, V.Stride == Int {
assert(kr.count == vr.count)
let keys = Swift.stride(from: kr.lowerBound, through: kr.upperBound, by: stride)
let vals = Swift.stride(from: vr.lowerBound, through: vr.upperBound, by: stride)
var dict = SortedCache(zip(keys, vals))
for k in without {
dict[k] = nil
}
return dict
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment