Skip to content

Instantly share code, notes, and snippets.

@kingcos
Created August 15, 2021 13:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kingcos/b80987a2afde9894e1fd6262b0978f9a to your computer and use it in GitHub Desktop.
Save kingcos/b80987a2afde9894e1fd6262b0978f9a to your computer and use it in GitHub Desktop.
//
// HashTable.swift
//
// Created by kingcos on 2021/8/15.
//
import Foundation
/// A simple bitmap of a fixed number of bits, implementing a sorted set of
/// small nonnegative Int values.
/// 一个固定位数的简单位图,实现了一组已排序的小的非负的 Int 值。
///
/// Because `_UnsafeBitset` implements a flat bit vector, it isn't suitable for
/// holding arbitrarily large integers. The maximal element a bitset can store
/// is fixed at its initialization.
/// 由于 `_UnsafeBitset` 实现了一个平面的位向量,因此它不适合存放任意大的整数。
/// 比特集可以存储的最大元素在其初始化时就已经固定了。
struct UnsafeBitset {
let words: UnsafeMutablePointer<Word>
let wordCount: Int
init(words: UnsafeMutablePointer<Word>, wordCount: Int) {
self.words = words
self.wordCount = wordCount
}
}
extension UnsafeBitset {
static func word(for element: Int) -> Int {
// Note: We perform on UInts to get faster unsigned math (shifts).
let element = UInt(bitPattern: element)
let capacity = UInt(bitPattern: Word.capacity)
return Int(bitPattern: element / capacity)
}
static func bit(for element: Int) -> Int {
// Note: We perform on UInts to get faster unsigned math (masking).
let element = UInt(bitPattern: element)
let capacity = UInt(bitPattern: Word.capacity)
return Int(bitPattern: element % capacity)
}
static func join(word: Int, bit: Int) -> Int {
word &* Word.capacity &+ bit
}
static func wordCount(forCapacity capacity: Int) -> Int {
word(for: capacity &+ Word.capacity &- 1)
}
}
extension UnsafeBitset {
struct Word {
var value: UInt
init(_ value: UInt) {
self.value = value
}
}
}
extension UnsafeBitset.Word {
static var capacity: Int {
get {
UInt.bitWidth
}
}
var complement: UnsafeBitset.Word {
get {
UnsafeBitset.Word(~value)
}
}
var minimum: Int? {
get {
guard value != 0 else { return nil }
return value.trailingZeroBitCount
}
}
func subtracting(elementsBelow bit: Int) -> UnsafeBitset.Word {
let mask = UInt.max &<< bit
return UnsafeBitset.Word(value & mask)
}
@discardableResult
mutating func uncheckedInsert(_ bit: Int) -> Bool {
let mask: UInt = 1 &<< bit
let inserted = value & mask == 0
value |= mask
return inserted
}
}
// Word implements Sequence by using a copy of itself as its Iterator.
// Iteration with `next()` destroys the word's value; however, this won't cause
// problems in normal use, because `next()` is usually called on a separate
// iterator, not the original word.
// 字(Word)通过使用自身的一个副本作为 Iterator(迭代器)来实现 Sequence。
// 用 `next()` 进行的迭代会破坏字的值;但是,这在正常使用中不会造成问题,
// 因为 `next()` 通常是在一个单独的迭代器上调用,而不是在原始字上。
extension UnsafeBitset.Word: Sequence, IteratorProtocol {
var count: Int {
value.nonzeroBitCount
}
/// Return the index of the lowest set bit in this word,
/// and also destructively clear it.
/// 返回这个字中最低设置位的索引,并破坏性地清除它。
mutating func next() -> Int? {
guard value != 0 else { return nil }
let bit = value.trailingZeroBitCount
value &= value &- 1 // Clear lowest nonzero bit.
return bit
}
}
// -----
struct HashTable {
typealias Word = UnsafeBitset.Word
var words: UnsafeMutablePointer<Word>
let bucketMask: Int
init(words: UnsafeMutablePointer<Word>, bucketCount: Int) {
self.words = words
self.bucketMask = bucketCount &- 1
}
var bucketCount: Int {
get {
bucketMask &+ 1
}
}
var wordCount: Int {
UnsafeBitset.wordCount(forCapacity: bucketCount)
}
}
extension HashTable {
struct Bucket {
var offset: Int
init(offset: Int) {
self.offset = offset
}
init(word: Int, bit: Int) {
self.offset = UnsafeBitset.join(word: word, bit: bit)
}
var word: Int {
UnsafeBitset.word(for: offset)
}
var bit: Int {
get {
return UnsafeBitset.bit(for: offset)
}
}
}
func idealBucket(forHashValue hashValue: Int) -> Bucket {
return Bucket(offset: hashValue & bucketMask)
}
}
extension HashTable: Sequence {
struct Iterator: IteratorProtocol {
let hashTable: HashTable
var wordIndex: Int
var word: Word
init(_ hashTable: HashTable) {
self.hashTable = hashTable
self.wordIndex = 0
self.word = hashTable.words[0]
if hashTable.bucketCount < Word.capacity {
// TODO
}
}
mutating func next() -> Bucket? {
if let bit = word.next() {
return Bucket(word: wordIndex, bit: bit)
}
while wordIndex + 1 < hashTable.wordCount {
wordIndex += 1
word = hashTable.words[wordIndex]
if let bit = word.next() {
return Bucket(word: wordIndex, bit: bit)
}
}
return nil
}
}
func makeIterator() -> Iterator {
Iterator(self)
}
}
extension HashTable {
func nextHole(atOrAfter bucket: Bucket) -> Bucket {
// Note that if we have only a single partial word, its out-of-bounds bits
// are guaranteed to be all set, so the formula below gives correct results.
var word = bucket.word
if let bit =
words[word]
.complement
.subtracting(elementsBelow: bucket.bit)
.minimum {
return Bucket(word: word, bit: bit)
}
var wrap = false
while true {
// _precondition(!wrap, "Hash table has no holes")
word &+= 1
if word == wordCount {
wrap = true
word = 0
}
if let bit = words[word].complement.minimum {
return Bucket(word: word, bit: bit)
}
}
}
func insertNew(hashValue: Int) -> Bucket {
let hole = nextHole(atOrAfter: idealBucket(forHashValue: hashValue))
insert(hole)
return hole
}
func insert(_ bucket: Bucket) {
words[bucket.word].uncheckedInsert(bucket.bit)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment