-
-
Save kingcos/b80987a2afde9894e1fd6262b0978f9a to your computer and use it in GitHub Desktop.
Summarized from https://github.com/apple/swift.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// HashTable.swift | |
// | |
// Created by kingcos on 2021/8/15. | |
// | |
import Foundation | |
/// A simple bitmap of a fixed number of bits, implementing a sorted set of | |
/// small nonnegative Int values. | |
/// 一个固定位数的简单位图,实现了一组已排序的小的非负的 Int 值。 | |
/// | |
/// Because `_UnsafeBitset` implements a flat bit vector, it isn't suitable for | |
/// holding arbitrarily large integers. The maximal element a bitset can store | |
/// is fixed at its initialization. | |
/// 由于 `_UnsafeBitset` 实现了一个平面的位向量,因此它不适合存放任意大的整数。 | |
/// 比特集可以存储的最大元素在其初始化时就已经固定了。 | |
struct UnsafeBitset { | |
let words: UnsafeMutablePointer<Word> | |
let wordCount: Int | |
init(words: UnsafeMutablePointer<Word>, wordCount: Int) { | |
self.words = words | |
self.wordCount = wordCount | |
} | |
} | |
extension UnsafeBitset { | |
static func word(for element: Int) -> Int { | |
// Note: We perform on UInts to get faster unsigned math (shifts). | |
let element = UInt(bitPattern: element) | |
let capacity = UInt(bitPattern: Word.capacity) | |
return Int(bitPattern: element / capacity) | |
} | |
static func bit(for element: Int) -> Int { | |
// Note: We perform on UInts to get faster unsigned math (masking). | |
let element = UInt(bitPattern: element) | |
let capacity = UInt(bitPattern: Word.capacity) | |
return Int(bitPattern: element % capacity) | |
} | |
static func join(word: Int, bit: Int) -> Int { | |
word &* Word.capacity &+ bit | |
} | |
static func wordCount(forCapacity capacity: Int) -> Int { | |
word(for: capacity &+ Word.capacity &- 1) | |
} | |
} | |
extension UnsafeBitset { | |
struct Word { | |
var value: UInt | |
init(_ value: UInt) { | |
self.value = value | |
} | |
} | |
} | |
extension UnsafeBitset.Word { | |
static var capacity: Int { | |
get { | |
UInt.bitWidth | |
} | |
} | |
var complement: UnsafeBitset.Word { | |
get { | |
UnsafeBitset.Word(~value) | |
} | |
} | |
var minimum: Int? { | |
get { | |
guard value != 0 else { return nil } | |
return value.trailingZeroBitCount | |
} | |
} | |
func subtracting(elementsBelow bit: Int) -> UnsafeBitset.Word { | |
let mask = UInt.max &<< bit | |
return UnsafeBitset.Word(value & mask) | |
} | |
@discardableResult | |
mutating func uncheckedInsert(_ bit: Int) -> Bool { | |
let mask: UInt = 1 &<< bit | |
let inserted = value & mask == 0 | |
value |= mask | |
return inserted | |
} | |
} | |
// Word implements Sequence by using a copy of itself as its Iterator. | |
// Iteration with `next()` destroys the word's value; however, this won't cause | |
// problems in normal use, because `next()` is usually called on a separate | |
// iterator, not the original word. | |
// 字(Word)通过使用自身的一个副本作为 Iterator(迭代器)来实现 Sequence。 | |
// 用 `next()` 进行的迭代会破坏字的值;但是,这在正常使用中不会造成问题, | |
// 因为 `next()` 通常是在一个单独的迭代器上调用,而不是在原始字上。 | |
extension UnsafeBitset.Word: Sequence, IteratorProtocol { | |
var count: Int { | |
value.nonzeroBitCount | |
} | |
/// Return the index of the lowest set bit in this word, | |
/// and also destructively clear it. | |
/// 返回这个字中最低设置位的索引,并破坏性地清除它。 | |
mutating func next() -> Int? { | |
guard value != 0 else { return nil } | |
let bit = value.trailingZeroBitCount | |
value &= value &- 1 // Clear lowest nonzero bit. | |
return bit | |
} | |
} | |
// ----- | |
struct HashTable { | |
typealias Word = UnsafeBitset.Word | |
var words: UnsafeMutablePointer<Word> | |
let bucketMask: Int | |
init(words: UnsafeMutablePointer<Word>, bucketCount: Int) { | |
self.words = words | |
self.bucketMask = bucketCount &- 1 | |
} | |
var bucketCount: Int { | |
get { | |
bucketMask &+ 1 | |
} | |
} | |
var wordCount: Int { | |
UnsafeBitset.wordCount(forCapacity: bucketCount) | |
} | |
} | |
extension HashTable { | |
struct Bucket { | |
var offset: Int | |
init(offset: Int) { | |
self.offset = offset | |
} | |
init(word: Int, bit: Int) { | |
self.offset = UnsafeBitset.join(word: word, bit: bit) | |
} | |
var word: Int { | |
UnsafeBitset.word(for: offset) | |
} | |
var bit: Int { | |
get { | |
return UnsafeBitset.bit(for: offset) | |
} | |
} | |
} | |
func idealBucket(forHashValue hashValue: Int) -> Bucket { | |
return Bucket(offset: hashValue & bucketMask) | |
} | |
} | |
extension HashTable: Sequence { | |
struct Iterator: IteratorProtocol { | |
let hashTable: HashTable | |
var wordIndex: Int | |
var word: Word | |
init(_ hashTable: HashTable) { | |
self.hashTable = hashTable | |
self.wordIndex = 0 | |
self.word = hashTable.words[0] | |
if hashTable.bucketCount < Word.capacity { | |
// TODO | |
} | |
} | |
mutating func next() -> Bucket? { | |
if let bit = word.next() { | |
return Bucket(word: wordIndex, bit: bit) | |
} | |
while wordIndex + 1 < hashTable.wordCount { | |
wordIndex += 1 | |
word = hashTable.words[wordIndex] | |
if let bit = word.next() { | |
return Bucket(word: wordIndex, bit: bit) | |
} | |
} | |
return nil | |
} | |
} | |
func makeIterator() -> Iterator { | |
Iterator(self) | |
} | |
} | |
extension HashTable { | |
func nextHole(atOrAfter bucket: Bucket) -> Bucket { | |
// Note that if we have only a single partial word, its out-of-bounds bits | |
// are guaranteed to be all set, so the formula below gives correct results. | |
var word = bucket.word | |
if let bit = | |
words[word] | |
.complement | |
.subtracting(elementsBelow: bucket.bit) | |
.minimum { | |
return Bucket(word: word, bit: bit) | |
} | |
var wrap = false | |
while true { | |
// _precondition(!wrap, "Hash table has no holes") | |
word &+= 1 | |
if word == wordCount { | |
wrap = true | |
word = 0 | |
} | |
if let bit = words[word].complement.minimum { | |
return Bucket(word: word, bit: bit) | |
} | |
} | |
} | |
func insertNew(hashValue: Int) -> Bucket { | |
let hole = nextHole(atOrAfter: idealBucket(forHashValue: hashValue)) | |
insert(hole) | |
return hole | |
} | |
func insert(_ bucket: Bucket) { | |
words[bucket.word].uncheckedInsert(bucket.bit) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment