Skip to content

Instantly share code, notes, and snippets.

@leventov leventov/hash_table.py
Last active Dec 6, 2015

Embed
What would you like to do?
Proposed optimization for shift removal procedure in hash tables with linear probing. Inspired by https://github.com/apple/swift/blob/8d9ef80304d7b36e13619ea50e6e76f3ec9221ba/stdlib/public/core/HashedCollections.swift.gyb#L3221-L3279
# (Pseudocode)
def shift_distance(chain_end, base):
return (chain_end - base) & (capacity - 1)
def next(index):
return (index + 1) & (capacity - 1)
def prev(index):
return (index - 1) & (capacity - 1)
# 1. Naive shift remove procedure:
def shift_remove_naive(index_to_remove):
index_to_shift = next(index_to_remove)
while !slot_empty(index_to_shift):
base = index(hash(key(index_to_shift)))
if shift_distance(index_to_shift, base) >= shift_distance(index_to_shift, index_to_remove):
replace_slot(index_to_remove, index_to_shift)
index_to_remove = index_to_shift
index_to_shift = next(index_to_shift)
clear_slot(index_to_remove)
# 2. Shift remove procedure by Swift devs,
# https://github.com/apple/swift/blob/8d9ef80304d7b36e13619ea50e6e76f3ec9221ba/stdlib/public/core/HashedCollections.swift.gyb#L3221-L3279
def collision_chain_end(index):
last_index = next(index)
while !slot_empty(last_index):
last_index = next(last_index)
return prev(last_index)
def shift_remove_swift(index_to_remove):
last_index = collision_chain_end(index_to_remove)
outer_loop:
while true:
index_to_shift = last_index
while index_to_shift != index_to_remove:
base = index(hash(key(index_to_shift)))
if shift_distance(index_to_shift, base) >= shift_distance(index_to_shift, index_to_remove):
replace_slot(index_to_remove, index_to_shift)
index_to_remove = index_to_shift
contunue outer_loop
index_to_shift = prev(index_to_shift)
# not shifted in the above inner loop
# break from the outer loop
break
clear_slot(index_to_remove)
# The approach by Swift devs allows to make less replacements, and probably make expensive
# `index(hash(key(index)))` procedure (expensive because hash code computation, if not cached
# in the key object, is potentially expensive) for less keys. The "naive" shift remove always
# performs this procedure for all keys till the collistion chain end, but Swift's shift remove
# may perform this procedure only for some tail of the collision chain.
# OTOH, Swift's shift remove may perform this procedure for some keys several times.
# 3. The proposed optimization:
def shift_remove_optimized(index_to_remove):
last_index = collision_chain_end(index_to_remove)
all:
if last_index != index_to_remove:
# Fast path
if last_index == index_to_remove + 1:
if index(hash(key(last_index))) != last_index:
replace_slot(index_to_remove, last_index)
index_to_remove = last_index
else:
chain_len = shift_distance(last_index, index_to_remove)
# This stack should be on-stack (sorry for tautology), in languages like C.
# chain_len - 1 is the max capacity needed for the stack
stack = array(chain_len - 1)
stack_i = 0
index_to_shift = last_index
while true:
base = index(hash(key(index_to_shift)))
key_distance = shift_distance(index_to_shift, base)
if key_distance >= shift_distance(index_to_shift, index_to_remove):
replace_slot(index_to_remove, index_to_shift)
index_to_remove = index_to_shift
break
else:
index_to_shift = prev(index_to_shift)
if index_to_shift == index_to_remove:
break all
stack[stack_i] = key_distance
stack_i += 1
while index_to_shift != last_index:
index_to_shift = last_index
stack_i = 0
while true:
key_distance = stack[stack_i]
if key_distance >= shift_distance(index_to_shift, index_to_remove):
replace_slot(index_to_remove, index_to_shift)
index_to_remove = index_to_shift
break
else:
index_to_shift = prev(index_to_shift)
if index_to_shift == index_to_remove:
break all
clear_slot(index_to_remove)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.