Created
February 21, 2022 00:37
-
-
Save jdh30/354371c501c375704f8a25598a3cff6a to your computer and use it in GitHub Desktop.
Open addressed hash table in OCaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let primes = | |
[|3; 7; 13; 31; 61; 127; 251; 509; 1021; 2039; 4093; 8191; 16381; 32749; | |
65521; 131071; 262139; 524287; 1048573; 2097143; 4194301; 8388593; | |
16777213; 33554393; 67108859; 134217689; 268435399; 536870909; 1073741789; | |
2147483647|] | |
let mods = | |
[|(fun n -> n mod 3); | |
(fun n -> n mod 7); | |
(fun n -> n mod 13); | |
(fun n -> n mod 31); | |
(fun n -> n mod 61); | |
(fun n -> n mod 127); | |
(fun n -> n mod 251); | |
(fun n -> n mod 509); | |
(fun n -> n mod 1021); | |
(fun n -> n mod 2039); | |
(fun n -> n mod 4093); | |
(fun n -> n mod 8191); | |
(fun n -> n mod 16381); | |
(fun n -> n mod 32749); | |
(fun n -> n mod 65521); | |
(fun n -> n mod 131071); | |
(fun n -> n mod 262139); | |
(fun n -> n mod 524287); | |
(fun n -> n mod 1048573); | |
(fun n -> n mod 2097143); | |
(fun n -> n mod 4194301); | |
(fun n -> n mod 8388593); | |
(fun n -> n mod 16777213); | |
(fun n -> n mod 33554393); | |
(fun n -> n mod 67108859); | |
(fun n -> n mod 134217689); | |
(fun n -> n mod 268435399); | |
(fun n -> n mod 536870909); | |
(fun n -> n mod 1073741789); | |
(fun n -> n mod 2147483647)|] | |
module type HASH = sig | |
type t | |
[@@deriving show] | |
val empty : t | |
val equal : t -> t -> bool | |
val hash : t -> int | |
end | |
module HashTable(Key: HASH) : sig | |
type 'v t | |
val make : unit -> 'v t | |
val add : 'v t -> Key.t -> 'v -> unit | |
val remove : 'v t -> Key.t -> unit | |
val mem : 'v t -> Key.t -> bool | |
val find : 'v t -> Key.t -> 'v | |
val find_opt : 'v t -> Key.t -> 'v option | |
val update : 'v t -> Key.t -> ('v -> 'v) -> unit | |
val fold : ('a -> Key.t -> 'v -> 'a) -> 'a -> 'v t -> 'a | |
val length : 'v t -> int | |
val to_array : 'v t -> (Key.t * 'v) array | |
val of_array : (Key.t * 'v) array -> 'v t | |
val of_seq : (Key.t * 'v) Seq.t -> 'v t | |
val pp : Format.formatter -> (Format.formatter -> 'v -> unit) -> 'v t -> unit | |
val show : ('v -> string) -> 'v t -> string | |
end = struct | |
type 'v entry = | |
{ mutable hash: int; | |
mutable key: Key.t; | |
mutable value: 'v } | |
type 'v t = | |
{ mutable capacity: int; | |
mutable entries: 'v entry array; | |
mutable count: int } | |
let emptyEntry = Obj.magic() | |
let make() = | |
let capacity = 0 in | |
{ capacity = capacity; | |
entries = Array.make primes.(capacity) emptyEntry; | |
count = 0 } | |
let hash x = abs(98299 * Key.hash x) | |
let is_match h k entry = | |
entry.hash = h && Key.equal entry.key k | |
let is_empty_entry entry = entry == emptyEntry | |
let delete_at t i = | |
t.entries.(i) <- emptyEntry | |
let next n i = | |
if i+1 = n then 0 else i+1 | |
let add_aux t modulo h k v = | |
let rec loop i = | |
let entry = t.entries.(i) in | |
if is_empty_entry entry then | |
( t.count <- t.count + 1; | |
t.entries.(i) <- {hash=h; key=k; value=v} ) | |
else if is_match h k entry then | |
( if t.entries.(i).value != v then | |
t.entries.(i).value <- v ) | |
else loop (next (Array.length t.entries) i) in | |
loop(modulo h) | |
let clone t = | |
let old_entries = t.entries in | |
t.count <- 0; | |
t.entries <- Array.make primes.(t.capacity) emptyEntry; | |
let modulo = mods.(t.capacity) in | |
old_entries | |
|> Array.iter (fun entry -> | |
if not(is_empty_entry entry) then | |
add_aux t modulo entry.hash entry.key entry.value) | |
let add t k v = | |
if 5*t.count >= Array.length t.entries then | |
( t.capacity <- t.capacity + 1; | |
clone t ); | |
add_aux t mods.(t.capacity) (hash k) k v | |
let mem t k = | |
let h = hash k in | |
let rec loop i = | |
let entry = t.entries.(i) in | |
not(is_empty_entry entry) && | |
(is_match h k entry || loop (next (Array.length t.entries) i)) in | |
loop(mods.(t.capacity) h) | |
let find t k = | |
let h = hash k in | |
let rec loop i = | |
let entry = t.entries.(i) in | |
if is_empty_entry entry then raise Not_found | |
else if is_match h k entry then entry.value | |
else loop (next (Array.length t.entries) i) in | |
loop(mods.(t.capacity) h) | |
let find_opt t k = try Some(find t k) with Not_found -> None | |
let update t k f = | |
let h = hash k in | |
let rec loop i = | |
let entry = t.entries.(i) in | |
if is_empty_entry entry then () | |
else if is_match h k entry then entry.value <- f entry.value | |
else loop (next (Array.length t.entries) i) in | |
loop(mods.(t.capacity) h) | |
let remove t k = | |
let modulo = mods.(t.capacity) in | |
let h = hash k in | |
let n = Array.length t.entries in | |
let rec shuffle i j = | |
let j = next n j in | |
let entry = t.entries.(j) in | |
if is_empty_entry entry then delete_at t i else | |
let k = modulo entry.hash in | |
if (j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j)) then | |
( t.entries.(i) <- t.entries.(j); | |
shuffle j j ) | |
else shuffle i j in | |
let rec loop i = | |
let entry = t.entries.(i) in | |
if is_empty_entry entry then () | |
else if is_match h k entry then | |
( t.count <- t.count - 1; | |
shuffle i i ) | |
else loop (next n i) in | |
loop(modulo h); | |
if 50*t.count < n && t.capacity > 0 then | |
( t.capacity <- t.capacity - 1; | |
clone t ) | |
let fold f a t = | |
Array.fold_left (fun a entry -> | |
if is_empty_entry entry then a else | |
f a entry.key entry.value) a t.entries | |
let length t = | |
Printf.printf "t.count=%d, fold=%d\n%!" t.count (fold (fun n _ _ -> n+1) 0 t); | |
t.count | |
let to_array t = | |
let kvs = Array.make (length t) (Obj.magic()) in | |
let _ = fold (fun dst k v -> kvs.(dst) <- (k, v); dst+1) 0 t in | |
kvs | |
let of_array kvs = | |
let h = make() in | |
for i=0 to Array.length kvs-1 do | |
let k, v = kvs.(i) in | |
add h k v | |
done; | |
h | |
let of_seq kvs = | |
let h = make() in | |
Seq.iter (fun (k, v) -> add h k v) kvs; | |
h | |
let pp fmt pp_v xs = | |
Format.fprintf fmt "[|@["; | |
let _ = | |
fold (fun not_first k v -> | |
if not_first then | |
Format.fprintf fmt ";@ "; | |
Format.fprintf fmt "@[%a,@ %a@]" Key.pp k pp_v v; | |
true) false xs in | |
Format.fprintf fmt "@]|]" | |
let show show xs = | |
let buf = Buffer.create (2*length xs) in | |
let fmt = Format.formatter_of_buffer buf in | |
pp fmt (fun fmt x -> Format.fprintf fmt "%s" (show x)) xs; | |
Buffer.to_bytes buf | |
|> Bytes.to_string | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment