Skip to content

Instantly share code, notes, and snippets.

@hackwaly
Forked from Guest0x0/HAMT.ml
Created August 8, 2023 02:57
Show Gist options
  • Save hackwaly/46c9481e9f7fc085829ca1439208ef8d to your computer and use it in GitHub Desktop.
Save hackwaly/46c9481e9f7fc085829ca1439208ef8d to your computer and use it in GitHub Desktop.
Hash Array Mapped Trie implementation & benchmark in OCaml
(* HAMT v.s. AVL benchmark.
test for their usage as string map.
usage:
> ocamlopt HAMT.ml -o <executable-name>
> <executable-name> (avl|hamt) (random|ordered) <key-length>
time data of the form:
<tree-size> <add-time> <find-time>
will be displayed in stdout,
some progress information indicating the program is progressively running
will be displayed in stderr. *)
let ctpop =
let low_of_2bits = 0x55555555 in
let low_of_4bits = 0x33333333 in
let low_of_8bits = 0x0f0f0f0f in
fun n ->
(* let c2bits = (n land low_of_2bits ) + (n lsr 1) land low_of_2bits in *)
let c2bits = n - (n lsr 1) land low_of_2bits in
let c4bits = (c2bits land low_of_4bits ) + (c2bits lsr 2) land low_of_4bits in
let c8bits = (c4bits land low_of_8bits ) + (c4bits lsr 4) land low_of_8bits in
(* let c16bits = (c8bits land low_of_16bits) + (c8bits lsr 8) land low_of_16bits in *)
let c16bits = c8bits + (c8bits lsr 8) in
(c16bits + (c16bits lsr 16)) land 0x3f
let get_seg offset len value =
let mask = ((1 lsl len) - 1) in
(value lsr (32 - len - offset)) land mask
type bitmap = int
let bitmap_empty = 0
let bitmap_mem idx bitmap =
(bitmap land (1 lsl idx)) <> 0
let bitmap_get idx bitmap =
let below_mask = (1 lsl idx) - 1 in
ctpop (bitmap land below_mask)
let bitmap_set idx bitmap =
bitmap lor (1 lsl idx)
let bitmap_remove idx bitmap =
bitmap lxor (1 lsl idx)
type 'a sparse_array =
{ bitmap : bitmap
; data : 'a Array.t }
let sa_mem idx sa =
bitmap_mem idx sa.bitmap
let sa_find_opt idx sa =
match bitmap_mem idx sa.bitmap with
| false ->
None
| true ->
Some(sa.data.(bitmap_get idx sa.bitmap))
let sa_add idx v sa =
let pos = bitmap_get idx sa.bitmap in
let data' = Array.init (Array.length sa.data + 1) @@ fun i ->
if i < pos
then sa.data.(i)
else if i = pos
then v
else sa.data.(i - 1)
in
{ bitmap = bitmap_set idx sa.bitmap
; data = data' }
let sa_update idx f sa =
let pos = bitmap_get idx sa.bitmap in
let data' = Array.copy sa.data in
data'.(pos) <- f sa.data.(pos);
{ sa with data = data' }
let seg_len = 5
type ('k, 'v) t =
| Empty
| Leaf of 'k * 'v
| Collision of ('k * 'v) list
| Node of ('k, 'v) t sparse_array
let empty = Empty
let find_opt k t =
let h = Hashtbl.hash k in
let rec loop offset t =
match t with
| Leaf(k', v) when k' = k ->
Some v
| Empty
| Leaf(_, _) ->
None
| Collision buckets ->
List.assoc_opt k buckets
| Node sa ->
let idx = get_seg offset seg_len h in
Option.bind (sa_find_opt idx sa) (loop (offset + seg_len))
in
loop 0 t
let rec insert_assoc k v = function
| [] ->
[k, v]
| (k', _) :: tl when k' = k ->
(k, v) :: tl
| (k', v') :: tl ->
(k', v') :: insert_assoc k v tl
let add k v t =
let h = Hashtbl.hash k in
let rec loop offset t =
match t with
| Empty when offset + seg_len >= 32 ->
Leaf(k, v)
| Empty ->
let idx = get_seg offset seg_len h in
Node {
bitmap = bitmap_set idx bitmap_empty;
data = [| loop (offset + seg_len) Empty |]
}
| Leaf(k', _) when k' = k ->
Leaf(k, v)
| Leaf(k', v') ->
Collision [k, v; k', v']
| Collision buckets ->
Collision(insert_assoc k v buckets)
| Node sa ->
let idx = get_seg offset seg_len h in
if sa_mem idx sa
then
Node(sa_update idx (loop (offset + seg_len)) sa)
else
Node(sa_add idx (loop (offset + seg_len) Empty) sa)
in
loop 0 t
type ('v, 'map) string_map = {
empty : 'map;
find : string -> 'map -> 'v option;
add : string -> 'v -> 'map -> 'map;
}
type test_config = {
key_length : int;
tree_size : int;
repeat_count : int;
}
let random_key cfg =
let len = Random.int (cfg.key_length / 2) + cfg.key_length in
String.init len (fun _ -> Char.chr (Random.int 26 + Char.code 'a'))
let ordered_key cfg =
let key = Bytes.init cfg.key_length (fun _ -> 'a') in
let curr_index = ref 0 in
let rec next_key () =
assert (!curr_index < Bytes.length key);
let c = Bytes.get key !curr_index in
if c = 'z'
then (incr curr_index; next_key ())
else Bytes.set key !curr_index (Char.chr (Char.code c + 1))
in
fun () ->
next_key ();
Bytes.to_string key
let random_test cfg map =
let init_data = Array.init cfg.tree_size (fun i -> (random_key cfg, i)) in
let tree = Array.fold_left (fun tree (k, v) -> map.add k v tree) map.empty init_data in
let insert_data = Array.init cfg.repeat_count (fun i -> (random_key cfg, i)) in
let _ = Gc.full_major () in
let t0 = Sys.time () in
let _ = Array.fold_left (fun tree (k, v) -> map.add k v tree) tree insert_data in
let t1 = Sys.time () in
let find_data =
Array.init cfg.repeat_count (fun _ -> init_data.(Random.int cfg.tree_size))
in
let _ = Gc.full_major () in
let t2 = Sys.time () in
let _ = Array.iter (fun (k, v) -> assert (map.find k tree = Some v)) find_data in
let t3 = Sys.time () in
(t1 -. t0, t3 -. t2)
let ordered_test cfg map =
let next_key = ordered_key cfg in
let init_data = Array.init cfg.tree_size (fun i -> (next_key (), i)) in
let tree = Array.fold_left (fun tree (k, v) -> map.add k v tree) map.empty init_data in
let insert_data = Array.init cfg.repeat_count (fun i -> (next_key (), i)) in
let _ = Gc.full_major () in
let t0 = Sys.time () in
let _ = Array.fold_left (fun tree (k, v) -> map.add k v tree) tree insert_data in
let t1 = Sys.time () in
let find_data =
Array.init cfg.repeat_count (fun _ -> init_data.(Random.int cfg.tree_size))
in
let _ = Gc.full_major () in
let t2 = Sys.time () in
let _ = Array.iter (fun (k, v) -> assert (map.find k tree = Some v)) find_data in
let t3 = Sys.time () in
(t1 -. t0, t3 -. t2)
module String_Map = Map.Make(String)
let avl_map = {
empty = String_Map.empty;
find = String_Map.find_opt;
add = String_Map.add;
}
let hamt_map = {
empty;
find = find_opt;
add;
}
let _ =
let map = Sys.argv.(1) in
let () = Random.self_init () in
let test cfg =
match Sys.argv.(2) with
| "random" -> random_test cfg
| "ordered" -> ordered_test cfg
| _ -> exit 1
in
let key_length = int_of_string Sys.argv.(3) in
let rec loop index tree_size =
if index > 10
then ()
else
let config = { key_length; tree_size; repeat_count = 10000 } in
Printf.eprintf "testing size=%d...\n" tree_size;
flush_all ();
let (t_insert, t_find) =
match map with
| "avl" -> test config avl_map
| "hamt" -> test config hamt_map
| _ -> exit 1
in
Printf.printf "%d %f %f\n" tree_size t_insert t_find;
loop (index + 1) (tree_size * 2)
in
loop 1 5000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment