Skip to content

Instantly share code, notes, and snippets.

@stedolan
Created November 28, 2021 11:58
Show Gist options
  • Save stedolan/318f87db9f59f1acea771e7f4dd59cd4 to your computer and use it in GitHub Desktop.
Save stedolan/318f87db9f59f1acea771e7f4dd59cd4 to your computer and use it in GitHub Desktop.
type bigstring = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
let alloc_bigstring size = Bigarray.Array1.create Char Bigarray.c_layout size
external bigstring_refcount : bigstring -> int = "pinbuf_bigstring_refcount" [@@noalloc]
module Pinbuf (Size : sig val size : int end) : sig
type t = private bigstring
val alloc : unit -> t
val release : t -> unit
val count_buffers : unit -> int
end = struct
open Bigarray
type t = bigstring
let has_proxy str =
match bigstring_refcount str with
| 1 -> false
| 2 -> true
| _ -> assert false
let mk_proxy str =
assert (not (has_proxy str));
let str' = Array1.sub str 0 Size.size in
assert (str != str');
assert (has_proxy str);
assert (has_proxy str');
str'
let underlying_buffers = ref []
type 'a alloc_list =
| Cons of {mutable hd: 'a; mutable tl: 'a alloc_list}
| Nil of {mutable junk: unit}
let alloc_cache = ref (Nil {junk=()})
let allocs_remaining = ref 1
let[@inline never] useless_allocation s =
(s,s,s,s,s,s)
let alloc () =
match !alloc_cache with
| Cons {hd=s; tl=ss} ->
(* extremely tricky GC optimisation:
1. even though this needs no allocation, forcing some heap allocation
clocks the GC, which lets it detect dropped bigstrings by doing an
occasional minor GC even if there is no other allocation.
2. ensuring that the result is always on the minor heap prevents the
alloc_cache pointer flickering between minor and static states as
the cache changes from nonempty to empty. This avoids ref_table
overflow in the write barrier.
3. making the fields of alloc_list mutable means that these tricky
forced allocations won't be optimised away. *)
alloc_cache :=
(match ss with
| Cons {hd;tl} -> Cons{hd;tl}
| Nil _ -> Nil {junk=()});
s
| Nil _ when !allocs_remaining > 1 ->
decr allocs_remaining;
let str = alloc_bigstring Size.size in
underlying_buffers := str :: !underlying_buffers;
mk_proxy str
| Nil _ ->
let str =
let nlive = ref 0 and ngarbage = ref 0 in
let garbage =
List.fold_left (fun garbage buf ->
if has_proxy buf then (incr nlive; garbage)
else (incr ngarbage; Cons {hd=buf; tl=garbage}))
(Nil{junk=()}) !underlying_buffers in
allocs_remaining := max 0 (max 10 (!nlive) - !ngarbage);
(* Printf.printf "%d %d %d\n%!" !nlive !ngarbage !allocs_remaining; *)
match garbage with
| Cons {hd=str; tl=rest} ->
alloc_cache := rest; str
| Nil _ ->
assert (!allocs_remaining > 0);
decr allocs_remaining;
let str = alloc_bigstring Size.size in
underlying_buffers := str :: !underlying_buffers;
str
in
mk_proxy str
let release buf =
alloc_cache := Cons {hd=buf; tl=(!alloc_cache)}
let count_buffers () = List.length !underlying_buffers
end
let use_fill = true
let fill b =
if use_fill then Bigarray.Array1.fill b 'x'
let test_bigstrings () =
for i = 1 to 10000000 do
let b = alloc_bigstring 4096 in
fill b
done;
-1
let test_pinbufs_linear () =
let module Pinbuf = Pinbuf (struct let size = 4096 end) in
for i = 1 to 10000000 do
let b = Pinbuf.alloc () in
fill (b :> bigstring);
Pinbuf.release b;
done;
Pinbuf.count_buffers ()
let test_pinbufs_gc () =
let module Pinbuf = Pinbuf (struct let size = 4096 end) in
for i = 1 to 10000000 do
let b = Pinbuf.alloc () in
fill (b :> bigstring);
ignore b
done;
Pinbuf.count_buffers ()
let test_pinbufs_half_gc () =
let module Pinbuf = Pinbuf (struct let size = 4096 end) in
for i = 1 to 10000000 do
let b = Pinbuf.alloc () in
fill (b :> bigstring);
if i land 1 = 0 then
Pinbuf.release b;
done;
Pinbuf.count_buffers ()
let () =
let fns =
[
"bigstring", test_bigstrings;
"pinbuf (gc)", test_pinbufs_gc;
"pinbuf (half-gc)", test_pinbufs_half_gc;
"pinbuf (manual)", test_pinbufs_linear
] in
Printf.printf "%20s %10s %10s %10s\n" "" "time" "buffers" "minor GCs";
fns |> List.iter (fun (s, f) ->
let gcbefore = Gc.quick_stat () in
let before = Unix.gettimeofday () in
let n = f () in
let after = Unix.gettimeofday () in
let gcafter = Gc.quick_stat () in
Printf.printf "%20s: %9.1fs %10d %10d\n"
s (after -. before) n
(gcafter.minor_collections - gcbefore.minor_collections)
)
#include <caml/mlvalues.h>
#include <caml/bigarray.h>
value pinbuf_bigstring_refcount(value str)
{
struct caml_ba_array* ba = Caml_ba_array_val(str);
if (ba->proxy == NULL) return Val_long(1);
else return Val_long(ba->proxy->refcount);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment