Skip to content

Instantly share code, notes, and snippets.

@jonathan-laurent
Last active April 16, 2023 12:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonathan-laurent/b0cba8a0a9c6142ab95df17a45e51f66 to your computer and use it in GitHub Desktop.
Save jonathan-laurent/b0cba8a0a9c6142ab95df17a45e51f66 to your computer and use it in GitHub Desktop.
Aliasing Array.set or Array.unsafe_set leads to a 3x slowdown

Aliasing Array.unsafe_set leads to a 3x slowdown

How to replicate

To replicate, just run:

opam install core_bench core_unix
bash replicate.sh

Results

The aliased version is about 3x slower (using OCaml 4.14.1 with Flambda and -O3):

┌─────────┬──────────┬────────────┐
│ Name    │ Time/Run │ Percentage │
├─────────┼──────────┼────────────┤
│ Normal  │   0.89ns │     29.25% │
│ Variant │   3.05ns │    100.00% │
└─────────┴──────────┴────────────┘

According to the Flambda inlining report, unsafe_set was inlined:

* Application of unsafe_set/2{asm_alias.ml:5,2-20}

  This function was not specialised because it is not recursive.

  This function was inlined because of an annotation.

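Looking at the assembly, the aliased version contains an additional runtime check (which is surprising given that Array.unsafe_set is used). Note that it is not a bounds check: `cmpq $254` tests the block's tag byte (254 is the float-array tag, Double_array_tag), and the non-float path goes through `caml_modify`, i.e. the store is compiled as a generic polymorphic array write instead of the plain `movq $3, (%rax)` of the normal version. The `cmpq $1023` comparison, which appears in both versions, is the bounds check for the `arr.(0)` read: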

Normal version:

.L101:
	movq	8(%r14), %r15
	movq	camlDune__exe__Asm_normal__Pccall_36@GOTPCREL(%rip), %rbx
	movq	%rax, (%rbx)
	movq	camlDune__exe__Asm_normal__Pccall_36@GOTPCREL(%rip), %rax
	movq	(%rax), %rax
	movq	$3, (%rax)
	movq	camlDune__exe__Asm_normal__Parrayrefs_34@GOTPCREL(%rip), %rax
	movq	camlDune__exe__Asm_normal__Pccall_36@GOTPCREL(%rip), %rbx
	movq	(%rbx), %rbx
	movq	-8(%rbx), %rdi
	cmpq	$1023, %rdi

Aliased version:

.L103:
	movq	8(%r14), %r15
	movq	camlDune__exe__Asm_alias__Pccall_46@GOTPCREL(%rip), %rbx
	movq	%rax, (%rbx)
	movq	camlDune__exe__Asm_alias__Pccall_46@GOTPCREL(%rip), %rax
	movq	(%rax), %rdi
	movzbq	-8(%rdi), %rax
	cmpq	$254, %rax
	je	.L101
	movl	$3, %esi
	call	caml_modify@PLT
	jmp	.L100
.L101:
	movl	$3, %eax
	movsd	(%rax), %xmm0
	movsd	%xmm0, (%rdi)
.L100:
	movq	camlDune__exe__Asm_alias__Parrayrefs_44@GOTPCREL(%rip), %rax
	movq	camlDune__exe__Asm_alias__Pccall_46@GOTPCREL(%rip), %rbx
	movq	(%rbx), %rbx
	movq	-8(%rbx), %rdi
	cmpq	$1023, %rdi
...
.L104:
	call	caml_ml_array_bound_error@PLT
(* Thin alias for [Array.unsafe_set], annotated with [@@inline].
   No type annotation is given, so the inferred type is the polymorphic
   ['a array -> int -> 'a -> unit].
   NOTE(review): the tag test and [caml_modify] call in the aliased assembly
   suggest the store is compiled generically because of this polymorphism;
   confirm before relying on it. *)
let unsafe_set arr idx v = Array.unsafe_set arr idx v [@@inline]
(* Reproduction case "alias": stores into a one-element int array through the
   local [unsafe_set] wrapper, then prints the stored element. *)
let main () =
let arr = Array.make 1 0 in
(* Write 1 at index 0 via the wrapper rather than [Array.unsafe_set] directly. *)
unsafe_set arr 0 1;
(* Read element 0 back with the regular indexing syntax and print it. *)
print_int arr.(0)
(* Run the reproduction at program start. *)
let () = main ()
(* Reproduction case "normal": stores into a one-element int array by calling
   [Array.unsafe_set] directly, then prints the stored element. *)
let main () =
let arr = Array.make 1 0 in
(* Write 1 at index 0 with the standard-library function, no wrapper. *)
Array.unsafe_set arr 0 1;
(* Read element 0 back with the regular indexing syntax and print it. *)
print_int arr.(0)
(* Run the reproduction at program start. *)
let () = main ()
; Build description for the three programs of this reproduction.
(executables
; asm_normal and asm_alias are the two programs whose assembly is compared;
; profile is the benchmark driver.
(names asm_normal asm_alias profile)
; Benchmarking library and the command runner used by the profile executable.
(libraries core_bench core_unix.command_unix)
; -O3: optimisation level used for the reported results.
; -S: keep the generated assembly files.
; -inlining-report: emit the inlining report file.
(ocamlopt_flags -O3 -S -inlining-report))
(lang dune 3.3)
; The line above declares the dune language version this project is written against.
(* Thin alias for [Array.unsafe_set], annotated with [@@inline]; this is the
   function whose cost the "Variant" benchmark measures.
   No type annotation is given, so the inferred type is the polymorphic
   ['a array -> int -> 'a -> unit]. *)
let unsafe_set arr idx v = Array.unsafe_set arr idx v [@@inline]
(* Baseline benchmark body: a closure that writes 1 into slot 0 of a
   one-element int array by calling [Array.unsafe_set] directly.
   The array is allocated once, when [normal] is defined, not on each call. *)
let normal =
let arr = Array.make 1 0 in
fun () -> Array.unsafe_set arr 0 1
(* Benchmark body under test: same store as [normal], but performed through
   the local [unsafe_set] alias.
   The array is allocated once, when [variant] is defined, not on each call. *)
let variant =
let arr = Array.make 1 0 in
fun () -> unsafe_set arr 0 1
(* Entry point of the benchmark executable: wraps the two closures as
   Core_bench tests and hands them to the Core_bench command-line runner. *)
let () =
  let module B = Core_bench.Bench in
  let variant_test = B.Test.create ~name:"Variant" variant in
  let normal_test = B.Test.create ~name:"Normal" normal in
  (* "Normal" is listed first, "Variant" second. *)
  Command_unix.run (B.make_command [ normal_test; variant_test ])
#!/bin/bash
# Rebuild the project from scratch, collect the generated assembly files and
# the inlining report into the current directory, then run the benchmark.

# Stop on the first failing command or use of an unset variable, so that a
# failed build is not followed by copying and benchmarking.
set -eu

# copy_artifact PATTERN DEST
# Copy the first file below the current directory whose name matches PATTERN
# to DEST. DEST itself is excluded from the search: a copy left behind by a
# previous run can match PATTERN too, and would otherwise make `cp` receive
# several sources. A missing artifact only produces a warning so that the
# benchmark can still run.
copy_artifact() {
  local pattern=$1
  local dest=$2
  local src
  src=$(find . -name "$pattern" ! -path "./$dest" | head -n 1)
  if [ -z "$src" ]; then
    echo "warning: no file matching '$pattern' found; skipping $dest" >&2
    return 0
  fi
  # Quoted so that paths containing spaces are passed as a single argument.
  cp -- "$src" "$dest"
}

dune clean
dune build
copy_artifact "*Asm_alias.s" alias.s
copy_artifact "*Asm_normal.s" normal.s
copy_artifact "*alias.0.inlining.org" alias.0.inlining.org
dune exec ./profile.exe
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment