Skip to content

Instantly share code, notes, and snippets.

@dezren39
Forked from eliasdaler/0_bench_test.go
Created February 17, 2024 08:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dezren39/ab485a29fe9af94831d9f3292d2ced54 to your computer and use it in GitHub Desktop.
Save dezren39/ab485a29fe9af94831d9f3292d2ced54 to your computer and use it in GitHub Desktop.
The quest for a good vector library for gamedev
package main
import (
"testing"
"github.com/go-gl/mathgl/mgl32"
"github.com/kvartborg/vector"
"github.com/ungerik/go3d/vec2"
"gonum.org/v1/gonum/mat"
)
// Vec2f is a deliberately simple 2D vector: a flat struct holding two
// float32 components.
type Vec2f struct {
	X float32
	Y float32
}

// Add returns the component-wise sum of v and b. Both operands are passed by
// value, so neither is modified.
func (v Vec2f) Add(b Vec2f) Vec2f {
	return Vec2f{X: v.X + b.X, Y: v.Y + b.Y}
}
// SinkVec2f receives the result of every BenchmarkVec2f iteration. Storing
// into a package-level variable keeps the additions observable, so the
// compiler cannot discard them as dead code.
var SinkVec2f Vec2f

// BenchmarkVec2f measures three chained Vec2f.Add calls per iteration:
// d = a+b, e = b+c, then d+e.
func BenchmarkVec2f(bench *testing.B) {
	a := Vec2f{10, 20}
	b := Vec2f{20, 30}
	c := Vec2f{30, 40}
	for i := 0; i < bench.N; i++ {
		d := a.Add(b)
		e := b.Add(c)
		// Previously the sum was assigned to the blank identifier, which
		// left nothing for the loop to produce.
		SinkVec2f = d.Add(e)
	}
}
// SinkKvartBorg receives the result of every BenchmarkKvartBorgVector
// iteration so the work stays observable and cannot be discarded as dead
// code.
var SinkKvartBorg vector.Vector

// BenchmarkKvartBorgVector measures three chained Add calls per iteration
// using kvartborg/vector, whose vector type is backed by a slice.
func BenchmarkKvartBorgVector(bench *testing.B) {
	type vec = vector.Vector
	a := vec{10, 20}
	b := vec{20, 30}
	c := vec{30, 40}
	for i := 0; i < bench.N; i++ {
		d := a.Add(b)
		e := b.Add(c)
		// Keep the final sum instead of assigning it to the blank
		// identifier.
		SinkKvartBorg = d.Add(e)
	}
}
// SinkGoNum receives the result of every BenchmarkGoNum iteration so the
// work stays observable and cannot be discarded as dead code.
var SinkGoNum *mat.VecDense

// BenchmarkGoNum measures the same three-addition workload using gonum's
// mat.VecDense. A fresh destination vector is created for each sum inside
// the loop, so those allocations are part of what is measured.
func BenchmarkGoNum(bench *testing.B) {
	a := mat.NewVecDense(2, []float64{10., 20.})
	b := mat.NewVecDense(2, []float64{20., 30.})
	c := mat.NewVecDense(2, []float64{30., 40.})
	for i := 0; i < bench.N; i++ {
		d := mat.NewVecDense(2, nil)
		d.AddVec(a, b)
		e := mat.NewVecDense(2, nil)
		// e (not d) must receive b+c; writing into d would overwrite a+b
		// and leave e untouched, so f would not be (a+b)+(b+c).
		e.AddVec(b, c)
		f := mat.NewVecDense(2, nil)
		f.AddVec(d, e)
		SinkGoNum = f
	}
}
// SinkGo3d receives the result of every BenchmarkGo3d iteration so the work
// stays observable and cannot be discarded as dead code.
var SinkGo3d vec2.T

// BenchmarkGo3d measures the three-addition workload using go3d's
// pointer-based, in-place Add method. The API takes pointers everywhere,
// which may make escape analysis move more values to the heap than
// necessary.
func BenchmarkGo3d(bench *testing.B) {
	a := vec2.T{10., 20.}
	b := vec2.T{20., 30.}
	c := vec2.T{30., 40.}
	for i := 0; i < bench.N; i++ {
		// Seed each accumulator with the first operand and add the second in
		// place. Starting from vec2.Zero and adding both operands performs
		// twice as many additions as the other benchmarks do.
		d := a
		d.Add(&b)
		e := b
		e.Add(&c)
		f := d
		f.Add(&e)
		SinkGo3d = f
	}
}
// SinkGo3dImproved receives the result of every BenchmarkGo3dImproved
// iteration so the work stays observable and cannot be discarded as dead
// code.
var SinkGo3dImproved vec2.T

// BenchmarkGo3dImproved measures the three-addition workload on go3d's
// vec2.T through a value-based add helper, to show what the library could
// look like with a by-value interface.
func BenchmarkGo3dImproved(bench *testing.B) {
	// vec2Add returns the component-wise sum without mutating its operands.
	vec2Add := func(a, b vec2.T) vec2.T {
		return vec2.T{a[0] + b[0], a[1] + b[1]}
	}
	a := vec2.T{10., 20.}
	b := vec2.T{20., 30.}
	c := vec2.T{30., 40.}
	for i := 0; i < bench.N; i++ {
		d := vec2Add(a, b)
		e := vec2Add(b, c)
		// Keep the final sum instead of assigning it to the blank
		// identifier.
		SinkGo3dImproved = vec2Add(d, e)
	}
}
// SinkGoGL receives the result of every BenchmarkGoGL iteration so the work
// stays observable and cannot be discarded as dead code.
var SinkGoGL mgl32.Vec2

// BenchmarkGoGL measures three chained Add calls per iteration using
// go-gl/mathgl's mgl32.Vec2.
func BenchmarkGoGL(bench *testing.B) {
	a := mgl32.Vec2{10., 20.}
	b := mgl32.Vec2{20., 30.}
	c := mgl32.Vec2{30., 40.}
	for i := 0; i < bench.N; i++ {
		d := a.Add(b)
		e := b.Add(c)
		// Keep the final sum instead of assigning it to the blank
		// identifier.
		SinkGoGL = d.Add(e)
	}
}
goos: linux
goarch: amd64
pkg: test.com/m
cpu: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz
BenchmarkVec2f-12 1000000000 0.2522 ns/op 0 B/op 0 allocs/op
BenchmarkKvartBorgVector-12 24883574 47.28 ns/op 48 B/op 3 allocs/op
BenchmarkGoNum-12 4931695 242.2 ns/op 144 B/op 5 allocs/op
BenchmarkGo3d-12 274195806 4.418 ns/op 0 B/op 0 allocs/op
BenchmarkGo3dImproved-12 455897227 2.622 ns/op 0 B/op 0 allocs/op
BenchmarkGoGL-12 455912618 2.626 ns/op 0 B/op 0 allocs/op
package main
import (
"fmt"
"github.com/go-gl/mathgl/mgl32"
)
// Vec2f is a deliberately simple 2D vector: a flat struct holding two
// float32 components.
type Vec2f struct {
	X float32
	Y float32
}

// Add returns the component-wise sum of v and b. Both operands are passed by
// value, so neither is modified.
func (v Vec2f) Add(b Vec2f) Vec2f {
	return Vec2f{X: v.X + b.X, Y: v.Y + b.Y}
}
// f1 adds three Vec2f values as (first+second)+(second+third) and prints the
// X and Y components of the total.
func f1() {
	var (
		first  = Vec2f{X: 10, Y: 20}
		second = Vec2f{X: 20, Y: 30}
		third  = Vec2f{X: 30, Y: 40}
	)
	total := first.Add(second).Add(second.Add(third))
	fmt.Println(total.X, total.Y)
}
// f2 adds three mgl32.Vec2 values as (first+second)+(second+third) and prints
// the X and Y components of the total.
func f2() {
	var (
		first  = mgl32.Vec2{10, 20}
		second = mgl32.Vec2{20, 30}
		third  = mgl32.Vec2{30, 40}
	)
	total := first.Add(second).Add(second.Add(third))
	fmt.Println(total.X(), total.Y())
}
// main runs the two demos in order: f1 first, then f2.
func main() {
	for _, demo := range []func(){f1, f2} {
		demo()
	}
}
000000000047e440 <main.f1>:
func (a Vec2f) Add(b Vec2f) Vec2f {
return Vec2f{a.X + b.X, a.Y + b.Y}
}
func f1() {
47e440: 49 3b 66 10 cmp 0x10(%r14),%rsp
47e444: 76 7e jbe 47e4c4 <main.f1+0x84>
47e446: 48 83 ec 58 sub $0x58,%rsp
47e44a: 48 89 6c 24 50 mov %rbp,0x50(%rsp)
47e44f: 48 8d 6c 24 50 lea 0x50(%rsp),%rbp
b := Vec2f{20., 30.}
c := Vec2f{30., 40.}
d := a.Add(b)
e := b.Add(c)
f := d.Add(e)
fmt.Println(f.X, f.Y)
47e454: b8 00 00 a0 42 mov $0x42a00000,%eax
47e459: e8 42 b1 f8 ff callq 4095a0 <runtime.convT32>
47e45e: 48 89 44 24 28 mov %rax,0x28(%rsp)
47e463: b8 00 00 f0 42 mov $0x42f00000,%eax
47e468: e8 33 b1 f8 ff callq 4095a0 <runtime.convT32>
47e46d: 48 8d 4c 24 30 lea 0x30(%rsp),%rcx
47e472: 44 0f 11 39 movups %xmm15,(%rcx)
47e476: 48 8d 54 24 40 lea 0x40(%rsp),%rdx
47e47b: 44 0f 11 3a movups %xmm15,(%rdx)
47e47f: 48 8d 15 1a 70 00 00 lea 0x701a(%rip),%rdx # 4854a0 <type.*+0x64a0>
47e486: 48 89 54 24 30 mov %rdx,0x30(%rsp)
47e48b: 48 8b 5c 24 28 mov 0x28(%rsp),%rbx
47e490: 48 89 5c 24 38 mov %rbx,0x38(%rsp)
47e495: 48 89 54 24 40 mov %rdx,0x40(%rsp)
47e49a: 48 89 44 24 48 mov %rax,0x48(%rsp)
return Fprintln(os.Stdout, a...)
47e49f: 48 8b 1d 8a 7a 0a 00 mov 0xa7a8a(%rip),%rbx # 525f30 <os.Stdout>
47e4a6: 48 8d 05 d3 34 03 00 lea 0x334d3(%rip),%rax # 4b1980 <go.itab.*os.File,io.Writer>
47e4ad: bf 02 00 00 00 mov $0x2,%edi
47e4b2: 48 89 fe mov %rdi,%rsi
47e4b5: e8 c6 aa ff ff callq 478f80 <fmt.Fprintln>
}
47e4ba: 48 8b 6c 24 50 mov 0x50(%rsp),%rbp
47e4bf: 48 83 c4 58 add $0x58,%rsp
47e4c3: c3 retq
func f1() {
47e4c4: e8 b7 a7 fd ff callq 458c80 <runtime.morestack_noctxt.abi0>
47e4c9: e9 72 ff ff ff jmpq 47e440 <main.f1>
47e4ce: cc int3
47e4cf: cc int3
47e4d0: cc int3
47e4d1: cc int3
47e4d2: cc int3
47e4d3: cc int3
47e4d4: cc int3
47e4d5: cc int3
47e4d6: cc int3
47e4d7: cc int3
47e4d8: cc int3
47e4d9: cc int3
47e4da: cc int3
47e4db: cc int3
47e4dc: cc int3
47e4dd: cc int3
47e4de: cc int3
47e4df: cc int3
000000000047e4e0 <main.f2>:
func f2() {
47e4e0: 4c 8d 64 24 a8 lea -0x58(%rsp),%r12
47e4e5: 4d 3b 66 10 cmp 0x10(%r14),%r12
47e4e9: 0f 86 28 02 00 00 jbe 47e717 <main.f2+0x237>
47e4ef: 48 81 ec d8 00 00 00 sub $0xd8,%rsp
47e4f6: 48 89 ac 24 d0 00 00 mov %rbp,0xd0(%rsp)
47e4fd: 00
47e4fe: 48 8d ac 24 d0 00 00 lea 0xd0(%rsp),%rbp
47e505: 00
a := mgl32.Vec2{10., 20.}
47e506: 48 c7 84 24 a0 00 00 movq $0x0,0xa0(%rsp)
47e50d: 00 00 00 00 00
47e512: f3 0f 10 05 22 2c 03 movss 0x32c22(%rip),%xmm0 # 4b113c <$f32.41200000>
47e519: 00
47e51a: f3 0f 11 84 24 a0 00 movss %xmm0,0xa0(%rsp)
47e521: 00 00
47e523: f3 0f 10 05 15 2c 03 movss 0x32c15(%rip),%xmm0 # 4b1140 <$f32.41a00000>
47e52a: 00
47e52b: f3 0f 11 84 24 a4 00 movss %xmm0,0xa4(%rsp)
47e532: 00 00
b := mgl32.Vec2{20., 30.}
47e534: 48 c7 84 24 98 00 00 movq $0x0,0x98(%rsp)
47e53b: 00 00 00 00 00
47e540: f3 0f 11 84 24 98 00 movss %xmm0,0x98(%rsp)
47e547: 00 00
47e549: f3 0f 10 05 f3 2b 03 movss 0x32bf3(%rip),%xmm0 # 4b1144 <$f32.41f00000>
47e550: 00
47e551: f3 0f 11 84 24 9c 00 movss %xmm0,0x9c(%rsp)
47e558: 00 00
c := mgl32.Vec2{30., 40.}
47e55a: 48 c7 84 24 90 00 00 movq $0x0,0x90(%rsp)
47e561: 00 00 00 00 00
47e566: f3 0f 11 84 24 90 00 movss %xmm0,0x90(%rsp)
47e56d: 00 00
47e56f: f3 0f 10 05 d1 2b 03 movss 0x32bd1(%rip),%xmm0 # 4b1148 <$f32.42200000>
47e576: 00
47e577: f3 0f 11 84 24 94 00 movss %xmm0,0x94(%rsp)
47e57e: 00 00
d := a.Add(b)
47e580: 48 8b 8c 24 a0 00 00 mov 0xa0(%rsp),%rcx
47e587: 00
47e588: 48 89 4c 24 60 mov %rcx,0x60(%rsp)
47e58d: 48 8b 8c 24 98 00 00 mov 0x98(%rsp),%rcx
47e594: 00
47e595: 48 89 4c 24 48 mov %rcx,0x48(%rsp)
}
// Add performs element-wise addition between two vectors. It is equivalent to iterating
// over every element of v1 and adding the corresponding element of v2 to it.
func (v1 Vec2) Add(v2 Vec2) Vec2 {
return Vec2{v1[0] + v2[0], v1[1] + v2[1]}
47e59a: f3 0f 10 44 24 48 movss 0x48(%rsp),%xmm0
47e5a0: f3 0f 58 44 24 60 addss 0x60(%rsp),%xmm0
47e5a6: f3 0f 10 4c 24 4c movss 0x4c(%rsp),%xmm1
47e5ac: f3 0f 58 4c 24 64 addss 0x64(%rsp),%xmm1
47e5b2: f3 0f 11 84 24 88 00 movss %xmm0,0x88(%rsp)
47e5b9: 00 00
47e5bb: f3 0f 11 8c 24 8c 00 movss %xmm1,0x8c(%rsp)
47e5c2: 00 00
e := b.Add(c)
47e5c4: 48 8b 8c 24 98 00 00 mov 0x98(%rsp),%rcx
47e5cb: 00
47e5cc: 48 89 4c 24 50 mov %rcx,0x50(%rsp)
47e5d1: 48 8b 8c 24 90 00 00 mov 0x90(%rsp),%rcx
47e5d8: 00
47e5d9: 48 89 4c 24 38 mov %rcx,0x38(%rsp)
47e5de: f3 0f 10 44 24 38 movss 0x38(%rsp),%xmm0
47e5e4: f3 0f 58 44 24 50 addss 0x50(%rsp),%xmm0
47e5ea: f3 0f 10 4c 24 3c movss 0x3c(%rsp),%xmm1
47e5f0: f3 0f 58 4c 24 54 addss 0x54(%rsp),%xmm1
47e5f6: f3 0f 11 84 24 80 00 movss %xmm0,0x80(%rsp)
47e5fd: 00 00
47e5ff: f3 0f 11 8c 24 84 00 movss %xmm1,0x84(%rsp)
47e606: 00 00
f := d.Add(e)
47e608: 48 8b 8c 24 88 00 00 mov 0x88(%rsp),%rcx
47e60f: 00
47e610: 48 89 4c 24 58 mov %rcx,0x58(%rsp)
47e615: 48 8b 8c 24 80 00 00 mov 0x80(%rsp),%rcx
47e61c: 00
47e61d: 48 89 4c 24 40 mov %rcx,0x40(%rsp)
47e622: 48 c7 44 24 30 00 00 movq $0x0,0x30(%rsp)
47e629: 00 00
47e62b: f3 0f 10 44 24 40 movss 0x40(%rsp),%xmm0
47e631: f3 0f 58 44 24 58 addss 0x58(%rsp),%xmm0
47e637: f3 0f 11 44 24 30 movss %xmm0,0x30(%rsp)
47e63d: f3 0f 10 4c 24 44 movss 0x44(%rsp),%xmm1
47e643: f3 0f 58 4c 24 5c addss 0x5c(%rsp),%xmm1
47e649: f3 0f 11 4c 24 34 movss %xmm1,0x34(%rsp)
47e64f: f3 0f 11 44 24 78 movss %xmm0,0x78(%rsp)
47e655: f3 0f 11 4c 24 7c movss %xmm1,0x7c(%rsp)
fmt.Println(f.X(), f.Y())
47e65b: 48 8b 4c 24 30 mov 0x30(%rsp),%rcx
47e660: 48 89 4c 24 70 mov %rcx,0x70(%rsp)
// X is an element access func, it is equivalent to v[n] where
// n is some valid index. The mappings are XYZW (X=0, Y=1 etc). Benchmarks
// show that this is more or less as fast as direct acces, probably due to
// inlining, so use v[0] or v.X() depending on personal preference.
func (v Vec2) X() float32 {
return v[0]
47e665: f3 0f 10 44 24 70 movss 0x70(%rsp),%xmm0
47e66b: 48 8b 4c 24 78 mov 0x78(%rsp),%rcx
47e670: 48 89 4c 24 68 mov %rcx,0x68(%rsp)
47e675: 66 0f 7e c0 movd %xmm0,%eax
// Y is an element access func, it is equivalent to v[n] where
// n is some valid index. The mappings are XYZW (X=0, Y=1 etc). Benchmarks
// show that this is more or less as fast as direct acces, probably due to
// inlining, so use v[0] or v.X() depending on personal preference.
func (v Vec2) Y() float32 {
return v[1]
47e679: f3 0f 10 44 24 6c movss 0x6c(%rsp),%xmm0
47e67f: f3 0f 11 44 24 2c movss %xmm0,0x2c(%rsp)
47e685: e8 16 af f8 ff callq 4095a0 <runtime.convT32>
47e68a: 48 89 84 24 a8 00 00 mov %rax,0xa8(%rsp)
47e691: 00
47e692: f3 0f 10 44 24 2c movss 0x2c(%rsp),%xmm0
47e698: 66 0f 7e c1 movd %xmm0,%ecx
47e69c: 89 c8 mov %ecx,%eax
47e69e: 66 90 xchg %ax,%ax
47e6a0: e8 fb ae f8 ff callq 4095a0 <runtime.convT32>
47e6a5: 48 8d 8c 24 b0 00 00 lea 0xb0(%rsp),%rcx
47e6ac: 00
47e6ad: 44 0f 11 39 movups %xmm15,(%rcx)
47e6b1: 48 8d 94 24 c0 00 00 lea 0xc0(%rsp),%rdx
47e6b8: 00
47e6b9: 44 0f 11 3a movups %xmm15,(%rdx)
47e6bd: 48 8d 15 dc 6d 00 00 lea 0x6ddc(%rip),%rdx # 4854a0 <type.*+0x64a0>
47e6c4: 48 89 94 24 b0 00 00 mov %rdx,0xb0(%rsp)
47e6cb: 00
47e6cc: 48 8b 9c 24 a8 00 00 mov 0xa8(%rsp),%rbx
47e6d3: 00
47e6d4: 48 89 9c 24 b8 00 00 mov %rbx,0xb8(%rsp)
47e6db: 00
47e6dc: 48 89 94 24 c0 00 00 mov %rdx,0xc0(%rsp)
47e6e3: 00
47e6e4: 48 89 84 24 c8 00 00 mov %rax,0xc8(%rsp)
47e6eb: 00
47e6ec: 48 8b 1d 3d 78 0a 00 mov 0xa783d(%rip),%rbx # 525f30 <os.Stdout>
47e6f3: 48 8d 05 86 32 03 00 lea 0x33286(%rip),%rax # 4b1980 <go.itab.*os.File,io.Writer>
47e6fa: bf 02 00 00 00 mov $0x2,%edi
47e6ff: 48 89 fe mov %rdi,%rsi
47e702: e8 79 a8 ff ff callq 478f80 <fmt.Fprintln>
}
47e707: 48 8b ac 24 d0 00 00 mov 0xd0(%rsp),%rbp
47e70e: 00
47e70f: 48 81 c4 d8 00 00 00 add $0xd8,%rsp
47e716: c3 retq
func f2() {
47e717: e8 64 a5 fd ff callq 458c80 <runtime.morestack_noctxt.abi0>
47e71c: 0f 1f 40 00 nopl 0x0(%rax)
47e720: e9 bb fd ff ff jmpq 47e4e0 <main.f2>
47e725: cc int3
47e726: cc int3
47e727: cc int3
47e728: cc int3
47e729: cc int3
47e72a: cc int3
47e72b: cc int3
47e72c: cc int3
47e72d: cc int3
47e72e: cc int3
47e72f: cc int3
47e730: cc int3
47e731: cc int3
47e732: cc int3
47e733: cc int3
47e734: cc int3
47e735: cc int3
47e736: cc int3
47e737: cc int3
47e738: cc int3
47e739: cc int3
47e73a: cc int3
47e73b: cc int3
47e73c: cc int3
47e73d: cc int3
47e73e: cc int3
47e73f: cc int3
000000000047e740 <main.main>:
func main() {
47e740: 49 3b 66 10 cmp 0x10(%r14),%rsp
47e744: 76 1f jbe 47e765 <main.main+0x25>
47e746: 48 83 ec 08 sub $0x8,%rsp
47e74a: 48 89 2c 24 mov %rbp,(%rsp)
47e74e: 48 8d 2c 24 lea (%rsp),%rbp
f1()
47e752: e8 e9 fc ff ff callq 47e440 <main.f1>
f2()
47e757: e8 84 fd ff ff callq 47e4e0 <main.f2>
}
47e75c: 48 8b 2c 24 mov (%rsp),%rbp
47e760: 48 83 c4 08 add $0x8,%rsp
47e764: c3 retq
func main() {
47e765: e8 16 a5 fd ff callq 458c80 <runtime.morestack_noctxt.abi0>
47e76a: eb d4 jmp 47e740 <main.main>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment