Skip to content

Instantly share code, notes, and snippets.

@nsrip-dd
Last active October 28, 2022 12:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nsrip-dd/dc3468a409f23cc9c0f871ab8127432b to your computer and use it in GitHub Desktop.
Save nsrip-dd/dc3468a409f23cc9c0f871ab8127432b to your computer and use it in GitHub Desktop.
Testing the cost of zero-initializing data

Results on my laptop:

goos: darwin
goarch: amd64
pkg: stackzero
cpu: Intel(R) Core(TM) i7-1068NG7 CPU @ 2.30GHz
BenchmarkZeroInit/1-8 	412225224	         2.798 ns/op	       0 B/op	       0 allocs/op
BenchmarkZeroInit/8-8 	434050291	         2.688 ns/op	       0 B/op	       0 allocs/op
BenchmarkZeroInit/64-8         	100000000	        10.18 ns/op	       0 B/op	       0 allocs/op
BenchmarkZeroInit/128-8        	61322558	        19.02 ns/op	       0 B/op	       0 allocs/op
BenchmarkZeroInit/512-8        	40604328	        29.16 ns/op	       0 B/op	       0 allocs/op
BenchmarkZeroInit/1024-8       	24670831	        46.31 ns/op	       0 B/op	       0 allocs/op
BenchmarkZeroInit/8192-8       	 1362777	       876.5 ns/op	       0 B/op	       0 allocs/op
PASS
ok  	stackzero	9.763s

Excerpt from the CPU profile, annotated per source line:

stackzero.zeroinit128
/Users/nick.ripley/sandbox/go/stackzero/zeroinit_test.go

  Total:       760ms     17.48s (flat, cum) 215.54%
     30            .          .           //go:noinline 
     31        380ms      380ms           func zeroinit128() uintptr { 
     32        190ms     15.39s           	var t [128]uintptr 
     33        190ms      1.71s           	return use(t[:]) 
     34            .          .           } 
stackzero.zeroinit512
/Users/nick.ripley/sandbox/go/stackzero/zeroinit_test.go

  Total:      12.38s     14.95s (flat, cum) 184.34%
     36            .          .           //go:noinline 
     37        2.46s      2.46s           func zeroinit512() uintptr { 
     38        9.28s      9.45s           	var t [512]uintptr 
     39        640ms      3.04s           	return use(t[:]) 
     40            .          .           } 
stackzero.zeroinit1024
/Users/nick.ripley/sandbox/go/stackzero/zeroinit_test.go

  Total:      10.70s     12.26s (flat, cum) 151.17%
     42            .          .           //go:noinline 
     43        440ms      440ms           func zeroinit1024() uintptr { 
     44        9.75s      9.75s           	var t [1024]uintptr 
     45        510ms      2.07s           	return use(t[:]) 
     46            .          .           }

Annotated assembly:

     1.58s      1.72s (flat, cum) 21.21% of Total
         .          .    10ef1e0: MOVQ SP, R12                            ;zeroinit_test.go:49
         .          .    10ef1e3: SUBQ $0xffa0, R12
         .          .    10ef1ea: JB 0x10ef23a
         .          .    10ef1ec: CMPQ 0x10(R14), R12
         .          .    10ef1f0: JBE 0x10ef23a
         .          .    10ef1f2: SUBQ $0x10020, SP
         .          .    10ef1f9: MOVQ BP, 0x10018(SP)
         .          .    10ef201: LEAQ 0x10018(SP), BP
         .          .    10ef209: LEAQ 0x18(SP), DI                       ;zeroinit_test.go:50
         .          .    10ef20e: MOVL $0x2000, CX
         .          .    10ef213: XORL AX, AX
     1.41s      1.41s    10ef215: REP; STOSQ AX, ES:0(DI)                 ;stackzero.zeroinit8192 zeroinit_test.go:50
         .          .    10ef218: LEAQ 0x18(SP), AX                       ;zeroinit_test.go:51
         .          .    10ef21d: MOVL $0x2000, BX
         .          .    10ef222: MOVQ BX, CX
         .      140ms    10ef225: CALL stackzero.use(SB)                  ;stackzero.zeroinit8192 zeroinit_test.go:51
         .          .    10ef22a: MOVQ 0x10018(SP), BP                    ;zeroinit_test.go:51
         .          .    10ef232: ADDQ $0x10020, SP
         .          .    10ef239: RET
         .          .    10ef23a: CALL runtime.morestack_noctxt.abi0(SB)  ;zeroinit_test.go:49
     170ms      170ms    10ef23f: NOPL                                    ;stackzero.zeroinit8192 zeroinit_test.go:49
         .          .    10ef240: JMP stackzero.zeroinit8192(SB)          ;zeroinit_test.go:49
package main
import "testing"
// use returns the final element of u. It panics if u is empty.
//
// Inlining is disabled on purpose: if this were inlined into the
// zeroinit<N> callers, the compiler could reduce each of them to a plain
// "return 0" and the array zeroing under test would disappear.
//
//go:noinline
func use(u []uintptr) uintptr {
	last := len(u) - 1
	return u[last]
}
// zeroinit1 declares a zero-valued local array of 1 uintptr and passes a
// slice of it to use, returning use's result (the last element, i.e. 0).
//
//go:noinline
func zeroinit1() uintptr {
	var buf [1]uintptr
	view := buf[:]
	return use(view)
}
// zeroinit8 declares a zero-valued local array of 8 uintptrs and passes a
// slice of it to use, returning use's result (the last element, i.e. 0).
//
//go:noinline
func zeroinit8() uintptr {
	var buf [8]uintptr
	view := buf[:]
	return use(view)
}
// zeroinit64 declares a zero-valued local array of 64 uintptrs and passes a
// slice of it to use, returning use's result (the last element, i.e. 0).
//
//go:noinline
func zeroinit64() uintptr {
	var buf [64]uintptr
	view := buf[:]
	return use(view)
}
// zeroinit128 declares a zero-valued local array of 128 uintptrs and passes
// a slice of it to use, returning use's result (the last element, i.e. 0).
//
//go:noinline
func zeroinit128() uintptr {
	var buf [128]uintptr
	view := buf[:]
	return use(view)
}
// zeroinit512 declares a zero-valued local array of 512 uintptrs and passes
// a slice of it to use, returning use's result (the last element, i.e. 0).
//
//go:noinline
func zeroinit512() uintptr {
	var buf [512]uintptr
	view := buf[:]
	return use(view)
}
// zeroinit1024 declares a zero-valued local array of 1024 uintptrs and
// passes a slice of it to use, returning use's result (the last element,
// i.e. 0).
//
//go:noinline
func zeroinit1024() uintptr {
	var buf [1024]uintptr
	view := buf[:]
	return use(view)
}
// zeroinit8192 declares a zero-valued local array of 8192 uintptrs and
// passes a slice of it to use, returning use's result (the last element,
// i.e. 0). It is the largest case in the benchmark.
//
//go:noinline
func zeroinit8192() uintptr {
	var buf [8192]uintptr
	view := buf[:]
	return use(view)
}
// sink accumulates every benchmarked result in a package-level variable so
// the calls have an observable effect and cannot be discarded.
var sink uintptr

// BenchmarkZeroInit measures the cost of calling functions that declare a
// zero-initialized local array, for a range of array sizes. Each size runs
// as its own sub-benchmark, named after the number of elements.
func BenchmarkZeroInit(b *testing.B) {
	cases := []struct {
		F    func() uintptr
		Size string
	}{
		{zeroinit1, "1"},
		{zeroinit8, "8"},
		{zeroinit64, "64"},
		{zeroinit128, "128"},
		{zeroinit512, "512"},
		{zeroinit1024, "1024"},
		{zeroinit8192, "8192"},
	}
	for _, c := range cases {
		b.Run(c.Size, func(b *testing.B) {
			// Grow the stack here, not in the parent benchmark: the
			// testing package executes each sub-benchmark body on its own
			// goroutine, so a warm-up call made by the parent does not
			// enlarge the stack the timed loop actually runs on. One
			// untimed call makes sure any stack growth needed by this case
			// happens before measurement starts.
			sink += c.F()
			b.ResetTimer()

			for i := 0; i < b.N; i++ {
				sink += c.F()
			}
		})
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment