Skip to content

Instantly share code, notes, and snippets.

@alex
Created August 29, 2019 20:20
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alex/c628afea3b801195c27947f6a42c3534 to your computer and use it in GitHub Desktop.
Save alex/c628afea3b801195c27947f6a42c3534 to your computer and use it in GitHub Desktop.
package foo
import (
"bytes"
"testing"
)
var Data1 = bytes.Repeat([]byte("a"), 1)
var Data10 = bytes.Repeat([]byte("a"), 10)
var Data100 = bytes.Repeat([]byte("a"), 100)
var Data1000 = bytes.Repeat([]byte("a"), 1000)
var Data10000 = bytes.Repeat([]byte("a"), 10000)
func BenchmarkLinear_1(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
linearFindFirstMultiByteChar(Data1)
}
}
func BenchmarkVectorize_1(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
vectorizeFindFirstMultiByteChar(Data1)
}
}
func BenchmarkLinear_10(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
linearFindFirstMultiByteChar(Data10)
}
}
func BenchmarkVectorize_10(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
vectorizeFindFirstMultiByteChar(Data10)
}
}
func BenchmarkLinear_100(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
linearFindFirstMultiByteChar(Data100)
}
}
func BenchmarkVectorize_100(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
vectorizeFindFirstMultiByteChar(Data100)
}
}
func BenchmarkLinear_1000(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
linearFindFirstMultiByteChar(Data1000)
}
}
func BenchmarkVectorize_1000(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
vectorizeFindFirstMultiByteChar(Data1000)
}
}
func BenchmarkLinear_10000(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
linearFindFirstMultiByteChar(Data10000)
}
}
func BenchmarkVectorize_10000(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
vectorizeFindFirstMultiByteChar(Data10000)
}
}
alex@penguin /t/yy> go test -bench=.
goos: linux
goarch: amd64
pkg: foo
BenchmarkLinear_1-4 500000000 3.53 ns/op
BenchmarkVectorize_1-4 300000000 4.33 ns/op
BenchmarkLinear_10-4 200000000 9.82 ns/op
BenchmarkVectorize_10-4 200000000 6.70 ns/op
BenchmarkLinear_100-4 20000000 77.2 ns/op
BenchmarkVectorize_100-4 100000000 14.2 ns/op
BenchmarkLinear_1000-4 2000000 690 ns/op
BenchmarkVectorize_1000-4 20000000 98.7 ns/op
BenchmarkLinear_10000-4 200000 7754 ns/op
BenchmarkVectorize_10000-4 2000000 891 ns/op
PASS
package foo
import (
"unsafe"
)
func linearFindFirstMultiByteChar(bytes []byte) int {
for bytesIdx, b := range bytes {
// We have a multi-byte codepoint, we need to allocate
// codepointIndices
if b&0x80 == 0x80 {
return bytesIdx
}
}
return len(bytes)
}
const wordSize = int(unsafe.Sizeof(uintptr(0)))
func vectorizeFindFirstMultiByteChar(bytes []byte) int {
// TODO: check if unaligned is allowed
// Attempt to do a vectorized scan.
if len(bytes) > wordSize {
w := len(bytes) / wordSize
bytesw := *(*[]uintptr)(unsafe.Pointer(&bytes))
for i := 0; i < w; i++ {
if bytesw[i]&uintptr(0x8080808080808080) != 0 {
return i * wordSize
}
}
}
for i := len(bytes) - (len(bytes) % wordSize); i < len(bytes); i++ {
if bytes[i]&0x80 == 0x80 {
return i
}
}
return len(bytes)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment