-
-
Save Ozoniuss/87e78f8e35d2ad147d3b81149e438308 to your computer and use it in GitHub Desktop.
package main | |
import ( | |
"bufio" | |
"os" | |
"testing" | |
) | |
// ret is a package-level sink: the benchmark stores the last scanned line
// here so the result of the loop is observably used.
var ret = ""

// BenchmarkBufio measures scanning moby.txt line by line with a
// bufio.Scanner, converting every line to a string with Scanner.Text.
//
// The benchmark is failed (rather than silently timing an empty scan) when
// moby.txt cannot be opened, rewound, or scanned to the end.
func BenchmarkBufio(b *testing.B) {
	f, err := os.Open("moby.txt")
	if err != nil {
		b.Fatalf("opening input: %v", err)
	}
	defer f.Close()

	var line string

	// Opening the file is setup, not the work being measured.
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// Rewind (offset 0 relative to the start of the file) so that every
		// iteration scans the whole file again.
		if _, err := f.Seek(0, 0); err != nil {
			b.Fatalf("rewinding input: %v", err)
		}
		s := bufio.NewScanner(f)
		for s.Scan() {
			line = s.Text()
		}
		// Scan returns false both at EOF and on error; only Err tells them
		// apart.
		if err := s.Err(); err != nil {
			b.Fatalf("scanning input: %v", err)
		}
	}
	ret = line
}
// $ go test -bench=. -benchmem main_test.go | |
// goos: windows | |
// goarch: amd64 | |
// cpu: AMD Ryzen 7 5800H with Radeon Graphics | |
// BenchmarkBufio-16 848 1394192 ns/op 1412961 B/op 19231 allocs/op | |
// PASS | |
// ok command-line-arguments 1.439s | |
// | |
// mem profile shows that s.Text() allocates | |
// | |
// Note that it is possible to use s.Bytes() to avoid the allocations. |
Thanks for pointing it out, I'll update the gist.
Any time! Thanks for blogging about performance-related topics, by the way—it's always good to see.
How bad would a string cast of .Bytes() be?
(Apologies if you saw a version of the below with a mistake in it—this is now corrected.)
If you call `string`, it'd allocate. It's identical to `s.Text()`.
If you used the same unsafe trick that (*strings.Builder).String uses, namely
// String returns b.buf reinterpreted as a string: unsafe.String is handed the
// slice's data pointer and length, so the bytes are not copied.
func (b *Builder) String() string {
return unsafe.String(unsafe.SliceData(b.buf), len(b.buf))
}
then you'd avoid the allocation, but it'd be unsafe. The next scan may overwrite the underlying data in the string, which breaks the guarantee that strings are immutable. For example,
package main
import (
"bufio"
"bytes"
"fmt"
"strings"
"unsafe"
)
// main demonstrates why aliasing Scanner.Bytes as a string is unsafe: the
// string built from the first token is printed once right after the cast and
// once more after scanning has finished, so any change to the bytes it points
// at becomes visible.
func main() {
	const repeats = 1024

	// Two long lines: "hello " repeated, then "world " repeated.
	input := strings.Repeat("hello ", repeats) + "\n" + strings.Repeat("world ", repeats) + "\n"
	scanner := bufio.NewScanner(bytes.NewReader([]byte(input)))

	var aliased string
	captured := false
	for scanner.Scan() {
		token := scanner.Bytes()
		fmt.Printf("scanning :: bs : %p\n", token)
		if captured {
			continue
		}
		// Only the first token is aliased; no copy is made here.
		aliased = unsafe.String(unsafe.SliceData(token), len(token))
		fmt.Printf("after unsafe cast :: s : %s\n", aliased[:5])
		captured = true
	}
	fmt.Printf("after scanning :: s : %s\n", aliased[:5])
}
prints out
scanning :: bs : 0xc0000ec000
after unsafe cast :: s : hello
scanning :: bs : 0xc0000ec000
after scanning :: s : world
for me.
The interfaces of the bytes and strings packages are almost isomorphic, so if you only need to handle each line 'immediately' as it is read, you can avoid the allocation by working with the byte slice directly and doing the necessary work inside the scanning loop.
(Note that there are some differences between the packages, e.g. bytes.Runes exists but strings.Runes doesn't.)
You can avoid the allocations with s.Bytes().