Skip to content

Instantly share code, notes, and snippets.

@tbhartman
Last active February 10, 2021 21:36
Show Gist options
  • Save tbhartman/2e2b9e33d1b2b739eb8375bd856299b6 to your computer and use it in GitHub Desktop.
Save tbhartman/2e2b9e33d1b2b739eb8375bd856299b6 to your computer and use it in GitHub Desktop.
Benchmark reading large files in Go

Benchmark reading large files in Go

go test -bench . -benchmem -benchtime 0.2s

goos: windows
goarch: amd64
pkg: <redacted>
BenchmarkBufferSize/1e3-8                      3          75666900 ns/op            1704 B/op          9 allocs/op
BenchmarkBufferSize/1e3/bufio-8                3          77332900 ns/op            2397 B/op          9 allocs/op
BenchmarkBufferSize/1e3/simple-8               3          99333800 ns/op            2274 B/op          8 allocs/op
BenchmarkBufferSize/1e3/scanner-8              6          40166833 ns/op            6618 B/op         14 allocs/op
BenchmarkBufferSize/1e4-8                     18          12833300 ns/op            1895 B/op          8 allocs/op
BenchmarkBufferSize/1e4/bufio-8               13          16307677 ns/op           11595 B/op          9 allocs/op
BenchmarkBufferSize/1e4/simple-8               7          29142729 ns/op           11432 B/op          7 allocs/op
BenchmarkBufferSize/1e4/scanner-8              7          30571314 ns/op           15763 B/op         13 allocs/op
BenchmarkBufferSize/1e5-8                     39           5923087 ns/op            4084 B/op          8 allocs/op
BenchmarkBufferSize/1e5/bufio-8               26           9192319 ns/op          107884 B/op         10 allocs/op
BenchmarkBufferSize/1e5/simple-8              10          22299490 ns/op          107616 B/op          7 allocs/op
BenchmarkBufferSize/1e5/scanner-8              9          22666522 ns/op          112008 B/op         13 allocs/op
BenchmarkBufferSize/1e6-8                     39           5333428 ns/op           27144 B/op          8 allocs/op
BenchmarkBufferSize/1e6/bufio-8               26          10230781 ns/op         1008971 B/op          9 allocs/op
BenchmarkBufferSize/1e6/simple-8              10          21099570 ns/op         1008771 B/op          7 allocs/op
BenchmarkBufferSize/1e6/scanner-8             10          22999830 ns/op         1013201 B/op         13 allocs/op
BenchmarkBufferSize/1e7-8                     37           6297276 ns/op          271663 B/op          8 allocs/op
BenchmarkBufferSize/1e7/bufio-8               21          10714238 ns/op        10003750 B/op          9 allocs/op
BenchmarkBufferSize/1e7/simple-8              10          22700070 ns/op        10003569 B/op          7 allocs/op
BenchmarkBufferSize/1e7/scanner-8              8          26874938 ns/op        10007968 B/op         13 allocs/op
BenchmarkBufferSize/1e8-8                     33           7000003 ns/op         3031879 B/op          8 allocs/op
BenchmarkBufferSize/1e8/bufio-8               16          15562531 ns/op        100009260 B/op         9 allocs/op
BenchmarkBufferSize/1e8/simple-8               4          51500425 ns/op        100009112 B/op         7 allocs/op
BenchmarkBufferSize/1e8/scanner-8              7          56714329 ns/op        100013464 B/op        13 allocs/op
PASS
ok      <redacted>       12.216s
package main_test
import (
"bufio"
"bytes"
"fmt"
"io"
"io/ioutil"
"math"
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
// BenchmarkBufferSize compares four ways of reading the same temporary text
// file (1e6 lines of 27 bytes each) for buffer sizes from 1e3 to 1e8 bytes:
//
//   - "1e<N>":         raw os.File.Read into a buffer of the given size
//   - "1e<N>/bufio":   bufio.Reader of the given size, drained in 100-byte reads
//   - "1e<N>/simple":  bufio.Reader of the given size, counting '\n' by hand
//   - "1e<N>/scanner": bufio.Scanner layered on a bufio.Reader of the given size
//
// Every iteration opens the file, reads it to EOF and closes it again.
func BenchmarkBufferSize(b *testing.B) {
	const (
		lines = 1000000
		line  = "abcdefghijklmnopqrstuvwxyz\n"
	)

	// Create the fixture file once; all sub-benchmarks only read from it.
	f, err := ioutil.TempFile("", "bench_*.txt")
	if !assert.NoError(b, err) {
		return
	}
	path := f.Name()
	defer os.Remove(path)

	// Close before benchmarking (and before the deferred Remove) so the file
	// is fully flushed and can be deleted on platforms that lock open files.
	_, writeErr := f.WriteString(strings.Repeat(line, lines))
	closeErr := f.Close()
	if !assert.NoError(b, writeErr) || !assert.NoError(b, closeErr) {
		return
	}

	for _, pow := range []int{3, 4, 5, 6, 7, 8} {
		// The buffer size is loop-invariant for all four variants below.
		size := int(math.Pow(10, float64(pow)))

		b.Run(fmt.Sprintf("1e%d", pow), func(b *testing.B) {
			buf := make([]byte, size)
			for i := 0; i < b.N; i++ {
				if !assert.NoError(b, benchReadRaw(path, buf)) {
					return
				}
			}
		})
		b.Run(fmt.Sprintf("1e%d/bufio", pow), func(b *testing.B) {
			chunk := make([]byte, 100)
			for i := 0; i < b.N; i++ {
				if !assert.NoError(b, benchReadBuffered(path, size, chunk)) {
					return
				}
			}
		})
		b.Run(fmt.Sprintf("1e%d/simple", pow), func(b *testing.B) {
			chunk := make([]byte, 300)
			for i := 0; i < b.N; i++ {
				got, err := benchCountLinesSimple(path, size, chunk)
				if !assert.NoError(b, err) {
					return
				}
				assert.EqualValues(b, lines, got)
			}
		})
		b.Run(fmt.Sprintf("1e%d/scanner", pow), func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				got, err := benchCountLinesScanner(path, size)
				if !assert.NoError(b, err) {
					return
				}
				assert.EqualValues(b, lines, got)
			}
		})
	}
}

// benchReadRaw opens path and reads it to EOF directly from the file into buf.
// It returns nil on a clean EOF and the first other error otherwise.
func benchReadRaw(path string, buf []byte) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	// Deferred here (not in the benchmark loop) so the handle is released
	// after every iteration instead of piling up until the benchmark ends.
	defer f.Close()

	for {
		if _, err := f.Read(buf); err != nil {
			if err == io.EOF {
				return nil
			}
			return err
		}
	}
}

// benchReadBuffered opens path and drains it through a bufio.Reader with a
// buffer of size bytes, reading len(chunk) bytes at a time.
// It returns nil on a clean EOF and the first other error otherwise.
func benchReadBuffered(path string, size int, chunk []byte) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	r := bufio.NewReaderSize(f, size)
	for {
		if _, err := r.Read(chunk); err != nil {
			if err == io.EOF {
				return nil
			}
			return err
		}
	}
}

// benchCountLinesSimple opens path and counts '\n' bytes by reading through a
// bufio.Reader with a buffer of size bytes, len(chunk) bytes at a time.
// It returns the number of newlines seen and any error other than io.EOF.
func benchCountLinesSimple(path string, size int, chunk []byte) (int, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	r := bufio.NewReaderSize(f, size)
	count := 0
	for {
		n, err := r.Read(chunk)

		// Only the first n bytes were filled by this Read; anything beyond
		// is left over from an earlier call and must not be counted again.
		rest := chunk[:n]
		for {
			i := bytes.IndexByte(rest, '\n')
			if i < 0 {
				break
			}
			count++
			rest = rest[i+1:]
		}

		if err == io.EOF {
			return count, nil
		}
		if err != nil {
			return count, err
		}
	}
}

// benchCountLinesScanner opens path and counts lines with a bufio.Scanner fed
// from a bufio.Reader with a buffer of size bytes.
// It returns the number of lines scanned and the scanner's error, if any.
func benchCountLinesScanner(path string, size int) (int, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	s := bufio.NewScanner(bufio.NewReaderSize(f, size))
	count := 0
	for s.Scan() {
		count++
	}
	// Scan reports false both at EOF and on failure; Err tells them apart.
	return count, s.Err()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment