Skip to content

Instantly share code, notes, and snippets.

@Deleplace
Created February 17, 2022 16:52
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Deleplace/8a9224f8b2838a9c1d112acde5bdd3c8 to your computer and use it in GitHub Desktop.
Save Deleplace/8a9224f8b2838a9c1d112acde5bdd3c8 to your computer and use it in GitHub Desktop.
Benchmark for sorting a slice of strings, case-insensitively
package bench
import (
"math/rand"
"sort"
"strings"
"testing"
"unicode"
"unicode/utf8"
)
const M = 10_000
var data = make([]string, M)
func init() {
for j := range data {
data[j] = randomString(12)
}
}
func BenchmarkSort(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
b.StopTimer()
rand.Shuffle(len(data), func(ii, jj int) { data[ii], data[jj] = data[jj], data[ii] })
b.StartTimer()
sort.Strings(data)
}
}
func BenchmarkSortLess(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
b.StopTimer()
rand.Shuffle(len(data), func(ii, jj int) { data[ii], data[jj] = data[jj], data[ii] })
b.StartTimer()
sort.Slice(data, func(ii, jj int) bool { return less(data[ii], data[jj]) })
}
}
func BenchmarkSortToLower(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
b.StopTimer()
rand.Shuffle(len(data), func(ii, jj int) { data[ii], data[jj] = data[jj], data[ii] })
b.StartTimer()
sort.Slice(data, func(ii, jj int) bool { return strings.ToLower(data[ii]) < strings.ToLower(data[jj]) })
}
}
func BenchmarkSortInsensitiveOptimized(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
b.StopTimer()
rand.Shuffle(len(data), func(ii, jj int) { data[ii], data[jj] = data[jj], data[ii] })
b.StartTimer()
sort.Slice(data, func(ii, jj int) bool { return lessCaseInsensitive(data[ii], data[jj]) })
}
}
// less compares without allocating. It is equivalent to s<t .
func less(s, t string) bool {
for {
if len(t) == 0 {
return false
}
if len(s) == 0 {
return true
}
c, sizec := utf8.DecodeRuneInString(s)
d, sized := utf8.DecodeRuneInString(t)
if c < d {
return true
}
if c > d {
return false
}
s = s[sizec:]
t = t[sized:]
}
}
// lessCaseInsensitive compares s, t without allocating
func lessCaseInsensitive(s, t string) bool {
for {
if len(t) == 0 {
return false
}
if len(s) == 0 {
return true
}
c, sizec := utf8.DecodeRuneInString(s)
d, sized := utf8.DecodeRuneInString(t)
lowerc := unicode.ToLower(c)
lowerd := unicode.ToLower(d)
if lowerc < lowerd {
return true
}
if lowerc > lowerd {
return false
}
s = s[sizec:]
t = t[sized:]
}
}
func TestLessCaseInsensitive(t *testing.T) {
test := t
for i := 0; i < 1_000_000; i++ {
s := randomString(12 + i%4)
t := randomString(12 + i%5)
less1 := strings.ToLower(s) < strings.ToLower(t)
less2 := lessCaseInsensitive(s, t)
if less1 != less2 {
test.Fatalf("Case-insensitive %q < %q : %v, %v", s, t, less1, less2)
}
}
}
const (
minChar = 'A'
maxChar = 'z'
spanChar = maxChar - minChar
)
// e.g. "FbpXH\fgTAvx"
func randomString(n int) string {
buf := make([]byte, n)
for i := range buf {
buf[i] = minChar + byte(rand.Intn(spanChar))
}
return string(buf)
}
@Deleplace
Copy link
Author

Benchmark results:

$ go test -v -bench=. sortstrings_test.go 
=== RUN   TestLessCaseInsensitive
--- PASS: TestLessCaseInsensitive (0.91s)
goos: linux
goarch: amd64
cpu: Intel(R) Core(TM) i7-7600U CPU @ 2.80GHz
BenchmarkSort
BenchmarkSort-4                       	     650	   1831062 ns/op	      24 B/op	       1 allocs/op
BenchmarkSortLess
BenchmarkSortLess-4                   	     346	   3541560 ns/op	      56 B/op	       2 allocs/op
BenchmarkSortToLower
BenchmarkSortToLower-4                	      50	  24852428 ns/op	 4368782 B/op	  273046 allocs/op
BenchmarkSortInsensitiveOptimized
BenchmarkSortInsensitiveOptimized-4   	     265	   4715341 ns/op	      56 B/op	       2 allocs/op
PASS

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment