Skip to content

Instantly share code, notes, and snippets.

@sebnyberg
Last active August 5, 2022 11:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sebnyberg/f6501fc907e599e0441107653c23724a to your computer and use it in GitHub Desktop.
Save sebnyberg/f6501fc907e599e0441107653c23724a to your computer and use it in GitHub Desktop.
Sort + inplace dedup vs map
package a
import (
"math/rand"
"sort"
"testing"
)
var res []int
var res2 map[int]struct{}
func BenchmarkA(b *testing.B) {
nums := make([]int, 1e5)
for i := range nums {
nums[i] = rand.Intn(1e9)
}
b.Run("map", func(b *testing.B) {
// Naïve approach
a := make([]int, 1e5)
for i := 0; i < b.N; i++ {
b.StopTimer()
copy(a, nums)
b.StartTimer()
m := make(map[int]struct{})
for _, x := range a {
m[x] = struct{}{}
}
res2 = m
}
})
b.Run("map re-use", func(b *testing.B) {
// This test re-uses the map between iterations to reduce
// GC pressure. Note that the Go compiler recognizes the delete
// pattern and replaces it with a fast memclr.
a := make([]int, 1e5)
m := make(map[int]struct{})
for i := 0; i < b.N; i++ {
b.StopTimer()
copy(a, nums)
b.StartTimer()
for k := range m {
delete(m, k)
}
for _, x := range a {
m[x] = struct{}{}
}
res2 = m
}
})
b.Run("map with capacity", func(b *testing.B) {
a := make([]int, 1e5)
for i := 0; i < b.N; i++ {
b.StopTimer()
copy(a, nums)
b.StartTimer()
m := make(map[int]struct{}, 1e5/10)
for _, x := range a {
m[x] = struct{}{}
}
res2 = m
}
})
b.Run("inplace unsafe", func(b *testing.B) {
// This version is dangerous - it mutates the input slice.
a := make([]int, 1e5)
for i := 0; i < b.N; i++ {
b.StopTimer()
copy(a, nums)
b.StartTimer()
sort.Ints(a)
var j int
for i := range a {
if a[i] == a[j] {
continue
}
j++
a[j] = a[i]
}
a = a[:j+1]
res = a
}
})
b.Run("inplace safe", func(b *testing.B) {
for i := 0; i < b.N; i++ {
a := make([]int, 1e5)
copy(a, nums)
sort.Ints(a)
var j int
for i := range a {
if a[i] == a[j] {
continue
}
j++
a[j] = a[i]
}
a = a[:j+1]
res = a
}
})
b.Run("inplace safe with buffer", func(b *testing.B) {
// This version re-uses a buffer for sorting the input slice
a := make([]int, 1e5)
for i := 0; i < b.N; i++ {
copy(a, nums)
sort.Ints(a)
var j int
for i := range a {
if a[i] == a[j] {
continue
}
j++
a[j] = a[i]
}
a = a[:j+1]
res = a
}
})
}
@sebnyberg
Copy link
Author

sebnyberg commented Aug 5, 2022

Running on

goos: linux
goarch: amd64
cpu: Intel(R) Core(TM) i5-8500 CPU @ 3.00GHz

Benchmark approaches:

  • map - allocate a new map without capacity each time
  • map_re-use - re-use the same map as a buffer, clear with delete(m,k) each time
  • map_with_capacity - initialize map with up-front capacity
  • sort_unsafe - sort and in-place de-duplicate
  • sort_safe - allocate new slice and copy over data, then sort and de-dup
  • sort_safe_with_buffer - copy to buffer slice, sort, and de-dup

Results:

name                       time/op
A/map-6                    5.98ms ± 0%
A/map_re-use-6             3.30ms ± 0%
A/map_with_capacity-6      5.64ms ± 0%
A/sort_unsafe-6            11.8ms ± 0%
A/sort_safe-6              12.0ms ± 0%
A/sort_safe_with_buffer-6  11.9ms ± 0%

name                       alloc/op
A/map-6                    3.28MB ± 0%
A/map_re-use-6              231kB ± 0%
A/map_with_capacity-6      3.07MB ± 0%
A/sort_unsafe-6            8.30kB ± 0%
A/sort_safe-6               803kB ± 0%
A/sort_safe_with_buffer-6  8.30kB ± 0%

name                       allocs/op
A/map-6                     3.94k ± 0%
A/map_re-use-6              1.69k ± 0%
A/map_with_capacity-6       3.69k ± 0%
A/sort_unsafe-6              1.00 ± 0%
A/sort_safe-6                2.00 ± 0%
A/sort_safe_with_buffer-6    1.00 ± 0%

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment