Last active
August 5, 2022 11:30
-
-
Save sebnyberg/f6501fc907e599e0441107653c23724a to your computer and use it in GitHub Desktop.
Sort + inplace dedup vs map
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package a | |
import ( | |
"math/rand" | |
"sort" | |
"testing" | |
) | |
var res []int | |
var res2 map[int]struct{} | |
func BenchmarkA(b *testing.B) { | |
nums := make([]int, 1e5) | |
for i := range nums { | |
nums[i] = rand.Intn(1e9) | |
} | |
b.Run("map", func(b *testing.B) { | |
// Naïve approach | |
a := make([]int, 1e5) | |
for i := 0; i < b.N; i++ { | |
b.StopTimer() | |
copy(a, nums) | |
b.StartTimer() | |
m := make(map[int]struct{}) | |
for _, x := range a { | |
m[x] = struct{}{} | |
} | |
res2 = m | |
} | |
}) | |
b.Run("map re-use", func(b *testing.B) { | |
// This test re-uses the map between iterations to reduce | |
// GC pressure. Note that the Go compiler recognizes the delete | |
// pattern and replaces it with a fast memclr. | |
a := make([]int, 1e5) | |
m := make(map[int]struct{}) | |
for i := 0; i < b.N; i++ { | |
b.StopTimer() | |
copy(a, nums) | |
b.StartTimer() | |
for k := range m { | |
delete(m, k) | |
} | |
for _, x := range a { | |
m[x] = struct{}{} | |
} | |
res2 = m | |
} | |
}) | |
b.Run("map with capacity", func(b *testing.B) { | |
a := make([]int, 1e5) | |
for i := 0; i < b.N; i++ { | |
b.StopTimer() | |
copy(a, nums) | |
b.StartTimer() | |
m := make(map[int]struct{}, 1e5/10) | |
for _, x := range a { | |
m[x] = struct{}{} | |
} | |
res2 = m | |
} | |
}) | |
b.Run("inplace unsafe", func(b *testing.B) { | |
// This version is dangerous - it mutates the input slice. | |
a := make([]int, 1e5) | |
for i := 0; i < b.N; i++ { | |
b.StopTimer() | |
copy(a, nums) | |
b.StartTimer() | |
sort.Ints(a) | |
var j int | |
for i := range a { | |
if a[i] == a[j] { | |
continue | |
} | |
j++ | |
a[j] = a[i] | |
} | |
a = a[:j+1] | |
res = a | |
} | |
}) | |
b.Run("inplace safe", func(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
a := make([]int, 1e5) | |
copy(a, nums) | |
sort.Ints(a) | |
var j int | |
for i := range a { | |
if a[i] == a[j] { | |
continue | |
} | |
j++ | |
a[j] = a[i] | |
} | |
a = a[:j+1] | |
res = a | |
} | |
}) | |
b.Run("inplace safe with buffer", func(b *testing.B) { | |
// This version re-uses a buffer for sorting the input slice | |
a := make([]int, 1e5) | |
for i := 0; i < b.N; i++ { | |
copy(a, nums) | |
sort.Ints(a) | |
var j int | |
for i := range a { | |
if a[i] == a[j] { | |
continue | |
} | |
j++ | |
a[j] = a[i] | |
} | |
a = a[:j+1] | |
res = a | |
} | |
}) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Running on
Benchmark approaches:
map
- allocate a new map without capacity each timemap_re-use
- re-use the same map as a buffer, clear with delete(m,k) each timemap_with_capacity
- initialize map with up-front capacitysort_unsafe
- sort and in-place de-duplicatesort_safe
- allocate new slice and copy over data, then sort and de-dupsort_safe_with_buffer
- copy to buffer slice, sort, and de-dupResults: