Skip to content

Instantly share code, notes, and snippets.

@aliuygur
Last active July 22, 2022 09:48
Show Gist options
  • Save aliuygur/16c66b4249cb42715091fe010eec7e33 to your computer and use it in GitHub Desktop.
Save aliuygur/16c66b4249cb42715091fe010eec7e33 to your computer and use it in GitHub Desktop.
removes duplicate values in given slice

this algorithm 10x faster than https://www.dotnetperls.com/duplicates-go and zero allocation

➜  api git:(master) ✗ go test -v -bench=. main_test.go
=== RUN   TestSliceUniq
--- PASS: TestSliceUniq (0.00s)
BenchmarkSliceUniq-4             3000000               439 ns/op               0 B/op          0 allocs/op
BenchmarkRemoveDuplicates-4       500000              4599 ns/op             571 B/op          8 allocs/op
PASS
ok      command-line-arguments  4.112s
package main
import "testing"
var ids = []int{1, 1, 1, 3, 5, 7, 9, 10, 10, 1, 1, 1, 1, 1, 1, 1, 1, 19, 8, 9, 90, 0, 0, 0, 0, 0, 0, 10, 10}
// SliceUniq removes duplicate values in given slice
func SliceUniq(s []int) []int {
for i := 0; i < len(s); i++ {
for i2 := i + 1; i2 < len(s); i2++ {
if s[i] == s[i2] {
// delete
s = append(s[:i2], s[i2+1:]...)
i2--
}
}
}
return s
}
// RemoveDuplicates removes duplicate values in given slice
// This is ~10x slower and 8 allocs/op algorithm that explained on https://www.dotnetperls.com/duplicates-go
func RemoveDuplicates(elements []int) []int {
// Use map to record duplicates as we find them.
encountered := map[int]bool{}
result := []int{}
for v := range elements {
if encountered[elements[v]] == true {
// Do not add duplicate.
} else {
// Record this element as an encountered element.
encountered[elements[v]] = true
// Append to result slice.
result = append(result, elements[v])
}
}
// Return the new slice.
return result
}
func TestSliceUniq(t *testing.T) {
testcases := []struct {
s []int
r []int
e bool
}{
{[]int{1, 1, 1, 2, 2, 2, 1}, []int{1, 2}, true},
{[]int{1}, []int{1}, true},
{[]int{9, 9, 9, 9, 9, 1, 9, 9, 9}, []int{9, 1}, true},
{[]int{}, []int{}, true},
{[]int{1, 1, 1}, []int{1, 1}, false},
{[]int{}, []int{}, true},
{nil, []int{}, false},
{nil, nil, true},
}
for _, tc := range testcases {
r := SliceUniq(tc.s)
if sliceEq(r, tc.r) != tc.e {
t.Errorf("Expected SliceUniq(%v) to be %v got %v", tc.s, tc.r, r)
}
}
}
func BenchmarkSliceUniq(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
SliceUniq(ids)
}
}
func BenchmarkRemoveDuplicates(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
RemoveDuplicates(ids)
}
}
func sliceEq(a, b []int) bool {
if a == nil && b == nil {
return true
}
if a == nil || b == nil {
return false
}
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
@dchapes
Copy link

dchapes commented Dec 14, 2016

As mentioned elsewhere:

It depends on the input data. You've replaced an O(n) algorithm with an O(n^2) one (approximately at least, not accounting for memory copying or that map access isn't O(1)). Pass in a slice of 1000+ elements and yours is ~5× slower; make it 10,000+ elements and yours is closer to 40× slower.

Given that both are probably fast enough for small n (such as your n=29 benchmark) it's dangerous to use a solution that blows up on medium sided input.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment