Created
January 16, 2013 12:44
-
-
Save donovanhide/4546886 to your computer and use it in GitHub Desktop.
sort.Interface compatible merge sort
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
go test -bench=. | |
PASS | |
BenchmarkSortString1K 5000 386508 ns/op | |
BenchmarkSortInt1K 10000 194999 ns/op | |
BenchmarkSortInt64K 100 19355025 ns/op | |
ok _/usr/local/code/timsort 9.003s | |
go test -run=TestSortBM | |
PASS | |
ok _/usr/local/code/timsort 0.607s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package merge | |
func Sort(data Interface) { | |
if data == nil || data.Len() < 2 { | |
return | |
} | |
scratch := make([]int, data.Len()) | |
var mergeSort func(start, end int) | |
mergeSort = func(start, end int) { | |
size := end - start | |
middle := start + (size / 2) | |
switch { | |
case size < 2: | |
return | |
case size < 7: | |
insertionSort(data, start, end) | |
return | |
} | |
mergeSort(start, middle) | |
mergeSort(middle, end) | |
li, ls := start, middle | |
ri, rs := middle, end | |
cursor := 0 | |
for ; li < ls && ri < rs; cursor++ { | |
if data.Less(li, ri) { | |
scratch[li-start] = cursor | |
li++ | |
} else { | |
scratch[ri-start] = cursor | |
ri++ | |
} | |
} | |
for ; li < ls; li++ { | |
scratch[li-start] = cursor | |
cursor++ | |
} | |
for i := range scratch[:cursor] { | |
for j := scratch[i]; j != i; { | |
data.Swap(start+i, start+j) | |
scratch[i], scratch[j], j = scratch[j], scratch[i], scratch[j] | |
} | |
} | |
} | |
mergeSort(0, data.Len()) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2009 The Go Authors. All rights reserved. | |
// Use of this source code is governed by a BSD-style | |
// license that can be found in the LICENSE file. | |
package merge | |
import ( | |
"fmt" | |
"math" | |
"math/rand" | |
"strconv" | |
"testing" | |
) | |
var ints = [...]int{74, 59, 238, -784, 9845, 959, 905, 0, 0, 42, 7586, -5467984, 7586} | |
var float64s = [...]float64{74.3, 59.0, math.Inf(1), 238.2, -784.0, 2.3, math.NaN(), math.NaN(), math.Inf(-1), 9845.768, -959.7485, 905, 7.8, 7.8} | |
var strings = [...]string{"", "Hello", "foo", "bar", "foo", "f00", "%*&^*&^&", "***"} | |
func TestSortIntSlice(t *testing.T) { | |
data := ints | |
a := IntSlice(data[0:]) | |
Sort(a) | |
if !IsSorted(a) { | |
t.Errorf("sorted %v", ints) | |
t.Errorf(" got %v", data) | |
} | |
} | |
func TestSortFloat64Slice(t *testing.T) { | |
data := float64s | |
a := Float64Slice(data[0:]) | |
Sort(a) | |
if !IsSorted(a) { | |
t.Errorf("sorted %v", float64s) | |
t.Errorf(" got %v", data) | |
} | |
} | |
func TestSortStringSlice(t *testing.T) { | |
data := strings | |
a := StringSlice(data[0:]) | |
Sort(a) | |
if !IsSorted(a) { | |
t.Errorf("sorted %v", strings) | |
t.Errorf(" got %v", data) | |
} | |
} | |
func TestInts(t *testing.T) { | |
data := ints | |
Ints(data[0:]) | |
if !IntsAreSorted(data[0:]) { | |
t.Errorf("sorted %v", ints) | |
t.Errorf(" got %v", data) | |
} | |
} | |
func TestFloat64s(t *testing.T) { | |
data := float64s | |
Float64s(data[0:]) | |
if !Float64sAreSorted(data[0:]) { | |
t.Errorf("sorted %v", float64s) | |
t.Errorf(" got %v", data) | |
} | |
} | |
func TestStrings(t *testing.T) { | |
data := strings | |
Strings(data[0:]) | |
if !StringsAreSorted(data[0:]) { | |
t.Errorf("sorted %v", strings) | |
t.Errorf(" got %v", data) | |
} | |
} | |
func TestSortLarge_Random(t *testing.T) { | |
n := 1000000 | |
if testing.Short() { | |
n /= 100 | |
} | |
data := make([]int, n) | |
for i := 0; i < len(data); i++ { | |
data[i] = rand.Intn(100) | |
} | |
if IntsAreSorted(data) { | |
t.Fatalf("terrible rand.rand") | |
} | |
Ints(data) | |
if !IntsAreSorted(data) { | |
t.Errorf("sort didn't sort - 1M ints") | |
} | |
} | |
func BenchmarkSortString1K(b *testing.B) { | |
b.StopTimer() | |
for i := 0; i < b.N; i++ { | |
data := make([]string, 1<<10) | |
for i := 0; i < len(data); i++ { | |
data[i] = strconv.Itoa(i ^ 0x2cc) | |
} | |
b.StartTimer() | |
Strings(data) | |
b.StopTimer() | |
} | |
} | |
func BenchmarkSortInt1K(b *testing.B) { | |
b.StopTimer() | |
for i := 0; i < b.N; i++ { | |
data := make([]int, 1<<10) | |
for i := 0; i < len(data); i++ { | |
data[i] = i ^ 0x2cc | |
} | |
b.StartTimer() | |
Ints(data) | |
b.StopTimer() | |
} | |
} | |
func BenchmarkSortInt64K(b *testing.B) { | |
b.StopTimer() | |
for i := 0; i < b.N; i++ { | |
data := make([]int, 1<<16) | |
for i := 0; i < len(data); i++ { | |
data[i] = i ^ 0xcccc | |
} | |
b.StartTimer() | |
Ints(data) | |
b.StopTimer() | |
} | |
} | |
const ( | |
_Sawtooth = iota | |
_Rand | |
_Stagger | |
_Plateau | |
_Shuffle | |
_NDist | |
) | |
const ( | |
_Copy = iota | |
_Reverse | |
_ReverseFirstHalf | |
_ReverseSecondHalf | |
_Sorted | |
_Dither | |
_NMode | |
) | |
type testingData struct { | |
desc string | |
t *testing.T | |
data []int | |
maxswap int // number of swaps allowed | |
nswap int | |
} | |
func (d *testingData) Len() int { return len(d.data) } | |
func (d *testingData) Less(i, j int) bool { return d.data[i] < d.data[j] } | |
func (d *testingData) Swap(i, j int) { | |
if d.nswap >= d.maxswap { | |
d.t.Errorf("%s: used %d swaps sorting slice of %d", d.desc, d.nswap, len(d.data)) | |
d.t.FailNow() | |
} | |
d.nswap++ | |
d.data[i], d.data[j] = d.data[j], d.data[i] | |
} | |
func min(a, b int) int { | |
if a < b { | |
return a | |
} | |
return b | |
} | |
func lg(n int) int { | |
i := 0 | |
for 1<<uint(i) < n { | |
i++ | |
} | |
return i | |
} | |
func testBentleyMcIlroy(t *testing.T, sort func(Interface)) { | |
sizes := []int{100, 1023, 1024, 1025} | |
if testing.Short() { | |
sizes = []int{100, 127, 128, 129} | |
} | |
dists := []string{"sawtooth", "rand", "stagger", "plateau", "shuffle"} | |
modes := []string{"copy", "reverse", "reverse1", "reverse2", "sort", "dither"} | |
var tmp1, tmp2 [1025]int | |
for ni := 0; ni < len(sizes); ni++ { | |
n := sizes[ni] | |
for m := 1; m < 2*n; m *= 2 { | |
for dist := 0; dist < _NDist; dist++ { | |
j := 0 | |
k := 1 | |
data := tmp1[0:n] | |
for i := 0; i < n; i++ { | |
switch dist { | |
case _Sawtooth: | |
data[i] = i % m | |
case _Rand: | |
data[i] = rand.Intn(m) | |
case _Stagger: | |
data[i] = (i*m + i) % n | |
case _Plateau: | |
data[i] = min(i, m) | |
case _Shuffle: | |
if rand.Intn(m) != 0 { | |
j += 2 | |
data[i] = j | |
} else { | |
k += 2 | |
data[i] = k | |
} | |
} | |
} | |
mdata := tmp2[0:n] | |
for mode := 0; mode < _NMode; mode++ { | |
switch mode { | |
case _Copy: | |
for i := 0; i < n; i++ { | |
mdata[i] = data[i] | |
} | |
case _Reverse: | |
for i := 0; i < n; i++ { | |
mdata[i] = data[n-i-1] | |
} | |
case _ReverseFirstHalf: | |
for i := 0; i < n/2; i++ { | |
mdata[i] = data[n/2-i-1] | |
} | |
for i := n / 2; i < n; i++ { | |
mdata[i] = data[i] | |
} | |
case _ReverseSecondHalf: | |
for i := 0; i < n/2; i++ { | |
mdata[i] = data[i] | |
} | |
for i := n / 2; i < n; i++ { | |
mdata[i] = data[n-(i-n/2)-1] | |
} | |
case _Sorted: | |
for i := 0; i < n; i++ { | |
mdata[i] = data[i] | |
} | |
// Ints is known to be correct | |
// because mode Sort runs after mode _Copy. | |
Ints(mdata) | |
case _Dither: | |
for i := 0; i < n; i++ { | |
mdata[i] = data[i] + i%5 | |
} | |
} | |
desc := fmt.Sprintf("n=%d m=%d dist=%s mode=%s", n, m, dists[dist], modes[mode]) | |
d := &testingData{desc, t, mdata[0:n], n * lg(n) * 12 / 10, 0} | |
sort(d) | |
// If we were testing C qsort, we'd have to make a copy | |
// of the slice and sort it ourselves and then compare | |
// x against it, to ensure that qsort was only permuting | |
// the data, not (for example) overwriting it with zeros. | |
// | |
// In go, we don't have to be so paranoid: since the only | |
// mutating method Sort can call is TestingData.swap, | |
// it suffices here just to check that the final slice is sorted. | |
if !IntsAreSorted(mdata) { | |
t.Errorf("%s: ints not sorted", desc) | |
t.Errorf("\t%v", mdata) | |
t.FailNow() | |
} | |
} | |
} | |
} | |
} | |
} | |
func TestSortBM(t *testing.T) { | |
testBentleyMcIlroy(t, Sort) | |
} | |
// func TestHeapsortBM(t *testing.T) { | |
// testBentleyMcIlroy(t, Heapsort) | |
// } | |
// This is based on the "antiquicksort" implementation by M. Douglas McIlroy. | |
// See http://www.cs.dartmouth.edu/~doug/mdmspe.pdf for more info. | |
type adversaryTestingData struct { | |
data []int | |
keys map[int]int | |
candidate int | |
} | |
func (d *adversaryTestingData) Len() int { return len(d.data) } | |
func (d *adversaryTestingData) Less(i, j int) bool { | |
if _, present := d.keys[i]; !present { | |
if _, present := d.keys[j]; !present { | |
if i == d.candidate { | |
d.keys[i] = len(d.keys) | |
} else { | |
d.keys[j] = len(d.keys) | |
} | |
} | |
} | |
if _, present := d.keys[i]; !present { | |
d.candidate = i | |
return false | |
} | |
if _, present := d.keys[j]; !present { | |
d.candidate = j | |
return true | |
} | |
return d.keys[i] >= d.keys[j] | |
} | |
func (d *adversaryTestingData) Swap(i, j int) { | |
d.data[i], d.data[j] = d.data[j], d.data[i] | |
} | |
func TestAdversary(t *testing.T) { | |
const size = 100 | |
data := make([]int, size) | |
for i := 0; i < size; i++ { | |
data[i] = i | |
} | |
d := &adversaryTestingData{data, make(map[int]int), 0} | |
Sort(d) // This should degenerate to heapsort. | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pwd | |
/usr/local/src/go/src/pkg/sort | |
go test -bench=. | |
PASS | |
BenchmarkSortString1K 2000 785649 ns/op | |
BenchmarkSortInt1K 5000 354671 ns/op | |
BenchmarkSortInt64K 50 35170143 ns/op | |
ok sort 6.815s | |
go test -run=TestSortBM | |
PASS | |
ok sort 0.383s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package merge | |
import ( | |
"math" | |
"sort" | |
) | |
type Interface interface { | |
sort.Interface | |
} | |
// Definitions take from /src/pkg/sort/sort.go | |
func insertionSort(data Interface, a, b int) { | |
for i := a + 1; i < b; i++ { | |
for j := i; j > a && data.Less(j, j-1); j-- { | |
data.Swap(j, j-1) | |
} | |
} | |
} | |
// IsSorted reports whether data is sorted. | |
func IsSorted(data Interface) bool { | |
n := data.Len() | |
for i := n - 1; i > 0; i-- { | |
if data.Less(i, i-1) { | |
return false | |
} | |
} | |
return true | |
} | |
// Convenience types for common cases | |
// IntSlice attaches the methods of Interface to []int, sorting in increasing order. | |
type IntSlice []int | |
func (p IntSlice) Len() int { return len(p) } | |
func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] } | |
func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } | |
// Sort is a convenience method. | |
func (p IntSlice) Sort() { Sort(p) } | |
// Float64Slice attaches the methods of Interface to []float64, sorting in increasing order. | |
type Float64Slice []float64 | |
func (p Float64Slice) Len() int { return len(p) } | |
func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || math.IsNaN(p[i]) && !math.IsNaN(p[j]) } | |
func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } | |
// Sort is a convenience method. | |
func (p Float64Slice) Sort() { Sort(p) } | |
// StringSlice attaches the methods of Interface to []string, sorting in increasing order. | |
type StringSlice []string | |
func (p StringSlice) Len() int { return len(p) } | |
func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] } | |
func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } | |
// Sort is a convenience method. | |
func (p StringSlice) Sort() { Sort(p) } | |
// Convenience wrappers for common cases | |
// Ints sorts a slice of ints in increasing order. | |
func Ints(a []int) { Sort(IntSlice(a)) } | |
// Float64s sorts a slice of float64s in increasing order. | |
func Float64s(a []float64) { Sort(Float64Slice(a)) } | |
// Strings sorts a slice of strings in increasing order. | |
func Strings(a []string) { Sort(StringSlice(a)) } | |
// IntsAreSorted tests whether a slice of ints is sorted in increasing order. | |
func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) } | |
// Float64sAreSorted tests whether a slice of float64s is sorted in increasing order. | |
func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) } | |
// StringsAreSorted tests whether a slice of strings is sorted in increasing order. | |
func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment