Skip to content

Instantly share code, notes, and snippets.

@linnv
Last active Jul 31, 2019
Embed
What would you like to do?
merge sort parallelly, reuse slice
package main
import (
"sync"
)
const parallelBySliceLength = 1 << 10 * 3
func MergeSortLoop(src []int64) {
srcReadOnly := make([]int64, len(src))//update only in merge stage
copy(srcReadOnly, src)
mergeSortDivideLoop(src, srcReadOnly, 0, len(src))
}
func mergeSortDivideLoop(srcWriteOnly, srcReadOnly []int64, indexLeftPart, indexRightPart int) {
srcLen := indexRightPart - indexLeftPart
if indexRightPart <= indexLeftPart || srcLen == 1 {
return
}
mid := (indexRightPart + indexLeftPart) / 2
const parallelBySliceLength = 1 << 10
if mid-indexLeftPart > parallelBySliceLength || indexRightPart-mid > parallelBySliceLength {
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer wg.Done()
mergeSortDivideLoop(srcWriteOnly, srcReadOnly, indexLeftPart, mid)
return
}()
go func() {
defer wg.Done()
mergeSortDivideLoop(srcWriteOnly, srcReadOnly, mid, indexRightPart)
return
}()
wg.Wait()
mergeSortConqueLoop(srcWriteOnly, srcReadOnly, indexLeftPart, indexRightPart)
return
}
mergeSortDivideLoop(srcWriteOnly, srcReadOnly, indexLeftPart, mid)
mergeSortDivideLoop(srcWriteOnly, srcReadOnly, mid, indexRightPart)
mergeSortConqueLoop(srcWriteOnly, srcReadOnly, indexLeftPart, indexRightPart)
return
}
func mergeSortConqueLoop(srcWriteOnly, srcReadOnly []int64, indexLeftPart, indexRightPart int) {
mid := (indexRightPart + indexLeftPart) / 2
indexLeftPartMove, indexRightPartMove, IndexRetMove := indexLeftPart, mid, indexLeftPart
for {
if indexLeftPartMove < mid && indexRightPartMove < indexRightPart {
if srcReadOnly[indexLeftPartMove] <= srcReadOnly[indexRightPartMove] {
srcWriteOnly[IndexRetMove] = srcReadOnly[indexLeftPartMove]
indexLeftPartMove++
} else {
srcWriteOnly[IndexRetMove] = srcReadOnly[indexRightPartMove]
indexRightPartMove++
}
IndexRetMove++
} else {
break
}
}
for i := indexLeftPartMove; i < mid; i++ {
srcWriteOnly[IndexRetMove] = srcReadOnly[i]
IndexRetMove++
}
for i := indexRightPartMove; i < indexRightPart; i++ {
srcWriteOnly[IndexRetMove] = srcReadOnly[i]
IndexRetMove++
}
for i := indexLeftPart; i < indexRightPart; i++ {
srcReadOnly[i] = srcWriteOnly[i]
}
return
}
@linnv
Copy link
Author

linnv commented Jul 31, 2019

original, no reuse no parallel processing

go test
OK: 1 passed
PASS
ok  	pingcap/talentplan/tidb/mergesort	1.077s
go test -bench Benchmark -run xx -count 5 -benchmem
goos: linux
goarch: amd64
pkg: pingcap/talentplan/tidb/mergesort
BenchmarkMergeSort-8    	       1	3893507483 ns/op	6442451344 B/op	50331649 allocs/op
BenchmarkMergeSort-8    	       1	3808637998 ns/op	6442451136 B/op	50331647 allocs/op
BenchmarkMergeSort-8    	       1	3795119774 ns/op	6442450976 B/op	50331645 allocs/op
BenchmarkMergeSort-8    	       1	3772817928 ns/op	6442452912 B/op	50331651 allocs/op
BenchmarkMergeSort-8    	       1	3814236845 ns/op	6442450944 B/op	50331645 allocs/op
BenchmarkNormalSort-8   	       1	3071768257 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3072594679 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3092922348 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3069391335 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3057305870 ns/op	      64 B/op	       2 allocs/op
PASS
ok  	pingcap/talentplan/tidb/mergesort	37.938s

reuse src only

go test
OK: 1 passed
PASS
ok  	pingcap/talentplan/tidb/mergesort	0.938s
go test -bench Benchmark -run xx -count 5 -benchmem
goos: linux
goarch: amd64
pkg: pingcap/talentplan/tidb/mergesort
BenchmarkMergeSort-8    	       1	2513324261 ns/op	3221225664 B/op	16777217 allocs/op
BenchmarkMergeSort-8    	       1	2396414536 ns/op	3221225568 B/op	16777216 allocs/op
BenchmarkMergeSort-8    	       1	2395802709 ns/op	3221225472 B/op	16777215 allocs/op
BenchmarkMergeSort-8    	       1	2381100690 ns/op	3221225472 B/op	16777215 allocs/op
BenchmarkMergeSort-8    	       1	2430918214 ns/op	3221225568 B/op	16777216 allocs/op
BenchmarkNormalSort-8   	       1	3089350242 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3069443714 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3057800599 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3078103868 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3076057507 ns/op	      64 B/op	       2 allocs/op
PASS
ok  	pingcap/talentplan/tidb/mergesort	30.922s

parallel processing, no reuse merge slice

#parallelBySliceLength = 1024

go test
OK: 1 passed
PASS
ok      pingcap/talentplan/tidb/mergesort       0.826s
go test -bench Benchmark -run xx -count 5 -benchmem
goos: linux
goarch: amd64
pkg: pingcap/talentplan/tidb/mergesort
BenchmarkMergeSort-8                   1        1694944329 ns/op        4296567280 B/op 50347580 allocs/op
BenchmarkMergeSort-8                   1        8000233542 ns/op        4295629248 B/op 50345188 allocs/op
BenchmarkMergeSort-8                   1        1185773980 ns/op        4295821008 B/op 50346110 allocs/op
BenchmarkMergeSort-8                   1        1234768689 ns/op        4295706672 B/op 50346651 allocs/op
BenchmarkMergeSort-8                   1        1294251409 ns/op        4296080336 B/op 50348584 allocs/op
BenchmarkNormalSort-8                  1        3045929957 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3007450279 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3030842869 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3040292669 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        2996486844 ns/op              64 B/op          2 allocs/op
PASS
ok      pingcap/talentplan/tidb/mergesort       32.042s


#parallelBySliceLength = 2024 
 
go test
OK: 1 passed
PASS
ok      pingcap/talentplan/tidb/mergesort       0.872s
go test -bench Benchmark -run xx -count 5 -benchmem
goos: linux
goarch: amd64
pkg: pingcap/talentplan/tidb/mergesort
BenchmarkMergeSort-8                   1        1425721789 ns/op        4299015840 B/op 50359775 allocs/op
BenchmarkMergeSort-8                   1        1420411201 ns/op        4295930512 B/op 50350815 allocs/op
BenchmarkMergeSort-8                   1        1208265296 ns/op        4295700736 B/op 50347640 allocs/op
BenchmarkMergeSort-8                   1        1199947560 ns/op        4295674336 B/op 50347394 allocs/op
BenchmarkMergeSort-8                   1        1218982727 ns/op        4296968560 B/op 50356995 allocs/op
BenchmarkNormalSort-8                  1        3023332152 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3045563032 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3032512902 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3031763179 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3016555534 ns/op              64 B/op          2 allocs/op
PASS


#parallelBySliceLength = 512
 
go test
OK: 1 passed
PASS
ok      pingcap/talentplan/tidb/mergesort       0.845s
go test -bench Benchmark -run xx -count 5 -benchmem
goos: linux
goarch: amd64
pkg: pingcap/talentplan/tidb/mergesort
BenchmarkMergeSort-8                   1        1833221943 ns/op        4312226576 B/op 50435369 allocs/op
BenchmarkMergeSort-8                   1        4082108056 ns/op        4298228144 B/op 50385637 allocs/op
BenchmarkMergeSort-8                   1        1862066891 ns/op        4298917216 B/op 50394234 allocs/op
BenchmarkMergeSort-8                   1        1434346895 ns/op        4298015920 B/op 50382980 allocs/op
BenchmarkMergeSort-8                   1        1555472650 ns/op        4299001936 B/op 50395290 allocs/op
BenchmarkNormalSort-8                  1        3089589588 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3093667622 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3075541466 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3081735903 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3078504099 ns/op              64 B/op          2 allocs/op
PASS
ok      pingcap/talentplan/tidb/mergesort       29.769s


#parallelBySliceLength = 5120
 
go test
OK: 1 passed
PASS
ok      pingcap/talentplan/tidb/mergesort       0.829s
go test -bench Benchmark -run xx -count 5 -benchmem
goos: linux
goarch: amd64
pkg: pingcap/talentplan/tidb/mergesort
BenchmarkMergeSort-8                   1        1258697941 ns/op        4295613152 B/op 50336960 allocs/op
BenchmarkMergeSort-8                   1        1243773689 ns/op        4295215984 B/op 50335559 allocs/op
BenchmarkMergeSort-8                   1        1235887467 ns/op        4295990064 B/op 50339741 allocs/op
BenchmarkMergeSort-8                   1        1184990667 ns/op        4295361360 B/op 50338256 allocs/op
BenchmarkMergeSort-8                   1        1173499904 ns/op        4295203040 B/op 50336337 allocs/op
BenchmarkNormalSort-8                  1        3037254619 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3080901269 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3066027086 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3076599704 ns/op              64 B/op          2 allocs/op
BenchmarkNormalSort-8                  1        3077239312 ns/op              64 B/op          2 allocs/op
PASS
ok      pingcap/talentplan/tidb/mergesort       24.901s

parallel processing, reuse merge slice by global slice

go test
### parallelBySliceLength 1024

OK: 1 passed
PASS
ok  	pingcap/talentplan/tidb/mergesort	0.774s
go test -bench Benchmark -run xx -count 5 -benchmem
goos: linux
goarch: amd64
pkg: pingcap/talentplan/tidb/mergesort
BenchmarkMergeSort-8    	       2	 746537292 ns/op	134673056 B/op	   10896 allocs/op
BenchmarkMergeSort-8    	       2	 710619528 ns/op	134532304 B/op	   10510 allocs/op
BenchmarkMergeSort-8    	       2	 729728190 ns/op	134565920 B/op	   10700 allocs/op
BenchmarkMergeSort-8    	       2	 724261968 ns/op	134604528 B/op	   10997 allocs/op
BenchmarkMergeSort-8    	       2	 727397725 ns/op	134583616 B/op	   10971 allocs/op
BenchmarkNormalSort-8   	       1	3038556605 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3059714942 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3055920706 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3044871720 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3070945895 ns/op	      64 B/op	       2 allocs/op
PASS
ok  	pingcap/talentplan/tidb/mergesort	31.348s

go test
### parallelBySliceLength 2048
OK: 1 passed
PASS
ok  	pingcap/talentplan/tidb/mergesort	0.789s
go test -bench Benchmark -run xx -count 5 -benchmem
goos: linux
goarch: amd64
pkg: pingcap/talentplan/tidb/mergesort
BenchmarkMergeSort-8    	       2	 746985357 ns/op	134709488 B/op	   11409 allocs/op
BenchmarkMergeSort-8    	       2	 727311453 ns/op	134559664 B/op	   10868 allocs/op
BenchmarkMergeSort-8    	       2	 718859453 ns/op	134543312 B/op	   10669 allocs/op
BenchmarkMergeSort-8    	       2	 729322940 ns/op	134556352 B/op	   10816 allocs/op
BenchmarkMergeSort-8    	       2	 724356281 ns/op	134545408 B/op	   10689 allocs/op
BenchmarkNormalSort-8   	       1	3039875363 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3042192861 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3058530864 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3057823013 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3073187248 ns/op	      64 B/op	       2 allocs/op
PASS
ok  	pingcap/talentplan/tidb/mergesort	31.397s

go test
### parallelBySliceLength 3072
 
OK: 1 passed
PASS
ok  	pingcap/talentplan/tidb/mergesort	0.776s
go test -bench Benchmark -run xx -count 5 -benchmem
goos: linux
goarch: amd64
pkg: pingcap/talentplan/tidb/mergesort
BenchmarkMergeSort-8    	       2	 740009248 ns/op	134580240 B/op	   10910 allocs/op
BenchmarkMergeSort-8    	       2	 725102111 ns/op	134518448 B/op	   10346 allocs/op
BenchmarkMergeSort-8    	       2	 721777166 ns/op	134543712 B/op	   10673 allocs/op
BenchmarkMergeSort-8    	       2	 724280146 ns/op	134542032 B/op	   10636 allocs/op
BenchmarkMergeSort-8    	       2	 736240459 ns/op	134602688 B/op	   11099 allocs/op
BenchmarkNormalSort-8   	       1	3057071466 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3049636560 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3044460905 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3054238309 ns/op	      64 B/op	       2 allocs/op
BenchmarkNormalSort-8   	       1	3039358926 ns/op	      64 B/op	       2 allocs/op
PASS
ok  	pingcap/talentplan/tidb/mergesort	31.371s

Loading

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment