-
-
Save lysu/39d4e114632d1790af171ed98ab7d8cb to your computer and use it in GitHub Desktop.
package main_test | |
import ( | |
"testing" | |
"fmt" | |
) | |
// Benchmark fixtures: a long (~160-byte) and a short (22-byte) string used
// to compare string-copy strategies across input sizes. They are package
// level so that benchmark loops can sink results into them, preventing the
// compiler from eliminating the copy as dead code.
var (
	longStr  = "121212123123231231231231311111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111123123"
	shortStr = "1212121231232312312311"
)
func BenchmarkLC1(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy1(longStr) | |
} | |
} | |
func BenchmarkLC2(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy2(longStr) | |
} | |
} | |
func BenchmarkLC3(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy3(longStr) | |
} | |
} | |
func BenchmarkLC4(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy4(longStr) | |
} | |
} | |
func BenchmarkSC1(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy1(shortStr) | |
} | |
} | |
func BenchmarkSC2(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy2(shortStr) | |
} | |
} | |
func BenchmarkSC3(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy3(shortStr) | |
} | |
} | |
func BenchmarkSC4(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy4(shortStr) | |
} | |
} | |
// copy1 copies a string by formatting it through fmt.Sprintf.
// (Deliberately kept as Sprintf — this is the variant being benchmarked.)
func copy1(s string) string {
	out := fmt.Sprintf("%s", s)
	return out
}
// copy2 copies a string by round-tripping it through a []byte; both
// conversions are real copies, so the result has fresh backing storage.
func copy2(s string) string {
	buf := []byte(s)
	return string(buf)
}
// copy3 copies a string by appending a sentinel space and slicing it back
// off; the concatenation forces fresh backing storage for the result.
func copy3(a string) string {
	padded := a + " "
	return padded[:len(a)]
}
// copy4 copies a string by splitting it at index 1 and concatenating the
// halves again; the empty string is returned as-is (no split possible).
func copy4(a string) string {
	if a == "" {
		return ""
	}
	head, tail := a[:1], a[1:]
	return head + tail
}
lysu
commented
Jul 16, 2018
•
Here is my code:
package main
import (
"testing"
)
// copyString "copies" a string via concatenation with the empty string.
// NOTE(review): as this thread goes on to show, src + "" appears not to
// allocate at all, so this may return the original string unchanged.
func copyString(src string) (dst string) {
	dst = src + ""
	return dst
}
// copyBytes copies the first parameter into the caller-supplied second
// slice and returns that slice. If the destination is shorter than the
// source, the copy is silently truncated by the built-in copy.
func copyBytes(from, to []byte) []byte {
	copy(to, from)
	return to
}
// BenchmarkCopyString times copyString on a 3-byte literal.
func BenchmarkCopyString(b *testing.B) {
	input := "123"
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_ = copyString(input)
	}
}
// BenchmarkCopyBytes times copyBytes on a 3-byte slice. The destination
// is allocated once, before the timer is reset, so the per-op cost
// excludes the make([]byte, ...) overhead.
func BenchmarkCopyBytes(b *testing.B) {
	input := []byte("123")
	scratch := make([]byte, len(input))
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_ = copyBytes(input, scratch)
	}
}
The benchmark result is:
➜ go test -bench BenchmarkCopy -run xx -count 5
goos: darwin
goarch: amd64
BenchmarkCopyString-8 200000000 7.55 ns/op
BenchmarkCopyString-8 200000000 7.53 ns/op
BenchmarkCopyString-8 200000000 7.52 ns/op
BenchmarkCopyString-8 200000000 7.76 ns/op
BenchmarkCopyString-8 200000000 7.60 ns/op
BenchmarkCopyBytes-8 500000000 3.07 ns/op
BenchmarkCopyBytes-8 500000000 3.08 ns/op
BenchmarkCopyBytes-8 500000000 3.04 ns/op
BenchmarkCopyBytes-8 500000000 3.36 ns/op
BenchmarkCopyBytes-8 500000000 3.34 ns/op
PASS
ok _/tmp 21.104s
NOTE: BenchmarkCopyBytes
doesn't take the overhead of make([]byte, len(src))
into consideration.
@zz-jason but with the -benchmem
flag, src + ""
seems to make ZERO allocations.
Here is the profile result of BenchmarkCopyString
➜ go tool pprof tmp.test copystring.profile
File: tmp.test
Type: cpu
Time: Jul 16, 2018 at 1:41pm (CST)
Duration: 12.09s, Total samples = 10.10s (83.54%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) top
Showing nodes accounting for 10060ms, 99.60% of 10100ms total
Dropped 7 nodes (cum <= 50.50ms)
Showing top 10 nodes out of 21
flat flat% sum% cum cum%
4850ms 48.02% 48.02% 4850ms 48.02% runtime.concatstrings
2490ms 24.65% 72.67% 7340ms 72.67% runtime.concatstring2
1310ms 12.97% 85.64% 8650ms 85.64% _/tmp.BenchmarkCopyString
1280ms 12.67% 98.32% 1280ms 12.67% runtime.usleep
130ms 1.29% 99.60% 130ms 1.29% runtime.mach_semaphore_signal
0 0% 99.60% 130ms 1.29% runtime.goready
0 0% 99.60% 130ms 1.29% runtime.goready.func1
0 0% 99.60% 130ms 1.29% runtime.goroutineReady
0 0% 99.60% 130ms 1.29% runtime.mach_semrelease
0 0% 99.60% 1280ms 12.67% runtime.mstart
It shows that 72% cumulative CPU time is on concatstring2
Here is another test: I changed return src + ""
to return src + " "
, and you can see the memmove
in the profile:
➜ go test -bench BenchmarkCopyString -count 5 -cpuprofile copystring.profile --benchmem
goos: darwin
goarch: amd64
BenchmarkCopyString-8 100000000 21.0 ns/op 0 B/op 0 allocs/op
BenchmarkCopyString-8 100000000 21.5 ns/op 0 B/op 0 allocs/op
BenchmarkCopyString-8 100000000 19.8 ns/op 0 B/op 0 allocs/op
BenchmarkCopyString-8 100000000 19.4 ns/op 0 B/op 0 allocs/op
BenchmarkCopyString-8 100000000 19.3 ns/op 0 B/op 0 allocs/op
PASS
ok _/tmp 10.383s
[jianzhang.zj:/tmp]
➜ go tool pprof tmp.test copystring.profile
File: tmp.test
Type: cpu
Time: Jul 16, 2018 at 1:45pm (CST)
Duration: 10.37s, Total samples = 8.62s (83.10%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) top
Showing nodes accounting for 8.57s, 99.42% of 8.62s total
Dropped 36 nodes (cum <= 0.04s)
Showing top 10 nodes out of 23
flat flat% sum% cum cum%
4.36s 50.58% 50.58% 6.48s 75.17% runtime.concatstrings
1.26s 14.62% 65.20% 1.26s 14.62% runtime.memmove
0.91s 10.56% 75.75% 0.91s 10.56% runtime.usleep
0.86s 9.98% 85.73% 0.86s 9.98% runtime.rawstringtmp
0.68s 7.89% 93.62% 7.16s 83.06% runtime.concatstring2
0.44s 5.10% 98.72% 7.60s 88.17% _/tmp.BenchmarkCopyString
0.06s 0.7% 99.42% 0.06s 0.7% runtime.mach_semaphore_signal
0 0% 99.42% 0.06s 0.7% runtime.goready
0 0% 99.42% 0.06s 0.7% runtime.goready.func1
0 0% 99.42% 0.06s 0.7% runtime.goroutineReady
I think we should use -memprofilerate
:
go pprof --help
...
-memprofilerate n
Enable more precise (and expensive) memory profiles by setting
runtime.MemProfileRate. See 'go doc runtime.MemProfileRate'.
To profile all memory allocations, use -test.memprofilerate=1
and pass --alloc_space flag to the pprof tool.
// BenchmarkCopyString2 times the concat-and-reslice copy idiom,
// reassigning the result so the compiler cannot eliminate it.
func BenchmarkCopyString2(b *testing.B) {
	s := "123"
	for n := 0; n < b.N; n++ {
		s = (s + "")[:len(s)]
	}
}
// BenchmarkCopyString3 times copying via hack.Slice followed by a
// string conversion, reassigning the result to defeat dead-code
// elimination. (hack is a project-local package — see the gist thread.)
func BenchmarkCopyString3(b *testing.B) {
	s := "123"
	for n := 0; n < b.N; n++ {
		s = string(hack.Slice(s))
	}
}
// BenchmarkCopyString4 times copying via a []byte round-trip,
// reassigning the result to defeat dead-code elimination.
func BenchmarkCopyString4(b *testing.B) {
	s := "123"
	for n := 0; n < b.N; n++ {
		s = string([]byte(s))
	}
}
BenchmarkCopyString2-4 100000000 15.9 ns/op 0 B/op 0 allocs/op
BenchmarkCopyString3-4 50000000 42.6 ns/op 3 B/op 1 allocs/op
BenchmarkCopyString4-4 30000000 40.0 ns/op 3 B/op 1 allocs/op