Last active
July 16, 2018 06:43
-
-
Save lysu/39d4e114632d1790af171ed98ab7d8cb to your computer and use it in GitHub Desktop.
cp string
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main_test | |
import ( | |
"testing" | |
"fmt" | |
) | |
var longStr = "121212123123231231231231311111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111123123" | |
var shortStr = "1212121231232312312311" | |
func BenchmarkLC1(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy1(longStr) | |
} | |
} | |
func BenchmarkLC2(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy2(longStr) | |
} | |
} | |
func BenchmarkLC3(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy3(longStr) | |
} | |
} | |
func BenchmarkLC4(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy4(longStr) | |
} | |
} | |
func BenchmarkSC1(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy1(shortStr) | |
} | |
} | |
func BenchmarkSC2(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy2(shortStr) | |
} | |
} | |
func BenchmarkSC3(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy3(shortStr) | |
} | |
} | |
func BenchmarkSC4(b *testing.B) { | |
for i := 0; i < b.N; i++ { | |
longStr = copy4(shortStr) | |
} | |
} | |
// copy1 returns s formatted through fmt.Sprintf with the "%s" verb.
// The formatting call is the technique being benchmarked, so it is kept
// as-is rather than simplified.
func copy1(s string) string {
	formatted := fmt.Sprintf("%s", s)
	return formatted
}
// copy2 returns s after converting it to a byte slice and back to a string.
func copy2(s string) string {
	raw := []byte(s)
	return string(raw)
}
// copy3 returns a by concatenating a single space onto it and then slicing
// the result back down to the original length.
func copy3(a string) string {
	padded := a + " "
	return padded[:len(a)]
}
// copy4 returns a rebuilt by concatenating its first byte with the rest of
// the string. An empty input yields the empty string without slicing.
func copy4(a string) string {
	if a == "" {
		return ""
	}
	head, tail := a[:1], a[1:]
	return head + tail
}
Here is the profile result of BenchmarkCopyString
➜ go tool pprof tmp.test copystring.profile
File: tmp.test
Type: cpu
Time: Jul 16, 2018 at 1:41pm (CST)
Duration: 12.09s, Total samples = 10.10s (83.54%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) top
Showing nodes accounting for 10060ms, 99.60% of 10100ms total
Dropped 7 nodes (cum <= 50.50ms)
Showing top 10 nodes out of 21
flat flat% sum% cum cum%
4850ms 48.02% 48.02% 4850ms 48.02% runtime.concatstrings
2490ms 24.65% 72.67% 7340ms 72.67% runtime.concatstring2
1310ms 12.97% 85.64% 8650ms 85.64% _/tmp.BenchmarkCopyString
1280ms 12.67% 98.32% 1280ms 12.67% runtime.usleep
130ms 1.29% 99.60% 130ms 1.29% runtime.mach_semaphore_signal
0 0% 99.60% 130ms 1.29% runtime.goready
0 0% 99.60% 130ms 1.29% runtime.goready.func1
0 0% 99.60% 130ms 1.29% runtime.goroutineReady
0 0% 99.60% 130ms 1.29% runtime.mach_semrelease
0 0% 99.60% 1280ms 12.67% runtime.mstart
It shows that about 72% of the cumulative CPU time is spent in runtime.concatstring2.
Here is another test: I changed `return src + ""` to `return src + " "`, and now you can see `memmove` in the profile:
➜ go test -bench BenchmarkCopyString -count 5 -cpuprofile copystring.profile --benchmem
goos: darwin
goarch: amd64
BenchmarkCopyString-8 100000000 21.0 ns/op 0 B/op 0 allocs/op
BenchmarkCopyString-8 100000000 21.5 ns/op 0 B/op 0 allocs/op
BenchmarkCopyString-8 100000000 19.8 ns/op 0 B/op 0 allocs/op
BenchmarkCopyString-8 100000000 19.4 ns/op 0 B/op 0 allocs/op
BenchmarkCopyString-8 100000000 19.3 ns/op 0 B/op 0 allocs/op
PASS
ok _/tmp 10.383s
[jianzhang.zj:/tmp]
➜ go tool pprof tmp.test copystring.profile
File: tmp.test
Type: cpu
Time: Jul 16, 2018 at 1:45pm (CST)
Duration: 10.37s, Total samples = 8.62s (83.10%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) top
Showing nodes accounting for 8.57s, 99.42% of 8.62s total
Dropped 36 nodes (cum <= 0.04s)
Showing top 10 nodes out of 23
flat flat% sum% cum cum%
4.36s 50.58% 50.58% 6.48s 75.17% runtime.concatstrings
1.26s 14.62% 65.20% 1.26s 14.62% runtime.memmove
0.91s 10.56% 75.75% 0.91s 10.56% runtime.usleep
0.86s 9.98% 85.73% 0.86s 9.98% runtime.rawstringtmp
0.68s 7.89% 93.62% 7.16s 83.06% runtime.concatstring2
0.44s 5.10% 98.72% 7.60s 88.17% _/tmp.BenchmarkCopyString
0.06s 0.7% 99.42% 0.06s 0.7% runtime.mach_semaphore_signal
0 0% 99.42% 0.06s 0.7% runtime.goready
0 0% 99.42% 0.06s 0.7% runtime.goready.func1
0 0% 99.42% 0.06s 0.7% runtime.goroutineReady
I think we should use `-memprofilerate`:
go pprof --help
...
-memprofilerate n
Enable more precise (and expensive) memory profiles by setting
runtime.MemProfileRate. See 'go doc runtime.MemProfileRate'.
To profile all memory allocations, use -test.memprofilerate=1
and pass --alloc_space flag to the pprof tool.
// BenchmarkCopyString2 repeatedly concatenates an empty string literal onto
// a and slices the result to len(a), feeding each result into the next
// iteration.
func BenchmarkCopyString2(b *testing.B) {
	a := "123"
	for n := 0; n < b.N; n++ {
		joined := a + ""
		a = joined[:len(a)]
	}
}
// BenchmarkCopyString3 repeatedly passes a through hack.Slice and converts
// the result back to a string, feeding each result into the next iteration.
// NOTE(review): hack.Slice is defined outside this snippet; presumably it
// returns a []byte for the string — confirm against the hack package.
func BenchmarkCopyString3(b *testing.B) {
	a := "123"
	for n := 0; n < b.N; n++ {
		raw := hack.Slice(a)
		a = string(raw)
	}
}
// BenchmarkCopyString4 repeatedly converts a to a byte slice and back to a
// string, feeding each result into the next iteration.
func BenchmarkCopyString4(b *testing.B) {
	a := "123"
	for n := 0; n < b.N; n++ {
		raw := []byte(a)
		a = string(raw)
	}
}
BenchmarkCopyString2-4 100000000 15.9 ns/op 0 B/op 0 allocs/op
BenchmarkCopyString3-4 50000000 42.6 ns/op 3 B/op 1 allocs/op
BenchmarkCopyString4-4 30000000 40.0 ns/op 3 B/op 1 allocs/op
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@zz-jason but with the `-benchmem` flag, `src + ""` seems to make ZERO allocations...