-
-
Save arnehormann/7930795 to your computer and use it in GitHub Desktop.
// Go MySQL Driver - A MySQL-Driver for Go's database/sql package | |
// | |
// Copyright 2013 The Go-MySQL-Driver Authors. All rights reserved. | |
// | |
// This Source Code Form is subject to the terms of the Mozilla Public | |
// License, v. 2.0. If a copy of the MPL was not distributed with this file, | |
// You can obtain one at http://mozilla.org/MPL/2.0/. | |
package mysql | |
import ( | |
"database/sql/driver" | |
"encoding/binary" | |
"fmt" | |
"testing" | |
) | |
func dtNew(src []byte, length uint8) (driver.Value, error) { | |
// length expects the deterministic length of the zero value, | |
// negative time and 100+ hours are automatically added if needed | |
const digits01 = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" | |
const digits10 = "0000000000111111111122222222223333333333444444444455555555556666666666777777777788888888889999999999" | |
if len(src) == 0 { | |
return zeroDateTime[:length], nil | |
} | |
var dst []byte // return value | |
var p0, p1, p2, p3 byte // current digit pair | |
var zOffs byte // offset of value in zeroDateTime | |
switch length { | |
case 10, 19, 21, 22, 23, 24, 25, 26: | |
default: | |
t := "DATE" | |
if length > 10 { | |
t += "TIME" | |
} | |
return nil, fmt.Errorf("illegal %s length %d", t, length) | |
} | |
switch len(src) { | |
case 4, 7, 11: | |
default: | |
t := "DATE" | |
if length > 10 { | |
t += "TIME" | |
} | |
return nil, fmt.Errorf("illegal %s-packet length %d", t, len(src)) | |
} | |
dst = make([]byte, 0, length) | |
// start with the date | |
year := binary.LittleEndian.Uint16(src[:2]) | |
p0 = byte(year / 100) | |
p1 = byte(year - 100*uint16(p0)) | |
p2, p3 = src[2], src[3] | |
dst = append(dst, | |
digits10[p0], digits01[p0], digits10[p1], digits01[p1], '-', | |
digits10[p2], digits01[p2], '-', | |
digits10[p3], digits01[p3], | |
) | |
if length == 10 { | |
return dst, nil | |
} | |
if len(src) == 4 { | |
return append(dst, zeroDateTime[10:length]...), nil | |
} | |
p1 = src[4] // hour | |
src = src[5:] | |
// p1 is 2-digit hour, src is after hour | |
p2, p3 = src[0], src[1] | |
dst = append(dst, ' ', | |
digits10[p1], digits01[p1], ':', | |
digits10[p2], digits01[p2], ':', | |
digits10[p3], digits01[p3], | |
) | |
if length <= byte(len(dst)) { | |
return dst, nil | |
} | |
src = src[2:] | |
if len(src) == 0 { | |
return append(dst, zeroDateTime[19:zOffs+length]...), nil | |
} | |
// microsecs is little endian uint32 with 3 used bytes | |
// binary.LittleEndian.Uint32(src[:4]) | |
microsecs := uint32(src[0]) | uint32(src[1])<<8 | uint32(src[2])<<16 | |
p1 = byte(microsecs / 10000) | |
microsecs -= 10000 * uint32(p1) | |
p2 = byte(microsecs / 100) | |
microsecs -= 100 * uint32(p2) | |
p3 = byte(microsecs) | |
switch decimals := zOffs + length - 20; decimals { | |
default: | |
return append(dst, '.', | |
digits10[p1], digits01[p1], | |
digits10[p2], digits01[p2], | |
digits10[p3], digits01[p3], | |
), nil | |
case 1: | |
return append(dst, '.', | |
digits10[p1], | |
), nil | |
case 2: | |
return append(dst, '.', | |
digits10[p1], digits01[p1], | |
), nil | |
case 3: | |
return append(dst, '.', | |
digits10[p1], digits01[p1], | |
digits10[p2], | |
), nil | |
case 4: | |
return append(dst, '.', | |
digits10[p1], digits01[p1], | |
digits10[p2], digits01[p2], | |
), nil | |
case 5: | |
return append(dst, '.', | |
digits10[p1], digits01[p1], | |
digits10[p2], digits01[p2], | |
digits10[p3], | |
), nil | |
} | |
} | |
// d_Old is the original formatBinaryDate from utils.go, kept as the
// baseline for the DATE benchmarks.
//
// num is the packet length: 0 yields the zero date, 4 formats the
// little-endian year followed by the month and day bytes of data. Any
// other num is reported as an error.
func d_Old(num int, data []byte) (driver.Value, error) {
	if num == 0 {
		return []byte("0000-00-00"), nil
	}
	if num != 4 {
		return nil, fmt.Errorf("Invalid DATE-packet length %d", num)
	}
	year := binary.LittleEndian.Uint16(data[:2])
	month, day := data[2], data[3]
	text := fmt.Sprintf("%04d-%02d-%02d", year, month, day)
	return []byte(text), nil
}
// dtOld is the original formatBinaryDateTime from utils.go, kept as the
// baseline for the DATETIME benchmarks.
//
// num is the packet length. 0 yields the zero datetime; 4, 7 and 11 format
// the date, the date with time, and the date with time and a six-digit
// microsecond fraction read from data. Any other num is reported as an
// error.
func dtOld(num int, data []byte) (driver.Value, error) {
	if num == 0 {
		return []byte("0000-00-00 00:00:00"), nil
	}
	if num != 4 && num != 7 && num != 11 {
		return nil, fmt.Errorf("Invalid DATETIME-packet length %d", num)
	}

	year := binary.LittleEndian.Uint16(data[:2])
	month, day := data[2], data[3]
	if num == 4 {
		text := fmt.Sprintf("%04d-%02d-%02d 00:00:00", year, month, day)
		return []byte(text), nil
	}

	hour, minute, second := data[4], data[5], data[6]
	if num == 7 {
		text := fmt.Sprintf(
			"%04d-%02d-%02d %02d:%02d:%02d",
			year, month, day, hour, minute, second,
		)
		return []byte(text), nil
	}

	// num == 11: the fraction is a little-endian uint32 microsecond count.
	micros := binary.LittleEndian.Uint32(data[7:11])
	text := fmt.Sprintf(
		"%04d-%02d-%02d %02d:%02d:%02d.%06d",
		year, month, day, hour, minute, second, micros,
	)
	return []byte(text), nil
}
func benchNewDT(b *testing.B, src []byte, outlen uint8) { | |
b.StopTimer() | |
b.ReportAllocs() | |
b.StartTimer() | |
for i := 0; i < b.N; i++ { | |
// new method, datetime | |
_, _ = dtNew(src, outlen) | |
} | |
} | |
func benchNewD_(b *testing.B, src []byte, outlen uint8) { | |
b.StopTimer() | |
b.ReportAllocs() | |
b.StartTimer() | |
for i := 0; i < b.N; i++ { | |
// new method, date only | |
_, _ = dtNew(src, outlen) | |
} | |
} | |
func benchOldDT(b *testing.B, src []byte, outlen uint8) { | |
b.StopTimer() | |
num := len(src) | |
b.ReportAllocs() | |
b.StartTimer() | |
for i := 0; i < b.N; i++ { | |
// old method, datetime | |
_, _ = dtOld(num, src) | |
} | |
} | |
func benchOldD_(b *testing.B, src []byte, outlen uint8) { | |
b.StopTimer() | |
num := len(src) | |
b.ReportAllocs() | |
b.StartTimer() | |
for i := 0; i < b.N; i++ { | |
// old method, date only | |
_, _ = dtOld(num, src) | |
} | |
} | |
// rawDate is the benchmark input: a full 11-byte binary DATETIME value for
// 2012-10-13 15:34:59.987654. The benchmarks slice it to 0, 4, 7 or 11
// bytes to obtain the shorter packet forms.
//
// Multi-byte fields are little endian, matching the
// binary.LittleEndian decoding used by the formatters.
var rawDate = []byte{
	2012 % 256, 2012 / 256, // year (low byte first)
	10, // month
	13, // day
	15, // hour
	34, // minute
	59, // second
	6, 18, 15, 0, // microsecond (987654, low byte first)
}
func BenchmarkFormatNewD_00(b *testing.B) { benchNewD_(b, rawDate[:0], 10) } | |
func BenchmarkFormatOldD_00(b *testing.B) { benchOldD_(b, rawDate[:0], 10) } | |
func BenchmarkFormatNewD_04(b *testing.B) { benchNewD_(b, rawDate[:4], 10) } | |
func BenchmarkFormatOldD_04(b *testing.B) { benchOldD_(b, rawDate[:4], 10) } | |
func BenchmarkFormatNewDT00(b *testing.B) { benchNewDT(b, rawDate[:0], 19) } | |
func BenchmarkFormatOldDT00(b *testing.B) { benchOldDT(b, rawDate[:0], 19) } | |
func BenchmarkFormatNewDT04(b *testing.B) { benchNewDT(b, rawDate[:4], 19) } | |
func BenchmarkFormatOldDT04(b *testing.B) { benchOldDT(b, rawDate[:4], 19) } | |
func BenchmarkFormatNewDT07(b *testing.B) { benchNewDT(b, rawDate[:7], 19) } | |
func BenchmarkFormatOldDT07(b *testing.B) { benchOldDT(b, rawDate[:7], 19) } | |
func BenchmarkFormatNewDT11(b *testing.B) { benchNewDT(b, rawDate[:11], 26) } | |
func BenchmarkFormatOldDT11(b *testing.B) { benchOldDT(b, rawDate[:11], 26) } |
no difference for
if withTime {
if srclen == 11 {
dst = []byte{
'0', '0', '0', '0', '-', '0', '0', '-', '0', '0',
' ', '0', '0', ':', '0', '0', ':', '0', '0',
'.', '0', '0', '0', '0', '0', '0',
}
} else {
dst = []byte{
'0', '0', '0', '0', '-', '0', '0', '-', '0', '0',
' ', '0', '0', ':', '0', '0', ':', '0', '0',
}
}
} else {
dst = []byte{
'0', '0', '0', '0', '-', '0', '0', '-', '0', '0',
}
}
... how do I get rid of one more allocation?!?
You could try to reuse the src slice as dst.
Implementing it with if instead of switch should actually be a few nanoseconds slower, since the compiler can use a jump table for switch :-P
I thought that's only when the values are more or less continuous - and for switch in ascending order.
Well, currently `go tool 6g -S FILE.go` (Go version from yesterday) is pretty much silent and doesn't yield anything, no output at all. And `go tool 6l -a FILE.6` is much too verbose. So I tried but couldn't check. Something's borked in my installation.
Reusing source: but it's not long enough? Should I just overwrite "earlier" parts of the buffer?
ASM with 6g from Go 1.2: http://pastebin.com/F9ny3nuf
I removed the 'testing' import, so line numbers are -1 each ;)
@julienschmidt thanks for the assembly.
There is an uglier / faster version (consistently ~10% for BenchmarkFormatNewDT11 - down to about 190ns):
Get rid of modulo to cut down duplicate operations in assembly.
Taking e.g. `month`, replace
dst[5] += (month / 10) % 10
dst[6] += month % 10
with
tmp = month / 10
dst[6] += month - 10*tmp
dst[5] += tmp
Fast but ugly. I don't know if it's worth it - but as the code probably won't be touched again for some time and this gist can always be referenced in a comment...
I changed the version in the gist to use the "improvement" from my latest comment.
We can always jump back with the history...
a := []byte{
'0', '0', '0', '0', '-', '0', '0', '-', '0', '0',
' ', '0', '0', ':', '0', '0', ':', '0', '0',
'.', '0', '0', '0', '0', '0', '0',
}
b := []byte("0000-00-00 00:00:00.000000")
`a` and `b` compile to the same assembly btw.
@julienschmidt if you are ok with it, please delete your asm comment - it doesn't reflect the current version and I got what I wanted out of it, but it makes the site slower, the discussion longer and Chrome says the site is in Albanian and offers a translation (?!?).
Are you ok with the code as it is here or do you want something changed in the PR?
aaaand I know this is a kinda ridiculous overengineering of an already fast function not called overly much. Still, fun 😀
Moved the ASM to pastebin.
Here is a new snippet generated by pprof --disasm, showing the reason for the additional allocation:
. 3238.5 84: return dst, nil
. . 43731a: MOVQ $4d3460,0(SP)
. . 437322: MOVQ 88(SP),BX
. . 43732a: MOVQ BX,8(SP)
. . 43732f: MOVQ CX,10(SP)
. . 437334: MOVQ DI,18(SP)
. 3238.5 437339: CALL runtime.convT2E(SB)
. . 43733e: MOVQ 20(SP),BX
. . 437343: MOVQ BX,e8(SP)
. . 43734b: MOVQ 28(SP),BX
. . 437350: MOVQ BX,f0(SP)
. . 437358: MOVQ $0,f8(SP)
. . 437364: MOVQ $0,100(SP)
. . 437370: ADDQ $c0,SP
. . 437377: RET
As said earlier, the len=0 case could still be optimized. Something like:
var zeroDateTime []byte = []byte("0000-00-00 00:00:00")
func dtNew(src []byte, withTime bool) (driver.Value, error) {
if len(src) == 0 {
if withTime {
return zeroDateTime, nil
}
return zeroDateTime[:10], nil
}
...
I did a rewrite to include another approach with appending digit tuples.
This also enables a new api which lets you specify the output length independent of the input length.
No significant slowdown:
BenchmarkFormatNewD_00 50000000 69.8 ns/op 32 B/op 1 allocs/op
BenchmarkFormatNew2D_00 50000000 69.9 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldD_00 20000000 132 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewD_04 10000000 191 ns/op 48 B/op 2 allocs/op
BenchmarkFormatNew2D_04 10000000 204 ns/op 48 B/op 2 allocs/op
BenchmarkFormatOldD_04 2000000 833 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT00 50000000 69.6 ns/op 32 B/op 1 allocs/op
BenchmarkFormatNew2DT00 50000000 70.3 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldDT00 20000000 131 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewDT04 10000000 198 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT04 10000000 222 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT04 2000000 833 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT07 10000000 209 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT07 10000000 250 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT07 1000000 1272 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT11 10000000 229 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT11 10000000 289 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT11 1000000 1496 ns/op 96 B/op 3 allocs/op
And with the current version (2014-06-05), I beat the old one and gain flexible output length handling.
BenchmarkFormatNewD_00 50000000 69.4 ns/op 32 B/op 1 allocs/op
BenchmarkFormatNew2D_00 50000000 69.4 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldD_00 20000000 130.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewD_04 10000000 189.0 ns/op 48 B/op 2 allocs/op
BenchmarkFormatNew2D_04 10000000 179.0 ns/op 48 B/op 2 allocs/op
BenchmarkFormatOldD_04 2000000 823.0 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT00 50000000 69.3 ns/op 32 B/op 1 allocs/op
BenchmarkFormatNew2DT00 50000000 69.2 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldDT00 20000000 129.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewDT04 10000000 200.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT04 10000000 194.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT04 2000000 824.0 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT07 10000000 211.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT07 10000000 205.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT07 1000000 1256.0 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT11 10000000 231.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT11 10000000 225.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT11 1000000 1489.0 ns/op 96 B/op 3 allocs/op
improved ...New2 with inspiration from strconv and replaced ...New with it.
BenchmarkFormatNewD_00 50000000 68.6 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldD_00 20000000 129 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewD_04 10000000 175 ns/op 48 B/op 2 allocs/op
BenchmarkFormatOldD_04 2000000 848 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT00 50000000 68.2 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldDT00 20000000 129 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewDT04 10000000 193 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT04 2000000 844 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT07 10000000 201 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT07 1000000 1294 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT11 10000000 221 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT11 1000000 1498 ns/op 96 B/op 3 allocs/op
Also tried this with if instead of switch, different order (the current one makes error handling easier) and less local vars - no visible improvement. The version below shows all those changes I tried mashed into one.