Last active
December 31, 2015 03:59
-
-
Save arnehormann/7930795 to your computer and use it in GitHub Desktop.
benchmark another way to format mysql date and datetime into a `[]byte`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Go MySQL Driver - A MySQL-Driver for Go's database/sql package | |
// | |
// Copyright 2013 The Go-MySQL-Driver Authors. All rights reserved. | |
// | |
// This Source Code Form is subject to the terms of the Mozilla Public | |
// License, v. 2.0. If a copy of the MPL was not distributed with this file, | |
// You can obtain one at http://mozilla.org/MPL/2.0/. | |
package mysql | |
import ( | |
"database/sql/driver" | |
"encoding/binary" | |
"fmt" | |
"testing" | |
) | |
func dtNew(src []byte, length uint8) (driver.Value, error) { | |
// length expects the deterministic length of the zero value, | |
// negative time and 100+ hours are automatically added if needed | |
const digits01 = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" | |
const digits10 = "0000000000111111111122222222223333333333444444444455555555556666666666777777777788888888889999999999" | |
if len(src) == 0 { | |
return zeroDateTime[:length], nil | |
} | |
var dst []byte // return value | |
var p0, p1, p2, p3 byte // current digit pair | |
var zOffs byte // offset of value in zeroDateTime | |
switch length { | |
case 10, 19, 21, 22, 23, 24, 25, 26: | |
default: | |
t := "DATE" | |
if length > 10 { | |
t += "TIME" | |
} | |
return nil, fmt.Errorf("illegal %s length %d", t, length) | |
} | |
switch len(src) { | |
case 4, 7, 11: | |
default: | |
t := "DATE" | |
if length > 10 { | |
t += "TIME" | |
} | |
return nil, fmt.Errorf("illegal %s-packet length %d", t, len(src)) | |
} | |
dst = make([]byte, 0, length) | |
// start with the date | |
year := binary.LittleEndian.Uint16(src[:2]) | |
p0 = byte(year / 100) | |
p1 = byte(year - 100*uint16(p0)) | |
p2, p3 = src[2], src[3] | |
dst = append(dst, | |
digits10[p0], digits01[p0], digits10[p1], digits01[p1], '-', | |
digits10[p2], digits01[p2], '-', | |
digits10[p3], digits01[p3], | |
) | |
if length == 10 { | |
return dst, nil | |
} | |
if len(src) == 4 { | |
return append(dst, zeroDateTime[10:length]...), nil | |
} | |
p1 = src[4] // hour | |
src = src[5:] | |
// p1 is 2-digit hour, src is after hour | |
p2, p3 = src[0], src[1] | |
dst = append(dst, ' ', | |
digits10[p1], digits01[p1], ':', | |
digits10[p2], digits01[p2], ':', | |
digits10[p3], digits01[p3], | |
) | |
if length <= byte(len(dst)) { | |
return dst, nil | |
} | |
src = src[2:] | |
if len(src) == 0 { | |
return append(dst, zeroDateTime[19:zOffs+length]...), nil | |
} | |
// microsecs is little endian uint32 with 3 used bytes | |
// binary.LittleEndian.Uint32(src[:4]) | |
microsecs := uint32(src[0]) | uint32(src[1])<<8 | uint32(src[2])<<16 | |
p1 = byte(microsecs / 10000) | |
microsecs -= 10000 * uint32(p1) | |
p2 = byte(microsecs / 100) | |
microsecs -= 100 * uint32(p2) | |
p3 = byte(microsecs) | |
switch decimals := zOffs + length - 20; decimals { | |
default: | |
return append(dst, '.', | |
digits10[p1], digits01[p1], | |
digits10[p2], digits01[p2], | |
digits10[p3], digits01[p3], | |
), nil | |
case 1: | |
return append(dst, '.', | |
digits10[p1], | |
), nil | |
case 2: | |
return append(dst, '.', | |
digits10[p1], digits01[p1], | |
), nil | |
case 3: | |
return append(dst, '.', | |
digits10[p1], digits01[p1], | |
digits10[p2], | |
), nil | |
case 4: | |
return append(dst, '.', | |
digits10[p1], digits01[p1], | |
digits10[p2], digits01[p2], | |
), nil | |
case 5: | |
return append(dst, '.', | |
digits10[p1], digits01[p1], | |
digits10[p2], digits01[p2], | |
digits10[p3], | |
), nil | |
} | |
} | |
// original utils.go: formatBinaryDate | |
// d_Old is the baseline DATE formatter (utils.go: formatBinaryDate) kept
// for comparison against dtNew.
//
// num is the packet length announced for the value and data holds the
// packet bytes. num == 0 yields the zero date "0000-00-00"; num == 4 reads
// a little-endian uint16 year followed by one byte each for month and day
// and yields "YYYY-MM-DD". Any other num is an error, as is a data slice
// shorter than num (previously this panicked on the slice expression).
func d_Old(num int, data []byte) (driver.Value, error) {
	switch num {
	case 0:
		return []byte("0000-00-00"), nil
	case 4:
		if len(data) < num {
			return nil, fmt.Errorf("truncated DATE-packet: want %d bytes, have %d", num, len(data))
		}
		year := binary.LittleEndian.Uint16(data[:2])
		return []byte(fmt.Sprintf("%04d-%02d-%02d", year, data[2], data[3])), nil
	}
	return nil, fmt.Errorf("Invalid DATE-packet length %d", num)
}
// original utils.go: formatBinaryDateTime | |
// dtOld is the baseline DATETIME formatter (utils.go:
// formatBinaryDateTime) kept for comparison against dtNew.
//
// num is the packet length announced for the value and data holds the
// packet bytes:
//
//	num == 0:  zero value, "0000-00-00 00:00:00"
//	num == 4:  date only, time printed as 00:00:00
//	num == 7:  date and time
//	num == 11: date, time and a little-endian uint32 microsecond count,
//	           printed as six fractional digits
//
// Any other num is an error, as is a data slice shorter than num
// (previously this panicked while indexing data).
func dtOld(num int, data []byte) (driver.Value, error) {
	switch num {
	case 0:
		return []byte("0000-00-00 00:00:00"), nil
	case 4, 7, 11:
		if len(data) < num {
			return nil, fmt.Errorf("truncated DATETIME-packet: want %d bytes, have %d", num, len(data))
		}
	default:
		return nil, fmt.Errorf("Invalid DATETIME-packet length %d", num)
	}

	year := binary.LittleEndian.Uint16(data[:2])
	var text string
	switch num {
	case 4:
		text = fmt.Sprintf(
			"%04d-%02d-%02d 00:00:00",
			year, data[2], data[3],
		)
	case 7:
		text = fmt.Sprintf(
			"%04d-%02d-%02d %02d:%02d:%02d",
			year, data[2], data[3],
			data[4], data[5], data[6],
		)
	default: // 11
		text = fmt.Sprintf(
			"%04d-%02d-%02d %02d:%02d:%02d.%06d",
			year, data[2], data[3],
			data[4], data[5], data[6],
			binary.LittleEndian.Uint32(data[7:11]),
		)
	}
	return []byte(text), nil
}
func benchNewDT(b *testing.B, src []byte, outlen uint8) { | |
b.StopTimer() | |
b.ReportAllocs() | |
b.StartTimer() | |
for i := 0; i < b.N; i++ { | |
// new method, datetime | |
_, _ = dtNew(src, outlen) | |
} | |
} | |
func benchNewD_(b *testing.B, src []byte, outlen uint8) { | |
b.StopTimer() | |
b.ReportAllocs() | |
b.StartTimer() | |
for i := 0; i < b.N; i++ { | |
// new method, date only | |
_, _ = dtNew(src, outlen) | |
} | |
} | |
func benchOldDT(b *testing.B, src []byte, outlen uint8) { | |
b.StopTimer() | |
num := len(src) | |
b.ReportAllocs() | |
b.StartTimer() | |
for i := 0; i < b.N; i++ { | |
// old method, datetime | |
_, _ = dtOld(num, src) | |
} | |
} | |
func benchOldD_(b *testing.B, src []byte, outlen uint8) { | |
b.StopTimer() | |
num := len(src) | |
b.ReportAllocs() | |
b.StartTimer() | |
for i := 0; i < b.N; i++ { | |
// old method, date only | |
_, _ = dtOld(num, src) | |
} | |
} | |
// rawDate is the benchmark fixture: the packed 11-byte form of
// 2012-10-13 15:34:59.987654. The benchmarks pass prefixes of it
// (rawDate[:0], [:4], [:7], [:11]) to select the packet variant.
//
// The year is stored low byte first because every formatter in this file
// reads it with binary.LittleEndian.Uint16; the previous high-byte-first
// order decoded as year 56327.
var rawDate = []byte{
	2012 % 256, 2012 / 256, // year, little endian
	10,           // month
	13,           // day
	15,           // hour
	34,           // minute
	59,           // second
	6, 18, 15, 0, // microsecond (987654), little endian
}
func BenchmarkFormatNewD_00(b *testing.B) { benchNewD_(b, rawDate[:0], 10) } | |
func BenchmarkFormatOldD_00(b *testing.B) { benchOldD_(b, rawDate[:0], 10) } | |
func BenchmarkFormatNewD_04(b *testing.B) { benchNewD_(b, rawDate[:4], 10) } | |
func BenchmarkFormatOldD_04(b *testing.B) { benchOldD_(b, rawDate[:4], 10) } | |
func BenchmarkFormatNewDT00(b *testing.B) { benchNewDT(b, rawDate[:0], 19) } | |
func BenchmarkFormatOldDT00(b *testing.B) { benchOldDT(b, rawDate[:0], 19) } | |
func BenchmarkFormatNewDT04(b *testing.B) { benchNewDT(b, rawDate[:4], 19) } | |
func BenchmarkFormatOldDT04(b *testing.B) { benchOldDT(b, rawDate[:4], 19) } | |
func BenchmarkFormatNewDT07(b *testing.B) { benchNewDT(b, rawDate[:7], 19) } | |
func BenchmarkFormatOldDT07(b *testing.B) { benchOldDT(b, rawDate[:7], 19) } | |
func BenchmarkFormatNewDT11(b *testing.B) { benchNewDT(b, rawDate[:11], 26) } | |
func BenchmarkFormatOldDT11(b *testing.B) { benchOldDT(b, rawDate[:11], 26) } |
As said earlier, the len=0 case could still be optimized. Something like:
var zeroDateTime []byte = []byte("0000-00-00 00:00:00")
func dtNew(src []byte, withTime bool) (driver.Value, error) {
if len(src) == 0 {
if withTime {
return zeroDateTime, nil
}
return zeroDateTime[:10], nil
}
...
I did a rewrite to include another approach with appending digit tuples.
This also enables a new API that lets you specify the output length independently of the input length.
No significant slowdown:
BenchmarkFormatNewD_00 50000000 69.8 ns/op 32 B/op 1 allocs/op
BenchmarkFormatNew2D_00 50000000 69.9 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldD_00 20000000 132 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewD_04 10000000 191 ns/op 48 B/op 2 allocs/op
BenchmarkFormatNew2D_04 10000000 204 ns/op 48 B/op 2 allocs/op
BenchmarkFormatOldD_04 2000000 833 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT00 50000000 69.6 ns/op 32 B/op 1 allocs/op
BenchmarkFormatNew2DT00 50000000 70.3 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldDT00 20000000 131 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewDT04 10000000 198 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT04 10000000 222 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT04 2000000 833 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT07 10000000 209 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT07 10000000 250 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT07 1000000 1272 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT11 10000000 229 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT11 10000000 289 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT11 1000000 1496 ns/op 96 B/op 3 allocs/op
And with the current version (2014-06-05), I beat the old one and gain flexible output length handling.
BenchmarkFormatNewD_00 50000000 69.4 ns/op 32 B/op 1 allocs/op
BenchmarkFormatNew2D_00 50000000 69.4 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldD_00 20000000 130.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewD_04 10000000 189.0 ns/op 48 B/op 2 allocs/op
BenchmarkFormatNew2D_04 10000000 179.0 ns/op 48 B/op 2 allocs/op
BenchmarkFormatOldD_04 2000000 823.0 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT00 50000000 69.3 ns/op 32 B/op 1 allocs/op
BenchmarkFormatNew2DT00 50000000 69.2 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldDT00 20000000 129.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewDT04 10000000 200.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT04 10000000 194.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT04 2000000 824.0 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT07 10000000 211.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT07 10000000 205.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT07 1000000 1256.0 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT11 10000000 231.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNew2DT11 10000000 225.0 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT11 1000000 1489.0 ns/op 96 B/op 3 allocs/op
Improved ...New2 with inspiration from strconv and replaced ...New with it.
BenchmarkFormatNewD_00 50000000 68.6 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldD_00 20000000 129 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewD_04 10000000 175 ns/op 48 B/op 2 allocs/op
BenchmarkFormatOldD_04 2000000 848 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT00 50000000 68.2 ns/op 32 B/op 1 allocs/op
BenchmarkFormatOldDT00 20000000 129 ns/op 64 B/op 2 allocs/op
BenchmarkFormatNewDT04 10000000 193 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT04 2000000 844 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT07 10000000 201 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT07 1000000 1294 ns/op 96 B/op 3 allocs/op
BenchmarkFormatNewDT11 10000000 221 ns/op 64 B/op 2 allocs/op
BenchmarkFormatOldDT11 1000000 1498 ns/op 96 B/op 3 allocs/op
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Moved the ASM to pastebin.
Here is a new snippet generated by pprof --disasm, showing the reason for the additional allocation: