Skip to content

Instantly share code, notes, and snippets.

@kokes
Created December 17, 2020 09:05
Show Gist options
  • Save kokes/8670519d879f9e83014871ea81d8c986 to your computer and use it in GitHub Desktop.
Save kokes/8670519d879f9e83014871ea81d8c986 to your computer and use it in GitHub Desktop.
Testing sad paths of `strconv.ParseInt` with error reporting changed from custom errors to plain `errors.New`. `atoi.go` is a combination of various files from the standard library, with only error reporting changed. `atoi_test.go` is the benchmark itself. We can see that, given invalid input, `strconv.ParseInt` allocates, and this allocation dominates the runtime.
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mystrconv
import "errors"
// errNotInt is the one error value returned by ParseUint and ParseInt for
// every kind of failure. It is created once at package initialisation.
var errNotInt = errors.New("not an int")

const (
	// intSize is 64 when uint is 64 bits wide and 32 otherwise:
	// ^uint(0)>>63 evaluates to 1 on 64-bit uint and to 0 on 32-bit uint.
	intSize = 32 << (^uint(0) >> 63)

	// IntSize is the size in bits of an int or uint value.
	IntSize = intSize

	// maxUint64 is the largest value a uint64 can hold.
	maxUint64 = 1<<64 - 1
)
// lower returns c with the ASCII case bit (0x20) set.
//
// For letters this yields the lower-case form, so lower(c) == 'x' can be
// written instead of c == 'x' || c == 'X'. Digits are returned unchanged.
// Other bytes may be mapped to different non-letter bytes, so the result is
// only meaningful when compared against a lower-case letter or digit range.
func lower(c byte) byte {
	const caseBit = 'x' - 'X' // 0x20: the bit that separates 'A' from 'a'
	return c | caseBit
}
// ParseUint is like ParseInt but for unsigned numbers.
//
// s must be non-empty and must not carry a sign. base must be 0 or in the
// range 2..36. When base is 0 the base is taken from the prefix of s:
// "0b" selects 2, "0o" selects 8, "0x" selects 16 (prefix letters are
// case-insensitive and need at least one following byte), any other leading
// "0" selects 8, and everything else is parsed as base 10. Underscores are
// accepted only when base is 0, and only in positions underscoreOK allows.
//
// bitSize selects the width the result must fit in; 0 means IntSize, and
// values outside 0..64 are rejected.
//
// Every failure is reported as errNotInt. On overflow the returned value is
// the largest value representable in bitSize bits; on any other failure it
// is 0.
func ParseUint(s string, base int, bitSize int) (uint64, error) {
	if len(s) == 0 {
		return 0, errNotInt
	}

	// Keep the untouched input: underscoreOK needs to see the base prefix.
	original := s
	autoBase := base == 0

	if autoBase {
		base = 10
		if s[0] == '0' {
			// A base prefix only counts when at least one byte follows it.
			prefixed := len(s) >= 3
			switch {
			case prefixed && lower(s[1]) == 'b':
				base, s = 2, s[2:]
			case prefixed && lower(s[1]) == 'o':
				base, s = 8, s[2:]
			case prefixed && lower(s[1]) == 'x':
				base, s = 16, s[2:]
			default:
				base, s = 8, s[1:]
			}
		}
	} else if base < 2 || base > 36 {
		return 0, errNotInt
	}

	switch {
	case bitSize == 0:
		bitSize = IntSize
	case bitSize < 0 || bitSize > 64:
		return 0, errNotInt
	}

	// limit is the smallest number such that limit*base > maxUint64.
	// The common bases use compile-time constants.
	radix := uint64(base)
	var limit uint64
	switch base {
	case 10:
		limit = maxUint64/10 + 1
	case 16:
		limit = maxUint64/16 + 1
	default:
		limit = maxUint64/radix + 1
	}

	// ceiling is the largest value that fits in bitSize bits. For bitSize 64
	// the shift yields 0 and the subtraction wraps to maxUint64.
	ceiling := uint64(1)<<uint(bitSize) - 1

	var acc uint64
	sawUnderscore := false
	for i := 0; i < len(s); i++ {
		ch := s[i]
		if ch == '_' && autoBase {
			sawUnderscore = true
			continue
		}

		var digit byte
		if lc := lower(ch); '0' <= ch && ch <= '9' {
			digit = ch - '0'
		} else if 'a' <= lc && lc <= 'z' {
			digit = lc - 'a' + 10
		} else {
			return 0, errNotInt
		}
		if digit >= byte(base) {
			return 0, errNotInt
		}

		if acc >= limit {
			// acc*base would overflow uint64.
			return ceiling, errNotInt
		}
		acc *= radix

		next := acc + uint64(digit)
		if next < acc || next > ceiling {
			// Adding the digit wrapped around or exceeded bitSize.
			return ceiling, errNotInt
		}
		acc = next
	}

	if sawUnderscore && !underscoreOK(original) {
		return 0, errNotInt
	}
	return acc, nil
}
// ParseInt parses s as a signed integer in the given base and checks that
// the result fits in bitSize bits.
//
// An optional leading '+' or '-' is consumed here; the remaining text, base
// and bitSize are handed to ParseUint, whose rules for prefixes, underscores
// and valid base/bitSize values apply unchanged. bitSize 0 means IntSize.
//
// Every failure is reported as errNotInt. Any error coming from ParseUint
// (including unsigned overflow) yields 0. When the magnitude fits the
// unsigned width but not the signed one, the nearest representable signed
// value for bitSize is returned alongside errNotInt.
func ParseInt(s string, base int, bitSize int) (i int64, err error) {
	if len(s) == 0 {
		return 0, errNotInt
	}

	// Strip the optional sign; only '-' changes the result.
	negative := false
	switch s[0] {
	case '-':
		negative = true
		fallthrough
	case '+':
		s = s[1:]
	}

	magnitude, parseErr := ParseUint(s, base, bitSize)
	if parseErr != nil {
		return 0, parseErr
	}

	if bitSize == 0 {
		bitSize = IntSize
	}

	// limit is 2^(bitSize-1): one past the largest positive value and the
	// magnitude of the smallest negative value.
	limit := uint64(1) << uint(bitSize-1)

	switch {
	case !negative && magnitude >= limit:
		return int64(limit - 1), errNotInt
	case negative && magnitude > limit:
		return -int64(limit), errNotInt
	case negative:
		return -int64(magnitude), nil
	default:
		return int64(magnitude), nil
	}
}
// underscoreOK reports whether every underscore in s sits between two
// digits, where a base prefix ("0b", "0o", "0x", case-insensitive) counts as
// a digit on its left side. An optional leading sign is ignored. Letters
// a-f/A-F count as digits only after a hex prefix. Bytes that are neither
// digits nor underscores are tolerated as long as they do not directly
// follow an underscore; validating them is left to the caller.
func underscoreOK(s string) bool {
	// Classes of the most recently seen byte.
	const (
		atStart         = '^' // nothing consumed yet
		afterDigit      = '0' // a digit or the base prefix
		afterUnderscore = '_' // an underscore
		afterOther      = '!' // anything else
	)

	// Optional sign.
	if s != "" && (s[0] == '-' || s[0] == '+') {
		s = s[1:]
	}

	// Optional base prefix.
	prev := atStart
	start := 0
	isHex := false
	if len(s) >= 2 && s[0] == '0' {
		switch lower(s[1]) {
		case 'x':
			isHex = true
			fallthrough
		case 'b', 'o':
			start = 2
			prev = afterDigit // the prefix may be followed by an underscore
		}
	}

	// Number proper.
	for i := start; i < len(s); i++ {
		c := s[i]
		switch {
		case '0' <= c && c <= '9', isHex && 'a' <= lower(c) && lower(c) <= 'f':
			prev = afterDigit
		case c == '_':
			// An underscore must follow a digit.
			if prev != afterDigit {
				return false
			}
			prev = afterUnderscore
		case prev == afterUnderscore:
			// An underscore must also be followed by a digit.
			return false
		default:
			prev = afterOther
		}
	}

	// A trailing underscore is not allowed.
	return prev != afterUnderscore
}
package mystrconv
import (
"strconv"
"testing"
)
// isIntStdLib reports whether strconv.ParseInt accepts s as a base-10
// integer that fits in 64 bits.
func isIntStdLib(s string) bool {
	_, err := strconv.ParseInt(s, 10, 64)
	return err == nil
}
// isIntMine reports whether this package's ParseInt accepts s as a base-10
// integer that fits in 64 bits.
func isIntMine(s string) bool {
	_, err := ParseInt(s, 10, 64)
	return err == nil
}
// res receives the result of every benchmarked call in BenchmarkParseInt.
// NOTE(review): presumably a package-level sink so the compiler cannot
// discard the calls as dead code — confirm.
var res bool
// BenchmarkParseInt measures strconv.ParseInt against this package's
// ParseInt on one valid input ("123") and one invalid input ("123g"), each as
// its own sub-benchmark.
//
// Allocation statistics are the point of the comparison, so every
// sub-benchmark calls ReportAllocs and the B/op and allocs/op columns appear
// even when the run is started without -benchmem.
func BenchmarkParseInt(b *testing.B) {
	type testCase struct {
		name string
		val  string
		fnc  func(string) bool
	}
	cases := []testCase{
		{"strconv.ParseInt-valid", "123", isIntStdLib},
		{"strconv.ParseInt-invalid", "123g", isIntStdLib},
		{"custom with no allocs-valid", "123", isIntMine},
		{"custom with no allocs-invalid", "123g", isIntMine},
	}
	for _, test := range cases {
		// b.Run does not return until the sub-benchmark has finished, so the
		// closure never observes a later iteration's value of test.
		b.Run(test.name, func(b *testing.B) {
			b.ReportAllocs()
			for j := 0; j < b.N; j++ {
				// Store into the package-level sink so the call has a visible effect.
				res = test.fnc(test.val)
			}
		})
	}
}
$ go test -bench=. -benchmem
goos: darwin
goarch: amd64
BenchmarkParseInt/strconv.ParseInt-valid-12 80329828 14.0 ns/op 0 B/op 0 allocs/op
BenchmarkParseInt/strconv.ParseInt-invalid-12 21108784 56.1 ns/op 48 B/op 1 allocs/op
BenchmarkParseInt/custom_with_no_allocs-valid-12 79545568 13.8 ns/op 0 B/op 0 allocs/op
BenchmarkParseInt/custom_with_no_allocs-invalid-12 85985926 14.5 ns/op 0 B/op 0 allocs/op
PASS
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment