Skip to content

Instantly share code, notes, and snippets.

@skeeto
Last active April 5, 2023 07:54
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skeeto/09f1410183d246f9b18cba95c4e602f0 to your computer and use it in GitHub Desktop.
Save skeeto/09f1410183d246f9b18cba95c4e602f0 to your computer and use it in GitHub Desktop.
Faster UTF-16 decode in Go
// This is free and unencumbered software released into the public domain.
package main
import (
"math/rand"
"testing"
"unicode/utf16"
"unicode/utf8"
)
// isHigh reports whether r lies in the high (leading) surrogate range,
// 0xd800 through 0xdbff inclusive.
func isHigh(r rune) bool {
	// The range is exactly the 1024 values that share the upper bits of
	// 0xd800, so clearing the low ten bits collapses all of them to 0xd800.
	return r&^0x3ff == 0xd800
}
// isLow reports whether r lies in the low (trailing) surrogate range,
// 0xdc00 through 0xdfff inclusive.
func isLow(r rune) bool {
	// The range is exactly the 1024 values that share the upper bits of
	// 0xdc00, so clearing the low ten bits collapses all of them to 0xdc00.
	return r&^0x3ff == 0xdc00
}
// Decode converts the UTF-16 code units in p to a UTF-8 encoded string.
//
// A high surrogate immediately followed by a low surrogate is combined into
// a single supplementary code point. Any other surrogate (a low surrogate
// with no preceding high surrogate, or a high surrogate that is last in p or
// is followed by anything other than a low surrogate) is emitted as U+FFFD,
// and decoding continues with the next code unit.
func Decode(p []uint16) string {
	// The capacity is a starting guess rather than an upper bound: a single
	// code unit can need three UTF-8 bytes, in which case append regrows.
	s := make([]byte, 0, 2*len(p))
	for i := 0; i < len(p); i++ {
		r := rune(0xfffd) // replacement character unless a branch below overrides it
		r1 := rune(p[i])
		switch {
		case isHigh(r1):
			if i+1 < len(p) {
				if r2 := rune(p[i+1]); isLow(r2) {
					i++ // the low half is consumed together with the high half
					r = 0x10000 + (r1-0xd800)<<10 + (r2 - 0xdc00)
				}
			}
		case !isLow(r1):
			// Not a surrogate at all: the code unit is the code point.
			// The check must be on r1; r still holds 0xfffd here, so testing
			// r would let unpaired low surrogates through.
			r = r1
		}
		s = utf8.AppendRune(s, r)
	}
	return string(s)
}
// genbuf returns the UTF-16 encoding of n pseudo-random code points. The
// generator is seeded with n, so the same n always yields the same buffer.
func genbuf(n int) []uint16 {
	rng := rand.New(rand.NewSource(int64(n)))
	runes := make([]rune, n)
	for i := range runes {
		// Simulate mostly-ASCII: about one value in a hundred is drawn from
		// the whole 0..0x10ffff range, the rest from the 7-bit range.
		limit := 1 << 7
		if rng.Intn(100) == 0 {
			limit = 0x10ffff + 1
		}
		runes[i] = rune(rng.Intn(limit))
	}
	return utf16.Encode(runes)
}
func TestCustom(t *testing.T) {
for i := 0; i < 24; i++ {
buf := genbuf(1 << i)
w := string(utf16.Decode(buf))
g := string(Decode(buf))
if w != g {
t.Errorf("mismatch on 1<<%d", i)
}
}
}
// BenchmarkStdlib times one utf16.Decode call, plus the conversion of its
// result to a string, over a genbuf buffer built from b.N code points.
// Work therefore scales with b.N through the input size instead of through
// a repeat loop.
func BenchmarkStdlib(b *testing.B) {
	// Keep input generation out of the measured region.
	b.StopTimer()
	input := genbuf(b.N)
	b.StartTimer()

	decoded := utf16.Decode(input)
	_ = string(decoded)
}
// BenchmarkCustom times one Decode call over a genbuf buffer built from b.N
// code points. Work therefore scales with b.N through the input size instead
// of through a repeat loop.
func BenchmarkCustom(b *testing.B) {
	// Keep input generation out of the measured region.
	b.StopTimer()
	input := genbuf(b.N)
	b.StartTimer()

	decoded := Decode(input)
	_ = decoded
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment