Skip to content

Instantly share code, notes, and snippets.

@pboyd pboyd/utf8decode.go

Created Jul 5, 2020
Embed
What would you like to do?
package main
import (
"fmt"
)
// main decodes one sample UTF-8 sequence of each encoded length (one to four
// bytes) and prints the resulting code point in U+XXXX notation, one per line.
func main() {
	samples := [][]byte{
		{0x61},                   // 1-byte sequence (ASCII)
		{0xc2, 0xa1},             // 2-byte sequence
		{0xea, 0x9c, 0xb3},       // 3-byte sequence
		{0xf0, 0x9f, 0x8e, 0x87}, // 4-byte sequence
	}
	for _, seq := range samples {
		fmt.Printf("U+%04X\n", decode(seq))
	}
}
// decode decodes the single UTF-8 encoded code point at the start of buf and
// returns it as a rune. Bytes following the first complete sequence are
// ignored.
//
// It returns U+FFFD (the Unicode replacement character) when the input is not
// a well-formed UTF-8 sequence, namely when:
//   - buf is empty;
//   - the first byte is a continuation byte (10xxxxxx) or has more than four
//     leading one bits;
//   - buf is shorter than the length announced by the first byte;
//   - any trailing byte is not a continuation byte;
//   - the sequence is an overlong encoding (the value would fit in fewer
//     bytes);
//   - the value is a UTF-16 surrogate (U+D800..U+DFFF) or exceeds U+10FFFF.
func decode(buf []byte) rune {
	const (
		replacement  = 0xFFFD   // returned for every kind of malformed input
		surrogateMin = 0xD800   // first UTF-16 surrogate, never valid in UTF-8
		surrogateMax = 0xDFFF   // last UTF-16 surrogate
		maxRune      = 0x10FFFF // highest valid Unicode code point
	)

	if len(buf) == 0 {
		return replacement
	}

	lead := buf[0]
	if lead&0x80 == 0 {
		// High bit clear: plain ASCII, the byte is the code point.
		return rune(lead)
	}
	if lead&0x40 == 0 {
		// 10xxxxxx is a continuation byte; it cannot start a sequence.
		return replacement
	}

	// The position of the first zero bit after the two leading ones gives the
	// total sequence length: 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4.
	var size int
	for i := 2; i <= 4; i++ {
		if lead&(0x80>>i) == 0 {
			size = i
			break
		}
	}
	if size == 0 || len(buf) < size {
		// Either five or more leading ones, or the sequence is truncated.
		return replacement
	}

	// The lead byte carries 7-size payload bits below its length marker.
	r := rune(lead) & (0x7f >> size)
	for _, b := range buf[1:size] {
		// Continuation bytes must have the form 10xxxxxx.
		if b&0xC0 != 0x80 {
			return replacement
		}
		// Each continuation byte contributes its low 6 bits.
		r = r<<6 | rune(b&0x3F)
	}

	// Reject overlong forms: every length has a smallest code point that
	// actually requires that many bytes.
	minForSize := [...]rune{2: 0x80, 3: 0x800, 4: 0x10000}
	if r < minForSize[size] {
		return replacement
	}
	if r >= surrogateMin && r <= surrogateMax {
		return replacement
	}
	if r > maxRune {
		return replacement
	}
	return r
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.