package main | |
import ( | |
"fmt" | |
) | |
func main() { | |
fmt.Printf("U+%04X\n", decode([]byte{0x61})) | |
fmt.Printf("U+%04X\n", decode([]byte{0xc2, 0xa1})) | |
fmt.Printf("U+%04X\n", decode([]byte{0xea, 0x9c, 0xb3})) | |
fmt.Printf("U+%04X\n", decode([]byte{0xf0, 0x9f, 0x8e, 0x87})) | |
} | |
// decode decodes the single UTF-8 encoded code point that starts at buf[0]
// and returns it as a rune. Any bytes following that first sequence are
// ignored.
//
// It returns U+FFFD (the Unicode replacement character) instead of panicking
// or producing a bogus value when buf does not begin with a well-formed
// sequence, namely when:
//   - buf is empty;
//   - the first byte is a continuation byte (10xxxxxx) or has five or more
//     leading one bits;
//   - buf is shorter than the length announced by the first byte;
//   - a following byte is not a continuation byte;
//   - the value is encoded with more bytes than necessary (overlong form);
//   - the value is a UTF-16 surrogate (U+D800..U+DFFF) or exceeds U+10FFFF.
func decode(buf []byte) rune {
	const (
		replacement  = 0xFFFD
		maxCodePoint = 0x10FFFF
		surrogateMin = 0xD800
		surrogateMax = 0xDFFF
	)

	if len(buf) == 0 {
		return replacement
	}

	lead := buf[0]
	if lead&0x80 == 0 {
		// ASCII: the byte is the code point.
		return rune(lead)
	}

	// The count of leading one bits in the first byte gives the length of
	// the sequence. lowest is the smallest code point that genuinely needs
	// that many bytes; anything below it is an overlong encoding.
	var (
		size   int
		lowest rune
	)
	switch {
	case lead&0xE0 == 0xC0: // 110xxxxx
		size, lowest = 2, 0x80
	case lead&0xF0 == 0xE0: // 1110xxxx
		size, lowest = 3, 0x800
	case lead&0xF8 == 0xF0: // 11110xxx
		size, lowest = 4, 0x10000
	default:
		// A stray continuation byte, or a lead byte announcing more than
		// four bytes.
		return replacement
	}

	// Check the length explicitly: slicing buf[1:size] past len(buf) would
	// either panic or, when the capacity allows it, read bytes that are not
	// part of the caller's slice.
	if len(buf) < size {
		return replacement
	}

	// Keep only the payload bits of the first byte.
	r := rune(lead) & (0x7f >> size)
	for _, b := range buf[1:size] {
		// Make sure the two high bits are 1 and 0 respectively.
		if b&0xC0 != 0x80 {
			return replacement
		}
		// Each continuation byte contributes its low 6 bits.
		r = r<<6 | rune(b&0x3F)
	}

	if r < lowest || r > maxCodePoint || (r >= surrogateMin && r <= surrogateMax) {
		return replacement
	}
	return r
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment