Last active
November 29, 2023 22:08
-
-
Save bradleypeabody/185b1d7ed6c0c2ab6cec to your computer and use it in GitHub Desktop.
golang, convert UTF-16 to UTF-8 string
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
// http://play.golang.org/p/fVf7duRtdH | |
import "fmt" | |
import "unicode/utf16" | |
import "unicode/utf8" | |
import "bytes" | |
func main() { | |
b := []byte{ | |
0xff, // BOM | |
0xfe, // BOM | |
'T', | |
0x00, | |
'E', | |
0x00, | |
'S', | |
0x00, | |
'T', | |
0x00, | |
0x6C, | |
0x34, | |
'\n', | |
0x00, | |
} | |
s, err := DecodeUTF16(b) | |
if err != nil { | |
panic(err) | |
} | |
fmt.Println(s) | |
} | |
// DecodeUTF16 converts a UTF-16 encoded byte slice into a UTF-8 Go string.
//
// If b starts with a byte order mark, the mark selects the byte order
// (FF FE = little-endian, FE FF = big-endian) and is not included in the
// result. Without a mark the input is read as little-endian.
//
// Surrogate pairs are combined into a single code point; an unpaired
// surrogate decodes to U+FFFD, as defined by utf16.Decode.
//
// An error is returned when len(b) is odd, because the input cannot then be
// split into whole 16-bit code units.
func DecodeUTF16(b []byte) (string, error) {
	if len(b)%2 != 0 {
		return "", fmt.Errorf("Must have even length byte slice")
	}

	// Detect and strip the byte order mark so it does not leak into the
	// output as U+FEFF and so big-endian input is not misread.
	bigEndian := false
	switch {
	case bytes.HasPrefix(b, []byte{0xff, 0xfe}):
		b = b[2:]
	case bytes.HasPrefix(b, []byte{0xfe, 0xff}):
		bigEndian = true
		b = b[2:]
	}

	units := make([]uint16, len(b)/2)
	for i := range units {
		lo, hi := b[2*i], b[2*i+1]
		if bigEndian {
			lo, hi = hi, lo
		}
		units[i] = uint16(lo) | uint16(hi)<<8
	}

	// Decode the whole sequence in one call: a surrogate pair spans two
	// code units, so decoding unit by unit can never reassemble it.
	var ret bytes.Buffer
	ret.Grow(len(b))
	var enc [utf8.UTFMax]byte
	for _, r := range utf16.Decode(units) {
		n := utf8.EncodeRune(enc[:], r)
		ret.Write(enc[:n])
	}
	return ret.String(), nil
}
From the blog post at http://angelonotes.blogspot.com/2015/09/golang-utf16-utf8.html:
bs_UTF16LE, _, _ := transform.Bytes(unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewEncoder(), []byte("測試"))
bs_UTF16BE, _, _ := transform.Bytes(unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder(), []byte("測試"))
bs_UTF8LE, _, _ := transform.Bytes(unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder(), bs_UTF16LE)
bs_UTF8BE, _, _ := transform.Bytes(unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder(), bs_UTF16BE)
Saved me a lot, thank you
Thanks very much!
Go already has support for decoding []byte into []uint16 (respecting the endianness):

func DecodeUtf16(b []byte, order binary.ByteOrder) (string, error) {
	ints := make([]uint16, len(b)/2)
	if err := binary.Read(bytes.NewReader(b), order, &ints); err != nil {
		return "", err
	}
	return string(utf16.Decode(ints)), nil
}

@akirabbq @ik5
complete solution (which also works with surrogate pairs): utf16.go
You posted just the function I needed to convert Oracle CLOB data to a string.
Life saver, thanks
This helped me to decode UTF-16LE to UTF-8: https://blog.fearcat.in/a?ID=00001-1bd90844-ce0c-4fac-9b8f-fe3d8a30451d
decoder := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder()
utf8bytes, err := decoder.Bytes(data) // data contains UTF16LE as read from a file
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Go already has support for decoding []byte into []uint16 (respecting the endianness): @akirabbq @ik5
complete solution (which also works with surrogate pairs): utf16.go