Skip to content

Instantly share code, notes, and snippets.

@cloudwu
Created October 22, 2020 07:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cloudwu/744a0d80fe51da0c02fbcb4d1432bed3 to your computer and use it in GitHub Desktop.
Save cloudwu/744a0d80fe51da0c02fbcb4d1432bed3 to your computer and use it in GitHub Desktop.
lua UTF16 BE lib
--- UTF-16 (big-endian) <-> UTF-8 conversion helpers.
-- Requires Lua 5.3 or newer: relies on string.pack/string.unpack, the utf8
-- library and the integer bitwise operators.
local utf16 = {}

-- UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER, substituted for every
-- malformed piece of UTF-16 input.
local REPLACEMENT = utf8.char(0xFFFD)

--- Convert a big-endian UTF-16 byte string to UTF-8.
-- Well-formed input is converted exactly. Malformed input never raises and
-- never leaks raw bytes into the result; instead each of the following is
-- replaced by one U+FFFD:
--   * a high surrogate that is not followed by a low surrogate,
--   * a low surrogate that is not preceded by a high surrogate,
--   * a dangling final byte when the input length is odd.
-- @tparam string s UTF-16BE encoded bytes (no BOM handling is performed)
-- @treturn string the UTF-8 encoded text
function utf16.toutf8(s)
  local out, n = {}, 0
  local high -- high surrogate still waiting for its low half

  -- Stop at #s - 1 so a dangling odd byte is never read as part of a unit.
  for pos = 1, #s - 1, 2 do
    local unit = string.unpack(">H", s, pos)
    local kind = unit & 0xFC00 -- 0xD800: high surrogate, 0xDC00: low surrogate

    if high and kind == 0xDC00 then
      -- Valid pair: combine both 10-bit halves into a supplementary code point.
      n = n + 1
      out[n] = utf8.char(((high - 0xD800) << 10) + (unit - 0xDC00) + 0x10000)
      high = nil
    else
      if high then
        -- The pending high surrogate was not followed by a low one.
        n = n + 1
        out[n] = REPLACEMENT
        high = nil
      end
      if kind == 0xD800 then
        high = unit
      elseif kind == 0xDC00 then
        -- Low surrogate with no preceding high surrogate.
        n = n + 1
        out[n] = REPLACEMENT
      else
        n = n + 1
        out[n] = utf8.char(unit)
      end
    end
  end

  if high then
    -- Input ended in the middle of a surrogate pair.
    n = n + 1
    out[n] = REPLACEMENT
  end
  if #s % 2 == 1 then
    -- Truncated final code unit.
    n = n + 1
    out[n] = REPLACEMENT
  end

  return table.concat(out)
end

--- Convert a UTF-8 string to big-endian UTF-16.
-- Code points up to U+FFFF become a single 16-bit unit; larger ones become a
-- surrogate pair. The input is validated up front so that every kind of
-- malformed UTF-8 raises, rather than some byte sequences being copied raw
-- into the UTF-16 output or silently discarded.
-- @tparam string s UTF-8 encoded text
-- @treturn string UTF-16BE encoded bytes (no BOM is written)
-- @raise an error containing "invalid UTF-8 code" when s is not valid UTF-8
function utf16.fromutf8(s)
  local count, bad = utf8.len(s)
  if not count then
    error(("invalid UTF-8 code at byte %d"):format(bad), 2)
  end

  local out, n = {}, 0
  for pos, cp in utf8.codes(s) do
    -- Lua 5.4's strict decoder already rejects encoded surrogates; this check
    -- keeps a more permissive utf8 library from letting one through, since a
    -- surrogate code point cannot be represented in well-formed UTF-16.
    if cp >= 0xD800 and cp <= 0xDFFF then
      error(("invalid UTF-8 code at byte %d"):format(pos), 2)
    end
    n = n + 1
    if cp <= 0xFFFF then
      out[n] = string.pack(">H", cp)
    else
      local offset = cp - 0x10000
      out[n] = string.pack(">HH", 0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF))
    end
  end
  return table.concat(out)
end
-- Disabled round-trip self-check: the emoji is U+1F600, which is above
-- U+FFFF and therefore goes through the surrogate-pair branch both ways.
--local s = utf16.fromutf8("😀")
--assert(utf16.toutf8(s) == "😀")
-- Export the module table to the caller of require.
return utf16
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment