Skip to content

Instantly share code, notes, and snippets.

@CandyMi
Last active July 15, 2024 11:44
Show Gist options
  • Save CandyMi/0115f704c48612a01813a4360a8bc2ac to your computer and use it in GitHub Desktop.
Save CandyMi/0115f704c48612a01813a4360a8bc2ac to your computer and use it in GitHub Desktop.
对指定字符串进行`Unicode`编码、解码.

Unicode 编码 / 解码

测试代码:

local unicode = require "unicode"

print(unicode.encode("我是谁abc"))
print(unicode.encode("我是谁abc", true))
print(unicode.decode('\\u6211\\u662F\\u8C01\\u0061\\u0062\\u0063'))

结果:

\u6211\u662f\u8c01\u0061\u0062\u0063
\u6211\u662F\u8C01\u0061\u0062\u0063
我是谁abc
-- Cache the standard-library functions used below in locals.
-- The `utf8` library is only available in Lua 5.3 and later.
local tonumber = tonumber
local gsub = string.gsub
local fmt = string.format
local u8_char = utf8.char
local u8_codepoint = utf8.codepoint
local u8_charpattern = utf8.charpattern
-- Module table; it is returned at the end of the file.
local unicode = { }
---Decode the hex payload of a single `\u` escape into UTF-8 text.
---Used as the replacement callback of `gsub` in `unicode.decode`, which
---passes the captured run of hex digits (without the leading `\u`).
---
---The escape is returned untouched (same digits, same case, same padding)
---whenever it cannot be turned into printable, valid UTF-8:
---  * the payload is not a hexadecimal number;
---  * it names a control character below U+0020 (kept escaped on purpose);
---  * it names a lone UTF-16 surrogate (U+D800..U+DFFF);
---  * it is larger than U+10FFFF, the last Unicode code point.
---@param s string @hex digits captured after `\u`
---@return string @the decoded character, or the original escape text
local function u8_dec(s)
  local escaped = '\\u' .. s
  local ord = tonumber(s, 16)
  if not ord then
    return escaped
  end
  -- Control characters stay escaped. Re-emitting the original text keeps the
  -- zero padding, so `\u000A` is not shortened to the unparsable `\ua`.
  if ord < 32 then
    return escaped
  end
  -- A greedy match such as `\u0061bcdef` yields a value beyond the Unicode
  -- range; handing it to utf8.char would raise or produce non-Unicode bytes.
  if ord > 0x10FFFF then
    return escaped
  end
  -- A surrogate half on its own has no valid UTF-8 representation.
  if ord >= 0xD800 and ord <= 0xDFFF then
    return escaped
  end
  return u8_char(ord)
end
---Render the first code point of `s` as a `\u` escape with uppercase hex.
---The number is zero-padded to at least four digits; larger code points
---simply use more digits.
---@param s string @UTF-8 text whose first character is encoded
---@return string @the escape sequence, e.g. `\u8C01`
local function u8_upper_enc(s)
  local code = u8_codepoint(s)
  return fmt("\\u%04X", code)
end
---Render the first code point of `s` as a `\u` escape with lowercase hex.
---The number is zero-padded to at least four digits; larger code points
---simply use more digits.
---@param s string @UTF-8 text whose first character is encoded
---@return string @the escape sequence, e.g. `\u8c01`
local function u8_lower_enc(s)
  local code = u8_codepoint(s)
  return fmt("\\u%04x", code)
end
---Encode a text as `\u` escapes, one escape per UTF-8 character.
---Every sequence matched by `utf8.charpattern` is replaced, ASCII included.
---@param text string @the original text
---@param upper boolean? @use uppercase hex digits when truthy, lowercase otherwise
---@return string @the escaped text
function unicode.encode(text, upper)
  -- Pick the per-character encoder once, then let gsub apply it.
  local encoder = u8_lower_enc
  if upper then
    encoder = u8_upper_enc
  end
  -- The parentheses drop gsub's second result (the substitution count).
  return (gsub(text, u8_charpattern, encoder))
end
---Decode the `\u` escapes in a text back into UTF-8.
---Two escape forms are understood:
---  * a UTF-16 surrogate pair (`\uD83D\uDE00`), combined into the single
---    supplementary code point it stands for;
---  * `\u` followed by four or more hex digits, read as one code point, so
---    the longer escapes that `unicode.encode` writes for code points above
---    U+FFFF decode back to the original character.
---Text that is not part of an escape is left as it is.
---@param text string @the escaped text
---@return string @the decoded text
function unicode.decode(text)
  -- Pass 1: surrogate pairs. Without this, each half would be decoded on its
  -- own into a byte sequence that is not valid UTF-8.
  local v = gsub(text, '\\u([dD][89abAB]%x%x)\\u([dD][c-fC-F]%x%x)', function(hi, lo)
    local high = tonumber(hi, 16) - 0xD800
    local low = tonumber(lo, 16) - 0xDC00
    return u8_char(0x10000 + high * 0x400 + low)
  end)
  -- Pass 2: every remaining escape. Pass 1 only emits multi-byte UTF-8, so it
  -- cannot create or extend a match here.
  v = gsub(v, '\\u([%da-fA-F][%da-fA-F][%da-fA-F][%da-fA-F]+)', u8_dec)
  return v
end
return unicode
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment