Skip to content

Instantly share code, notes, and snippets.

@uriid1
Last active April 10, 2024 01:04
Show Gist options
  • Save uriid1/77e9efff56b2ce5d833deeda411b57e5 to your computer and use it in GitHub Desktop.
Save uriid1/77e9efff56b2ce5d833deeda411b57e5 to your computer and use it in GitHub Desktop.
Format unicode blocks to lua table
--
-- Format table:
-- unicode.blocks = {
-- ['Domino-Tiles'] = "\\u{1F030}-\\u{1F09F}",
-- ['Supplemental-Arrows-C'] = "\\u{1F800}-\\u{1F8FF}",
-- ...
-- }
--
-- Blocks.txt = https://www.unicode.org/Public/UNIDATA/Blocks.txt
-- Map of block name (spaces replaced by '-') to a range string holding the
-- literal text \u{START}-\u{END}, built by reading 'Blocks.txt' from the
-- current working directory when this file is loaded.
local unicodeBlocks = {}
for line in io.lines('Blocks.txt', "*l") do
  -- Data lines look like "START..END; Block Name". The pattern is anchored to
  -- the start of the line, so '#' comment lines and blank lines never match.
  -- Trailing whitespace (including a stray '\r' from CRLF files) is excluded
  -- from the name so it cannot leak into the table key.
  local startCode, endCode, name =
    line:match('^%s*(%x+)%.%.(%x+);%s*(%S.-)%s*$')
  if startCode then
    -- gsub returns (string, count); the assignment keeps only the string.
    name = name:gsub(' ', '-')
    unicodeBlocks[name] = ('\\u{%s}-\\u{%s}'):format(startCode, endCode)
  end
end
--- Expand a block range string into a string of every character it covers.
-- @tparam string property range text of the form `\u{START}-\u{END}` (literal
--   backslashes, hexadecimal code points), as stored in the blocks table
-- @treturn string concatenation of `utf8.char(cp)` for every cp from START to
--   END inclusive; the empty string when START > END
-- Raises an error when `property` does not have the expected form.
local function getChars(property)
  local startHex, finishHex
  if type(property) == 'string' then
    startHex, finishHex = property:match('^\\u{(%x+)}%-\\u{(%x+)}')
  end
  if not startHex then
    error(("getChars: expected a range like '\\u{1F030}-\\u{1F09F}', got %s")
      :format(tostring(property)), 2)
  end

  local start = tonumber(startHex, 16)
  local finish = tonumber(finishHex, 16)

  -- Collect the encoded characters and join once: appending with `..` inside
  -- the loop copies the growing string on every iteration, which is quadratic
  -- for large blocks.
  -- NOTE(review): every code point in the range is passed to utf8.char as-is;
  -- whether surrogate ranges should be skipped is left to the caller.
  local pieces = {}
  for codepoint = start, finish do
    pieces[#pieces + 1] = utf8.char(codepoint)
  end
  return table.concat(pieces)
end
-- Module exports: the parsed block table and the range-expansion helper.
local exports = {}
exports.blocks = unicodeBlocks
exports.getChars = getChars
return exports
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment