Skip to content

Instantly share code, notes, and snippets.

@kaeza
Last active April 5, 2019 20:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kaeza/5935259048d914b4f518faf4a5447e8c to your computer and use it in GitHub Desktop.
Save kaeza/5935259048d914b4f518faf4a5447e8c to your computer and use it in GitHub Desktop.
Lua - Full-width Unicode
---
-- Unicode transformations.
--
-- All input/output is done in UTF-8.
--
-- Copyright © 2017-2018 Diego Martínez
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy
-- of this software and associated documentation files (the "Software"), to
-- deal in the Software without restriction, including without limitation the
-- rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-- sell copies of the Software, and to permit persons to whom the Software is
-- furnished to do so, subject to the following conditions:
--
-- * The above copyright notice and this permission notice shall be included in
--   all copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-- IN THE SOFTWARE.
--
-- @module uniconv
-- @author Diego Martínez <https://github.com/kaeza>
-- @license MIT
local uniconv = { }

-- Maps every printable ASCII character (0x20..0x7E) to the UTF-8 encoding of
-- its full-width counterpart.
--
-- The table is generated from code points instead of being written out as a
-- string literal: a literal made of full-width characters silently turns into
-- plain ASCII (and into a syntax error, because of the embedded quote and
-- backslash) as soon as a tool applies Unicode compatibility normalization to
-- the file.
local fullwidth = { [" "] = "\227\128\128" } -- U+3000 IDEOGRAPHIC SPACE
for byte = 0x21, 0x7E do
  -- '!'..'~' correspond to U+FF01..U+FF5E, i.e. the ASCII code plus 0xFEE0.
  local cp = byte + 0xFEE0
  -- Every code point in that range takes exactly three bytes in UTF-8.
  -- Plain arithmetic is used so the code does not need bitwise operators.
  fullwidth[string.char(byte)] = string.char(
    0xE0 + math.floor(cp / 0x1000),
    0x80 + math.floor(cp / 0x40) % 0x40,
    0x80 + cp % 0x40)
end

---
-- Converts printable ascii characters to their full width equivalents.
--
-- All characters between space (' ' 0x20) and tilde ('~' 0x7e) are replaced;
-- all other characters (control codes, DEL and the bytes of multi-byte UTF-8
-- sequences) are left intact.
--
-- @tparam string s String to convert.
-- @treturn string Converted string.
function uniconv.fullwidth(s)
  -- With a table as replacement, gsub looks each match up in the table.
  -- The outer parentheses drop gsub's second result (the match count).
  return (s:gsub("[ -~]", fullwidth))
end
-- Pictures for the control codes 0x00..0x1F (U+2400..U+241F), in code order.
-- Each one is three bytes long in UTF-8, which the slicing below relies on.
local ctrlrepr = "␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟"
-- Picture for the space character (U+2420).
local spacerepr = "␠"
-- Picture for the DEL character (U+2421).
local delrepr = "␡"

---
-- Convert control characters to their unicode representation.
--
-- The input is processed byte by byte. Control codes 0x00 (NUL) through
-- 0x1F (US) become the matching "Control Pictures" characters
-- U+2400 - U+241F, and 0x7F (DEL) becomes U+2421. The space character (0x20)
-- becomes U+2420, but only when `includespace` is truthy. Every other byte
-- is copied to the output unchanged.
--
-- @tparam string s String to convert.
-- @tparam[opt] boolean includespace Whether spaces are replaced as well.
-- @treturn string Converted string.
function uniconv.ctrlrepr(s, includespace)
  local pieces = { }
  for pos = 1, #s do
    local code = s:byte(pos)
    local piece
    if code == 127 then
      piece = delrepr
    elseif code == 32 and includespace then
      piece = spacerepr
    elseif code < 32 then
      -- Pick the three-byte picture belonging to this control code.
      local first = code * 3 + 1
      piece = ctrlrepr:sub(first, first + 2)
    else
      piece = s:sub(pos, pos)
    end
    -- Exactly one piece is produced per input byte, so `pos` is also the
    -- next free slot in the output list.
    pieces[pos] = piece
  end
  return table.concat(pieces)
end
-- Hand the module table to whoever loads this chunk.
return uniconv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment