Last active
December 17, 2022 19:20
-
-
Save natanael-b/3de34eff407477cc519eb6a309ebaf14 to your computer and use it in GitHub Desktop.
Lua latinic especial chars upper and lower.lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Case-mapping tables for ASCII and Latin-1 letters.
-- Keys and values are Unicode code points (not bytes):
--   upper2lower[code_of_uppercase] = code_of_lowercase
--   lower2upper[code_of_lowercase] = code_of_uppercase
-- Code points that are absent from a table have no mapping and are expected
-- to be passed through unchanged by the functions that consult the tables.
local upper2lower, lower2upper = {}, {}

do
  -- In both ASCII and the Latin-1 supplement the lowercase letter sits
  -- exactly 32 code points after its uppercase counterpart.
  local CASE_OFFSET = 32

  -- Register every upper/lower pair whose uppercase code lies in first..last.
  local function add_range(first, last)
    for upper = first, last do
      local lower = upper + CASE_OFFSET
      upper2lower[upper] = lower
      lower2upper[lower] = upper
    end
  end

  add_range(65, 90)    -- A..Z            -> a..z
  add_range(192, 214)  -- U+00C0..U+00D6  -> U+00E0..U+00F6
  -- 215 (U+00D7 multiplication sign) and 247 (U+00F7 division sign) are
  -- deliberately skipped: they are symbols, not letters.
  add_range(216, 222)  -- U+00D8..U+00DE  -> U+00F8..U+00FE

  -- U+0178 (Y with diaeresis, uppercase) pairs with U+00FF; this is the one
  -- pair that does not follow the +32 rule.
  upper2lower[376] = 255
  lower2upper[255] = 376
end

-- Keep references to the stock implementations so the overrides defined on
-- the `string` table can still delegate to them.
local str_lower = string.lower
local str_upper = string.upper
--- Lowercase a string, optionally treating it as UTF-8.
-- Replaces the stock `string.lower`. When `utf8_mode` is falsy (including
-- the method form `s:lower()`), the call is forwarded to the saved original
-- implementation. When it is truthy, the string is decoded code point by
-- code point and every code point found in `upper2lower` is replaced by its
-- lowercase counterpart; all other code points are kept as they are.
-- @tparam string str text to convert
-- @tparam[opt] boolean utf8_mode enable the code-point based conversion
-- @treturn string the converted text
-- Note: per the Lua reference manual, `utf8.codes` raises an error when it
-- meets an invalid byte sequence, so malformed input fails in UTF-8 mode.
function string.lower(str,utf8_mode)
  if not utf8_mode then
    return str_lower(str)
  end

  -- Collect the encoded characters and join them once at the end; appending
  -- to a string inside the loop would copy the whole result every iteration.
  local chars, n = {}, 0
  for _, code in utf8.codes(str) do
    n = n + 1
    chars[n] = utf8.char(upper2lower[code] or code)
  end
  return table.concat(chars)
end
--- Uppercase a string, optionally treating it as UTF-8.
-- Replaces the stock `string.upper`. When `utf8_mode` is falsy (including
-- the method form `s:upper()`), the call is forwarded to the saved original
-- implementation. When it is truthy, the string is decoded code point by
-- code point and every code point found in `lower2upper` is replaced by its
-- uppercase counterpart; all other code points are kept as they are.
-- @tparam string str text to convert
-- @tparam[opt] boolean utf8_mode enable the code-point based conversion
-- @treturn string the converted text
-- Note: per the Lua reference manual, `utf8.codes` raises an error when it
-- meets an invalid byte sequence, so malformed input fails in UTF-8 mode.
function string.upper(str,utf8_mode)
  if not utf8_mode then
    return str_upper(str)
  end

  -- Collect the encoded characters and join them once at the end; appending
  -- to a string inside the loop would copy the whole result every iteration.
  local chars, n = {}, 0
  for _, code in utf8.codes(str) do
    n = n + 1
    chars[n] = utf8.char(lower2upper[code] or code)
  end
  return table.concat(chars)
end
--- Remove every byte that is not part of a well-formed UTF-8 sequence.
-- Valid characters of any encoded length are kept untouched; each offending
-- byte (stray continuation byte, truncated or malformed sequence, byte that
-- can never appear in UTF-8) is dropped individually and scanning resumes at
-- the byte right after it, so the bytes following a bad lead byte are
-- re-examined on their own.
-- What counts as "well-formed" is whatever `utf8.len` of the running Lua
-- version accepts.
-- @tparam string str text to clean
-- @treturn string `str` without the invalid bytes (`str` itself if it was
--   already valid)
function string.sanitize(str)
  local total = #str
  local kept, n = {}, 0
  local start = 1

  while start <= total do
    -- utf8.len returns a count when str[start..] is entirely valid, or
    -- nil plus the position of the first invalid byte otherwise.
    local count, bad = utf8.len(str, start)
    if count then
      if start == 1 then
        return str -- nothing to strip; avoid rebuilding the string
      end
      n = n + 1
      kept[n] = str:sub(start)
      break
    end

    if bad > start then
      -- keep the valid run that precedes the invalid byte
      n = n + 1
      kept[n] = str:sub(start, bad - 1)
    end
    start = bad + 1 -- skip exactly one invalid byte
  end

  return table.concat(kept)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I just nicked this for my editor config - https://codeberg.org/lusci/lusci