Skip to content

Instantly share code, notes, and snippets.

@natanael-b
Last active December 17, 2022 19:20
Show Gist options
  • Save natanael-b/3de34eff407477cc519eb6a309ebaf14 to your computer and use it in GitHub Desktop.
Save natanael-b/3de34eff407477cc519eb6a309ebaf14 to your computer and use it in GitHub Desktop.
Lua latinic especial chars upper and lower.lua
-- Code-point map from uppercase to lowercase letters (ASCII and Latin-1).
-- Every covered pair is exactly 32 code points apart, except U+0178 (376),
-- whose lowercase partner is U+00FF (255).
local upper2lower = {}
for code = 65, 90 do -- ASCII A..Z -> a..z
  upper2lower[code] = code + 32
end
for code = 192, 222 do -- U+00C0..U+00DE -> U+00E0..U+00FE
  if code ~= 215 then -- U+00D7 is the multiplication sign; it has no entry
    upper2lower[code] = code + 32
  end
end
upper2lower[376] = 255
-- Code-point map from lowercase to uppercase letters (ASCII and Latin-1).
-- Every covered pair is exactly 32 code points apart, except U+00FF (255),
-- whose uppercase partner is U+0178 (376).
local lower2upper = {}
for code = 97, 122 do -- ASCII a..z -> A..Z
  lower2upper[code] = code - 32
end
for code = 224, 254 do -- U+00E0..U+00FE -> U+00C0..U+00DE
  if code ~= 247 then -- U+00F7 is the division sign; it has no entry
    lower2upper[code] = code - 32
  end
end
lower2upper[255] = 376
-- Keep handles to the stock byte-oriented implementations before
-- string.lower / string.upper are replaced further down in this file.
local str_lower, str_upper = string.lower, string.upper
--- Lowercase a string, optionally treating it as UTF-8.
-- Without `utf8_mode` this defers to the stock string.lower saved in
-- `str_lower`. With `utf8_mode` truthy, the string is decoded with
-- utf8.codes and every code point that has an entry in `upper2lower` is
-- replaced by its mapped value; all other code points are emitted unchanged.
-- @param str the string to convert; in UTF-8 mode it must be valid UTF-8,
--   otherwise utf8.codes raises an error
-- @param utf8_mode truthy to enable the code-point aware conversion
-- @return the converted string
function string.lower(str, utf8_mode)
  if not utf8_mode then
    return str_lower(str)
  end
  -- Collect the encoded characters and join them once: appending with `..`
  -- inside the loop re-copies the growing result on every iteration.
  local pieces, n = {}, 0
  for _, code in utf8.codes(str) do
    n = n + 1
    pieces[n] = utf8.char(upper2lower[code] or code)
  end
  return table.concat(pieces)
end
--- Uppercase a string, optionally treating it as UTF-8.
-- Without `utf8_mode` this defers to the stock string.upper saved in
-- `str_upper`. With `utf8_mode` truthy, the string is decoded with
-- utf8.codes and every code point that has an entry in `lower2upper` is
-- replaced by its mapped value; all other code points are emitted unchanged.
-- @param str the string to convert; in UTF-8 mode it must be valid UTF-8,
--   otherwise utf8.codes raises an error
-- @param utf8_mode truthy to enable the code-point aware conversion
-- @return the converted string
function string.upper(str, utf8_mode)
  if not utf8_mode then
    return str_upper(str)
  end
  -- Collect the encoded characters and join them once: appending with `..`
  -- inside the loop re-copies the growing result on every iteration.
  local pieces, n = {}, 0
  for _, code in utf8.codes(str) do
    n = n + 1
    pieces[n] = utf8.char(lower2upper[code] or code)
  end
  return table.concat(pieces)
end
--- Strip every byte that is not part of a well-formed UTF-8 sequence.
-- Validity is decided by utf8.len, so the result is always accepted by the
-- other utf8.* functions of the running Lua version.
--
-- Differences from the previous hand-rolled scanner:
--   * a string ending in a lone lead byte no longer raises an error
--     (str:byte past the end returned nil and was compared to a number);
--   * the byte following a removed byte is no longer skipped unchecked;
--   * valid 3- and 4-byte characters are kept instead of being deleted;
--   * a lead byte followed by another lead byte is no longer accepted.
-- @param str the string to clean
-- @return `str` with all invalid bytes removed ("" stays "")
function string.sanitize(str)
  local kept, n = {}, 0
  local pos, len = 1, #str
  while pos <= len do
    -- utf8.len yields a count when everything from `pos` on is valid,
    -- otherwise a false value plus the position of the first bad byte.
    local count, bad = utf8.len(str, pos)
    if count then
      n = n + 1
      kept[n] = str:sub(pos)
      break
    end
    if bad > pos then
      -- keep the valid run that precedes the offending byte
      n = n + 1
      kept[n] = str:sub(pos, bad - 1)
    end
    pos = bad + 1 -- drop exactly one byte, then re-validate from the next
  end
  return table.concat(kept)
end
@strainer
Copy link

I just nicked this for my editor config - https://codeberg.org/lusci/lusci

-- lookups from https://gist.github.com/natanael-b/3de34eff407477cc519eb6a309ebaf14
-- Latin-1 uppercase code points mapped to their lowercase partners.
-- ASCII is not listed here. Each pair is 32 code points apart, except
-- U+0178 (376), which maps to U+00FF (255).
local utf_lower = {}
for code = 192, 222 do -- U+00C0..U+00DE -> U+00E0..U+00FE
  if code ~= 215 then -- U+00D7 is the multiplication sign; it has no entry
    utf_lower[code] = code + 32
  end
end
utf_lower[376] = 255

-- Latin-1 lowercase code points mapped to their uppercase partners.
-- ASCII is not listed here. Each pair is 32 code points apart, except
-- U+00FF (255), which maps to U+0178 (376).
local utf_upper = {}
for code = 224, 254 do -- U+00E0..U+00FE -> U+00C0..U+00DE
  if code ~= 247 then -- U+00F7 is the division sign; it has no entry
    utf_upper[code] = code - 32
  end
end
utf_upper[255] = 376
                     
--- Change the case of a UTF-8 string.
-- The byte-oriented string.lower / string.upper runs first; the result is
-- then re-encoded code point by code point through `utf_lower` or
-- `utf_upper`, but only when it contains a byte in the range \194-\244.
-- @param c  the string to convert
-- @param lo truthy to lowercase, falsy or omitted to uppercase
-- @return the converted string
local function utfcaseup(c, lo)
  local mapping
  if lo then
    c = string.lower(c)
    mapping = utf_lower
  else
    c = string.upper(c)
    mapping = utf_upper
  end

  -- No byte in \194-\244 present: return the byte-level result directly.
  if c:find("[\194-\244]") == nil then
    return c
  end

  local out = {}
  for _, code in utf8.codes(c) do
    out[#out + 1] = utf8.char(mapping[code] or code)
  end
  return table.concat(out)
end

--- Uppercase `c` by calling utfcaseup without a mode argument.
-- @param c the string to convert
-- @return whatever utfcaseup returns
function upperutf8(c)
  return utfcaseup(c)
end

--- Lowercase `c` by calling utfcaseup with the mode argument "low".
-- @param c the string to convert
-- @return whatever utfcaseup returns
function lowerutf8(c)
  return utfcaseup(c, "low")
end

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment