Skip to content

Instantly share code, notes, and snippets.

@Egor-Skriptunoff
Created October 11, 2020 22:03
Show Gist options
  • Save Egor-Skriptunoff/8f5caf39b4611a297dc82bc43c70a158 to your computer and use it in GitHub Desktop.
Save Egor-Skriptunoff/8f5caf39b4611a297dc82bc43c70a158 to your computer and use it in GitHub Desktop.
Function wcwidth implemented in pure Lua
-- wcwidth(codepoint): display-width lookup for a single Unicode code point.
-- The variable is forward-declared here so that the function assigned inside
-- the `do` block below stays visible after the block's private locals go out
-- of scope. The argument is only compared with `<`; no type or range check is
-- performed.
local wcwidth -- function wcwidth(codepoint) returns 0, 1 or 2 for codepoints from the range 0..0x10FFFF
do -- implementation from the C library "utf8proc" was rewritten in Lua, compressed and minified
-- d: the compressed table of range boundaries. The decoder reads it one
-- non-space character at a time (gmatch"%S"), so the line breaks are ignored;
-- every other character is data and must not be altered.
local d = [[r6rur7!&B!!(!"my!"arCssststssrNx|}ssrDr+r&sr{zsxttsvr7sssr4r1rq}rP{{sr.vs{suswrAu!!/rGrLusr(sy|tr
3urNssytttu{s|tr0sturNsswvttuusr4tus}urNsszsusur*tr,xsurNssytttuyu|tr4srQwuusv{sr>wrOysusvyt}tr3urNssysusvyt}tr2vrMt
sysusv{s|tr3ur\svxssszr(trSsty~zrxst{}xr`tr1sssssvtrGr*stw}sr:{srzr*r-vvusutyuvr#~ss|vrxrv!%=u!)zur3ur3tr4trVr6{srCv
!!.tr8s!!.~v~!"5wrO|sr3tsrFr'rUwrEr'r<{~ur4r#rNr$rFr*!!Pusr+vsxstu!!~rPsw!%Kwr.yrGws|rvr7!%itr#t!!vvusts!%Itr+trH~rA
sr)sr#sztr'twtzswsr+sytssvstsysvtr2sr9sssvussrSur.sr$s!)#trIsvs!$7u!!Esrvr6!!8r0sro~!"0r0~vr@vr'trlttr{wrAsrtsrj~rEs
r>z!jZrV#C1u!,;urM!$Fvs|r6trft!"jsusvsr-wvsritrHr(r0r(r#sr<zr/r#~r3uvrEr$r:srYr$~sztrCurHssuttwtssr?wwt!"Fzstr("8frr
!5`!c|!#jt!!"rZs!'er&|xr&r9sr)sv!!Kssrv!!7yr(u!%As!"<s!!Mw!1^ustwvr>uvs!!]t!%}v!$!t!!Q}!!gurKr%rNvrC}tsr%srHur:r$r&t
rBs~urFr$zvstrr~xs!!X~r+vrMtsytttu{s|ttyuw!!xr(r-srgr*!"Eyt{r1trhr'!!"r#r{r%!"Zr%!"Oxsttvssst!!Eytyusr2|r>ysvzs{}rDr
&!$3zszrhr,sr$!!2xusstsyssrXwstsw!#Wv!Ya{"VuwrQy!+$ssrMyvrcvs}tr$!a0z!-&r@{"!+!"yrGur'vz!$*!:+tsv!XGwur,tyr4v!!Lu!4y
rMvrHzsr$sr,wsr%!.Bysr'tystsw!"_y!$Sv!/ty!!%y!23s!"$s!!vst|r{ur#rBv{ytr$x!!Rr7~{sr\sr,~rAvw~r'usur]sss!!strUr#vsr.r(
sr0tr#srlrkrFr\xsuutur)ty{!"=~!"zrEs|srHsrhsrI!!(wuuwy{r/yy{ur#y!-y%n@r8!L#}!"8t!]2r$!oE!@u!%^!/v!TIr"Lsr4rv!!8!"J]]
-- a: run lengths consumed by the width generator g() below (the first run
-- length, 239, is the initial value of e); i is the index of the next entry.
local i, a = 1, {2, 72, 68, 6, 9, 0, 3, 1, 19, 30, 2, 32, 2, 2, 4, 3, 1, 1, 5, 2, 4, 164, 1, 1, 16, 44, 106, 6}
-- Decoder state:
--   w  width of the range most recently produced (starts at 0)
--   s  alternation constant, toggled between 1 and 3 at each run boundary
--   e  number of steps left in the current run
--   h  iterator over the non-space characters of d
--   z  index of the last slot filled in W
--   c  start code point of the range most recently decoded
--   W  flat lookup table: odd slots hold widths, even slots hold the code
--      point at which the following width starts: {w0, c1, w1, c2, w2, ...}
--   b  string.byte, reached through the string d
--
-- g() returns the width of the next range, or nil once `a` is exhausted.
-- Inside a run the width flips via w = s - w (0 <-> 1 while s == 1,
-- 1 <-> 2 while s == 3). At a run boundary s is toggled and, if the current
-- width is not 1, the width jumps to 2 - w without consuming a step of the
-- new run.
--
-- The repeat loop pairs each width from g() with a boundary decoded from d.
-- A boundary is stored as a delta from the previous one, using one to three
-- characters: the first character's byte minus 114 is the delta when that is
-- >= 1; otherwise a second character is folded in (base 94, offset -22), and
-- if the result is still below 13 a third one is (base 94, offset +714754).
-- The loop ends when g() returns nil.
local w, s, e, h, z, c, W, b = 0, 1, 239, d:gmatch"%S", 1, 0, {0}, d.byte; local function g() if e == 0 then
e, i, s = a[i], i+1, 4-s; if not e then return elseif w ~= 1 then w = 2-w; return w end end; e, w = e-1, s-w
return w end; repeat local pr, w = c, g(); if w then c = b(h())-114; if c < 1 then c = c*94+b(h())-22
if c < 13 then c = c*94+b(h())+714754 end end; c, z = pr+c, z+2; W[z], W[z-1] = w, c end until not w
-- Lookup: l and r are odd (width) slots of W bracketing the answer; the loop
-- narrows them until they meet and W[l] is the width. Each step probes slot
-- k = l + 2*s and compares the code point with the boundary W[k+1] that
-- follows it. The step s is capped by m, which starts at 1 and is multiplied
-- by 4 whenever the cap applies, so the first probes land 1, 4, 16, ... ranges
-- past l; once the cap no longer applies, s = floor((r-l)/4) puts k at about
-- the middle of the remaining interval.
-- Note: the `local s` inside the loop shadows the decoder's s.
function wcwidth(codepoint) local m, l, r = 1, 1, z; repeat local s = (r-l)/4; if s >= m then s, m = m, m*4 else
s = s-s%1 end; local k = l+s*2; if codepoint < W[k+1] then r = k else l = k+2 end until l == r; return W[l] end
end
-- Computes the total display width of a UTF-8 encoded string by summing
-- wcwidth() over every decoded code point.
-- str: the string to measure. It is split into chunks made of one byte plus
--      any continuation bytes (\128-\191) that follow it; the encoding is
--      not validated.
-- Returns the accumulated width (0 for an empty string).
local function utf8_string_width(str)
   local total = 0
   for seq in str:gmatch(".[\128-\191]*") do
      local len = #seq
      -- Reducing the lead byte modulo 2^(8 - len) drops its leading
      -- length-marker bits (for well-formed input), leaving the payload.
      local codepoint = seq:byte(1) % 2^(8 - len)
      local pos = 2
      while pos <= len do
         -- A continuation byte is 128 + payload; subtracting 2 before the
         -- multiplication by 64 cancels that 128 marker.
         codepoint = (codepoint - 2) * 64 + seq:byte(pos)
         pos = pos + 1
      end
      total = total + wcwidth(codepoint)
   end
   return total
end
-- Usage examples: print the display width of each sample string.
--   "Hello"  --> 5
--   "Привет" --> 6
--   "你好"   --> 4
for _, sample in ipairs({"Hello", "Привет", "你好"}) do
   print(utf8_string_width(sample))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment