Skip to content

Instantly share code, notes, and snippets.

@bkamins
Created November 2, 2017 09:31
Show Gist options
  • Save bkamins/5a2f89cab14d434e3f3e4a23c121a8a2 to your computer and use it in GitHub Desktop.
Save bkamins/5a2f89cab14d434e3f3e4a23c121a8a2 to your computer and use it in GitHub Desktop.
"""
    decode_utf8(x::Char)

Return, as a `UInt32`, the Unicode code point encoded by the raw bits of `x`.

The 32-bit word `reinterpret(UInt32, x)` is read as a UTF-8 sequence of one to four
bytes packed into its low-order bytes: the final byte of the sequence sits in bits 0-7
and the lead byte in the highest non-zero byte. Only well-formed sequences are accepted
(no overlong forms, no surrogates `U+D800..U+DFFF`, nothing above `U+10FFFF`).

NOTE(review): whether `reinterpret(UInt32, x)` actually produces this right-aligned
layout depends on the `Char` representation of the Julia version in use — confirm
before calling this on ordinary `Char` values.

# Throws
- `ErrorException("invalid UTF-8")` if the bits are not a well-formed sequence.
"""
function decode_utf8(x::Char)
    c = reinterpret(UInt32, x)

    # One byte (ASCII): the bits already are the code point.
    c < 0x00000080 && return c

    # Split the word into bytes; b0 is the least significant (last byte of the sequence).
    b3 = c >> 24
    b2 = (c >> 16) & 0xFF
    b1 = (c >> 8) & 0xFF
    b0 = c & 0xFF

    # The last byte of every multi-byte form must be a continuation byte (80..BF).
    if 0x80 <= b0 <= 0xBF
        if b3 == 0 && b2 == 0
            # Two bytes: lead C2..DF (C0 and C1 could only produce overlong forms).
            if 0xC2 <= b1 <= 0xDF
                return ((b1 & 0x1F) << 6) | (b0 & 0x3F)
            end
        elseif b3 == 0
            # Three bytes: lead E0..EF. The second byte is narrowed after E0 (to reject
            # overlong forms) and after ED (to reject surrogates).
            lo, hi = b2 == 0xE0 ? (0xA0, 0xBF) :
                     b2 == 0xED ? (0x80, 0x9F) : (0x80, 0xBF)
            if 0xE0 <= b2 <= 0xEF && lo <= b1 <= hi
                return ((b2 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b0 & 0x3F)
            end
        else
            # Four bytes: lead F0..F4. The second byte is narrowed after F0 (to reject
            # overlong forms) and after F4 (to stay at or below U+10FFFF).
            lo, hi = b3 == 0xF0 ? (0x90, 0xBF) :
                     b3 == 0xF4 ? (0x80, 0x8F) : (0x80, 0xBF)
            if 0xF0 <= b3 <= 0xF4 && lo <= b2 <= hi && 0x80 <= b1 <= 0xBF
                # Every shift is parenthesised: in Julia `>>`/`<<` bind tighter than `&`,
                # so an unparenthesised `c & mask >> n` masks with the shifted constant
                # instead of shifting the masked value.
                return ((b3 & 0x07) << 18) | ((b2 & 0x3F) << 12) |
                       ((b1 & 0x3F) << 6) | (b0 & 0x3F)
            end
        end
    end

    error("invalid UTF-8")
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment