Skip to content

Instantly share code, notes, and snippets.

@svip
Last active April 30, 2017 08:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save svip/73c080f63894002b4aba25a87638d4c1 to your computer and use it in GitHub Desktop.
Save svip/73c080f63894002b4aba25a87638d4c1 to your computer and use it in GitHub Desktop.
// Table-driven UTF-8 decoder data, ported to Pascal from
// http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
// NOTE(review): the upstream page carries its own licence/attribution terms --
// confirm they are satisfied before redistributing this file.

// Decoder states as stored in State^ by UTF8Decode below.
// State 0 is the state in which UTF8Decode treats the next byte as the start
// of a new sequence; state 1 is absorbing (every entry of row s1 in the
// transition table is 1), so once reached it is never left.
const UTF8_ACCEPT=0;
const UTF8_REJECT=1;
// utf8d holds two tables back to back (400 entries = 256 + 9 * 16):
//   [0..255]   maps a byte value to a character class in 0..11 ($0..$b);
//              UTF8Decode also uses the class as a shift count for the
//              lead-byte payload mask ($ff shr class).
//   [256..399] is the state transition table, 16 entries per state row
//              (s0..s8), read as utf8d[256 + state * 16 + class]; each
//              entry is the next state.
const utf8d: array[0..399] of Byte = (
// --- byte value -> character class ---
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
$a,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$3,$4,$3,$3, // e0..ef
$b,$6,$6,$6,$5,$8,$8,$8,$8,$8,$8,$8,$8,$8,$8,$8, // f0..ff
// --- (state, class) -> next state; one 16-entry row per state ---
$0,$1,$2,$3,$5,$8,$7,$1,$1,$1,$4,$6,$1,$1,$1,$1, // s0 (row for UTF8_ACCEPT)
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // s7..s8
);
// Pointer to a Longword; UTF8Decode takes its in/out state and code point
// arguments through this type.
type PLongword = ^Longword;
{ Advances the table-driven UTF-8 decoder by one input byte.

  State : in/out decoder state. When State^ = UTF8_ACCEPT on entry the byte
          is treated as the first byte of a sequence; otherwise it is treated
          as a continuation of the sequence in progress.
          (Presumably the caller sets State^ to UTF8_ACCEPT before the first
          byte of a stream -- confirm against call sites.)
  Codep : in/out code point accumulator. It is overwritten (not read) when
          State^ = UTF8_ACCEPT on entry, and extended by six bits otherwise.
  By    : the input byte. It is used directly as an index into utf8d and is
          not range-checked here, so it is expected to be in 0..255.

  Returns the new value of State^, which is also stored through State. }
function UTF8Decode(State: PLongword; Codep: PLongword; By: Longword): Longword;
var
  CharClass: Longword;
  Accumulated: Longword;
begin
  // Character class of this byte (first 256 entries of utf8d).
  CharClass := utf8d[By];

  if State^ = UTF8_ACCEPT then
    // Start of a sequence: the class doubles as the shift that strips the
    // length-marker bits from the lead byte.
    Accumulated := (By) and ($ff shr CharClass)
  else
    // Inside a sequence: shift in the low six bits of the byte.
    Accumulated := (By and cardinal($3f)) or (Codep^ shl 6);
  Codep^ := Accumulated;

  // Transition table starts at offset 256, 16 entries per state row.
  State^ := utf8d[256 + (16 * State^) + CharClass];
  UTF8Decode := State^;
end;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment