Skip to content

Instantly share code, notes, and snippets.

@osiyuk
Created March 25, 2018 14:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save osiyuk/4836db0055ce56ceff5e3ddff0dff2ee to your computer and use it in GitHub Desktop.
Save osiyuk/4836db0055ce56ceff5e3ddff0dff2ee to your computer and use it in GitHub Desktop.
utf8 decoder
/*
 * Read one byte from `stream` and return its low six payload bits if it is a
 * UTF-8 continuation byte (bit pattern 10xxxxxx).
 *
 * Returns -1 on a read error, at end of file, or when the byte read is not a
 * continuation byte. The byte is consumed in every case.
 */
static int utf8_read_continuation(FILE *stream)
{
	int byte = fgetc(stream);

	if (byte == EOF || ferror(stream) || (byte & 0xc0) != 0x80) {
		return -1;
	}
	return byte & 0x3f;
}

/**
 * Decode a single UTF-8 encoded code point from `stream`.
 *
 * @param stream  Input stream positioned at the start of a UTF-8 sequence.
 * @return The decoded code point (U+0000..U+10FFFF, surrogates excluded),
 *         EOF when the stream is at end of file before any byte is read,
 *         or 0 after printing a diagnostic to stderr when a read error
 *         occurs or the input is not well-formed UTF-8.
 *
 * Note: the error value 0 is indistinguishable from a decoded NUL byte; it is
 * kept for compatibility with existing callers.
 *
 * Rejected as malformed: a stray continuation byte in lead position, a
 * missing or invalid continuation byte (including end of file in the middle
 * of a sequence), overlong encodings, UTF-16 surrogates (U+D800..U+DFFF),
 * values above U+10FFFF, and lead bytes 0xF8..0xFF.
 */
int get_utf8_char(FILE *stream)
{
	int lead, b, c, d, code_point;

	lead = fgetc(stream);
	if (ferror(stream)) {
		goto error;
	}
	if (lead == EOF) {
		return EOF;
	}

	/* 0xxxxxxx: ASCII is encoded as itself. */
	if ((lead & 0x80) == 0) {
		return lead;
	}

	/* 10xxxxxx: a continuation byte cannot start a sequence. */
	if ((lead & 0x40) == 0) {
		goto error;
	}

	b = utf8_read_continuation(stream);
	if (b < 0) {
		goto error;
	}

	/* 110xxxxx 10xxxxxx: U+0080..U+07FF. */
	if ((lead & 0x20) == 0) {
		code_point = ((lead & 0x1f) << 6) | b;
		if (code_point < 0x80) {
			goto error; /* overlong: fits in one byte */
		}
		return code_point;
	}

	c = utf8_read_continuation(stream);
	if (c < 0) {
		goto error;
	}

	/* 1110xxxx 10xxxxxx 10xxxxxx: U+0800..U+FFFF. */
	if ((lead & 0x10) == 0) {
		code_point = ((lead & 0x0f) << 12) | (b << 6) | c;
		if (code_point < 0x800) {
			goto error; /* overlong: fits in two bytes */
		}
		if (code_point >= 0xd800 && code_point <= 0xdfff) {
			goto error; /* UTF-16 surrogates are not valid in UTF-8 */
		}
		return code_point;
	}

	d = utf8_read_continuation(stream);
	if (d < 0) {
		goto error;
	}

	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: U+10000..U+10FFFF. */
	if ((lead & 0x08) == 0) {
		code_point = ((lead & 0x07) << 18) | (b << 12) | (c << 6) | d;
		if (code_point < 0x10000) {
			goto error; /* overlong: fits in three bytes */
		}
		if (code_point > 0x10ffff) {
			goto error; /* beyond the Unicode code space */
		}
		return code_point;
	}

	/* 11111xxx: never a valid lead byte; fall through to the error path. */

error:
	if (ferror(stream)) {
		/* Genuine I/O failure: errno describes it. */
		perror("utf8_decoder");
	} else {
		/* Malformed input: errno is unrelated, so do not report it. */
		fputs("utf8_decoder: invalid or truncated UTF-8 sequence\n", stderr);
	}
	return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment