Skip to content

Instantly share code, notes, and snippets.

@vasi
Created June 1, 2013 21:27
Show Gist options
  • Save vasi/5691787 to your computer and use it in GitHub Desktop.
Save vasi/5691787 to your computer and use it in GitHub Desktop.
UTF-16LE to UTF-8 conversion
#include <stdio.h>
/* Byte-order mark; main() discards it when it is the first code point read. */
#define BOM 0xFEFF
/* First high (leading) surrogate; readc() treats [SUR1, SUR2) as leading units. */
#define SUR1 0xD800
/* First low (trailing) surrogate; readc() accepts [SUR2, SUR2 + (SUR2 - SUR1)) as trailing units. */
#define SUR2 0xDC00
/*
 * Read one 16-bit little-endian code unit from stdin.
 *
 * Two bytes are always requested, low byte first. Returns the combined value
 * (0..0xFFFF), or EOF if either byte could not be read, so a dangling odd
 * byte at the end of the input is dropped.
 */
int read16(void) {
    int lo = getc(stdin);
    int hi = getc(stdin);

    if (lo != EOF && hi != EOF) {
        return lo | (hi << 8);
    }
    return EOF;
}
/*
 * Return the next code point from the UTF-16LE stream on stdin, or EOF.
 *
 * A high surrogate followed by a low surrogate is combined into a single
 * supplementary code point (>= 0x10000). An unpaired surrogate is returned
 * unchanged; the code unit that was read while looking for its partner is
 * kept and handed out by the next call.
 *
 * Not reentrant: the read-ahead unit lives in function-static storage.
 */
int readc(void) {
    /*
     * One-unit read-ahead buffer. A separate flag marks it as occupied
     * because 0 is itself a valid code unit (U+0000) and must not be
     * mistaken for "nothing buffered".
     */
    static int pending = 0;
    static int has_pending = 0;

    int a;
    if (has_pending) {
        a = pending;
        has_pending = 0;
    } else {
        a = read16();
    }

    /* EOF is negative, so it also leaves through this early return. */
    if (a < SUR1 || a >= SUR2) {
        return a;
    }

    /* a is a high surrogate: look for the matching low surrogate. */
    int b = read16();
    if (b >= SUR2 && b < SUR2 + (SUR2 - SUR1)) {
        return 0x10000 + (a - SUR1) * 0x400 + (b - SUR2);
    }

    /* No valid partner: remember b (possibly EOF) for the next call. */
    pending = b;
    has_pending = 1;
    return a;
}
/*
 * Write code point c to stdout as a UTF-8 sequence of bs bytes.
 *
 * The lead byte has its top bs bits set, followed by the most significant
 * payload bits of c; each of the remaining bs - 1 bytes is 10xxxxxx with the
 * next six payload bits. The caller chooses bs to fit c (main() passes 2, 3
 * or 4).
 */
void writec(int c, int bs) {
    int lead_shift = 6 * (bs - 1);
    int lead_marker = 0xff - (0xff >> bs);

    putc(lead_marker + (c >> lead_shift), stdout);

    /* Continuation bytes, most significant six-bit group first. */
    for (int remaining = bs - 2; remaining >= 0; remaining--) {
        int six_bits = (c >> (6 * remaining)) & 0x3f;
        putc(0x80 | six_bits, stdout);
    }
}
/*
 * Convert UTF-16LE text on stdin to UTF-8 on stdout.
 *
 * A byte-order mark at the very start of the input is discarded. Every other
 * code point is written with the shortest UTF-8 length that fits it.
 *
 * Returns 0 on success, or 1 if reading stdin or writing stdout failed.
 */
int main(void) {
    int c = readc();
    if (c == BOM) {
        c = readc();
    }

    for (; c != EOF; c = readc()) {
        if (c < 0x80) {
            putc(c, stdout);        /* ASCII: single byte, no prefix bits */
        } else if (c < 0x800) {
            writec(c, 2);
        } else if (c < 0x10000) {
            writec(c, 3);
        } else {
            writec(c, 4);
        }
    }

    /* EOF from readc() may mean a read error rather than end of input. */
    if (ferror(stdin)) {
        fputs("error: failed to read input\n", stderr);
        return 1;
    }
    /* Flush explicitly so a failed write is reported in the exit status. */
    if (fflush(stdout) == EOF || ferror(stdout)) {
        fputs("error: failed to write output\n", stderr);
        return 1;
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment