Skip to content

Instantly share code, notes, and snippets.

@m039
Created June 12, 2011 19:35
Show Gist options
  • Save m039/1021913 to your computer and use it in GitHub Desktop.
Save m039/1021913 to your computer and use it in GitHub Desktop.
Print the contents of a UTF-16 file to stdout as UTF-8.
/* Author: m039 <flam44 (at) gmail (dot) com> */
#include <stdio.h>
/*
* Use the following commands to test this snippet:
*
* iconv -f utf8 -t utf16 utf8.txt -o utf16.txt
* ./utf16_to_utf8 utf16.txt > utf8.txt
*/
/* Input stream; main() assigns it the result of fopen() on argv[1]. */
static FILE *g_fp;
/*
 * Staging area for the UTF-8 output. utf8() writes into it through a cursor
 * and performs no bounds checking of its own, so the fixed 512-byte capacity
 * is small relative to large input files.
 */
static char g_buffer[512];
/* A Unicode code point (or a raw UTF-16 code unit before pairing). */
typedef unsigned int character;

/*
 * Encode `ch` as UTF-8 at the cursor `*string` and advance the cursor past
 * the bytes written (1 to 4 of them, depending on the magnitude of `ch`).
 *
 * Nothing is written, and the cursor is left untouched, when `*string` is
 * NULL or when `ch` is greater than 0x10ffff. No terminating NUL is appended
 * and no bounds check is made: the caller must guarantee that at least four
 * bytes are available at `*string`.
 */
void utf8(character ch, char **string) {
    /* Lead-byte marker indexed by sequence length (index 0 is unused). */
    static const unsigned char lead_marker[] = { 0x00, 0x00, 0xc0, 0xe0, 0xf0 };
    char *out = *string;
    int length;
    int i;

    if (out == NULL) {
        return;
    }

    if (ch < 0x80) {
        length = 1;
    } else if (ch < 0x800) {
        length = 2;
    } else if (ch < 0x10000) {
        length = 3;
    } else if (ch < 0x110000) {
        length = 4;
    } else {
        return; /* outside the Unicode range: emit nothing */
    }

    /* Fill continuation bytes from the tail, peeling off 6 bits each time. */
    for (i = length - 1; i > 0; i--) {
        out[i] = (char)(0x80 | (ch & 0x3f));
        ch >>= 6;
    }
    /* Whatever bits remain fit in the payload of the lead byte. */
    out[0] = (char)(lead_marker[length] | ch);

    *string = out + length;
}
/*
 * Read one 16-bit little-endian value from `fp`.
 *
 * Two bytes are always requested from the stream. Returns the value in the
 * range 0..0xffff (first byte is the low half), or EOF if either read
 * reported EOF -- which includes a single trailing byte at the end of input.
 */
int fgetshort(FILE *fp) {
    int byte[2];
    int i;

    for (i = 0; i < 2; i++) {
        byte[i] = fgetc(fp);
    }

    if (byte[0] == EOF || byte[1] == EOF) {
        return EOF;
    }

    return (byte[0] | (byte[1] << 8)) & 0xffff;
}
int main(int argc, char **argv) {
int c, surrogate;
char *p = g_buffer;
if (argc < 2) {
fprintf(stderr, "Usage: ./utf16_to_utf8 <from>\n");
return -1;
}
g_fp = fopen(argv[1], "r");
if (g_fp == NULL) {
perror(argv[1]);
return -1;
}
while ((c = fgetshort(g_fp)) != EOF) {
if ((c >= 0xd800) && (c <= 0xdbff)) {
surrogate = (c - 0xd800) << 10;
} else if ((c >= 0xdc00) && (c <= 0xdfff)) {
surrogate |= c - 0xdc00;
utf8(surrogate + 0x10000, &p);
} else {
utf8(c, &p);
}
}
*p = 0;
fclose(g_fp);
puts(g_buffer);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment