Created
November 24, 2009 05:45
-
-
Save jjgod/241673 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <errno.h>
#include <iconv.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/**
 * Convert a NUL-terminated GB18030 string into a newly allocated UTF-8 string.
 *
 * @param src  NUL-terminated GB18030 input; may be NULL.
 * @return     A malloc()ed, NUL-terminated UTF-8 string that the caller must
 *             free(), or NULL when src is NULL, the converter cannot be
 *             opened, or the initial buffer cannot be allocated.
 *
 * The conversion is best-effort: if iconv() stops for any reason other than
 * a full output buffer, or if the buffer cannot be enlarged, the text
 * converted up to that point is returned (always NUL-terminated).
 */
char *gb18030_to_utf8(const char *src)
{
    iconv_t cd;
    size_t in, out, len, err;
    char *dest, *outp, *inp;

    if (src == NULL)
        return NULL;

    /* iconv_open() signals failure with (iconv_t) -1; a "< 0" test on the
     * descriptor does not detect it */
    cd = iconv_open("utf-8", "gb18030");
    if (cd == (iconv_t) -1)
        return NULL;

    /* iconv() takes a non-const char ** for its input, hence the cast */
    inp = (char *) src;
    in = strlen(src);

    /* Guard the size computation below against wrapping around */
    if (in > SIZE_MAX / 2) {
        iconv_close(cd);
        return NULL;
    }

    /* Chinese characters in GB18030 are commonly 2 bytes and become 3 bytes
     * in UTF-8, while single-byte characters stay at 1 byte, so 1.5 times
     * the input length plus one byte for the terminator is a good first
     * guess; the loop below enlarges the buffer if it turns out too small */
    len = in + in / 2 + 1;
    dest = malloc(len);
    if (dest == NULL) {
        iconv_close(cd);
        return NULL;
    }
    outp = dest;
    out = len - 1;      /* always keep one byte back for the trailing '\0' */

    for (;;) {
        err = iconv(cd, &inp, &in, &outp, &out);

        /* Finished, or failed for a reason a bigger buffer cannot fix */
        if (err != (size_t) -1 || errno != E2BIG)
            break;

        /* Output buffer exhausted: double it, then re-derive the write
         * pointer and remaining space, since realloc() may move the block */
        size_t used = (size_t) (outp - dest);

        if (len > SIZE_MAX / 2)
            break;      /* doubling would overflow size_t */

        char *newdest = realloc(dest, len * 2);
        if (newdest == NULL)
            break;      /* old buffer is still valid; return what we have */

        len *= 2;
        dest = newdest;
        outp = dest + used;
        out = len - used - 1;
    }

    /* The reserved byte guarantees this write stays inside the buffer */
    *outp = '\0';

    iconv_close(cd);
    return dest;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment