Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dtzWill/7bc07da1dcd02e01c2fbb28cbaa81420 to your computer and use it in GitHub Desktop.
Save dtzWill/7bc07da1dcd02e01c2fbb28cbaa81420 to your computer and use it in GitHub Desktop.
#include <iconv.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/**
 * Convert a NUL-terminated string from one character set to another
 * using iconv(3).
 *
 * The input length is taken with strlen(), so the input must not contain
 * embedded zero bytes.
 *
 * @param from_charset  iconv name of the encoding `input` is in
 * @param to_charset    iconv name of the encoding to produce
 * @param input         NUL-terminated text encoded in `from_charset`
 *
 * @return A malloc()ed buffer holding the converted text followed by four
 *         zero bytes (enough to terminate 1-, 2- and 4-byte code units).
 *         The caller owns it and must free() it.  Returns NULL if an
 *         argument is NULL, the conversion is not supported, memory runs
 *         out, or the input contains an invalid sequence.  A trailing
 *         incomplete multibyte sequence is dropped rather than treated as
 *         an error.
 */
char* convert(const char *from_charset, const char *to_charset, const char *input) {
    enum { NUL_PAD = 4, GROW_SLACK = 8 };
    const char *inbuf;
    char *output, *outbuf, *tmp;
    size_t inleft, outleft, outlen, used, grow, rc;
    int flushing = 0;
    iconv_t cd;

    if (!from_charset || !to_charset || !input) {
        return NULL;
    }

    cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t) -1) {
        return NULL;
    }

    inbuf = input;
    inleft = strlen(input);

    /* Start with an output buffer as large as the input; NUL_PAD extra
     * bytes are always kept in reserve for the terminator and are never
     * offered to iconv(). */
    outlen = inleft;
    output = malloc(outlen + NUL_PAD);
    if (!output) {
        iconv_close(cd);
        return NULL;
    }
    outbuf = output;
    outleft = outlen;

    /* Two phases share one loop so that both can grow the buffer on E2BIG:
     * first convert the input, then flush the converter so that stateful
     * encodings emit their "return to initial state" sequence. */
    for (;;) {
        errno = 0;
        if (flushing) {
            rc = iconv(cd, NULL, NULL, &outbuf, &outleft);
        } else {
            rc = iconv(cd, (char **) &inbuf, &inleft, &outbuf, &outleft);
        }

        if (rc != (size_t) -1) {
            if (flushing) {
                break;
            }
            flushing = 1;
            continue;
        }

        if (errno == E2BIG) {
            /* Not enough room at outbuf: enlarge the buffer and retry.
             * iconv() has already advanced inbuf/outbuf past whatever it
             * managed to convert, so only the offset must be preserved. */
            used = (size_t) (outbuf - output);
            grow = inleft * 2 + GROW_SLACK;
            tmp = realloc(output, outlen + grow + NUL_PAD);
            if (!tmp) {
                iconv_close(cd);
                free(output);
                return NULL;
            }
            output = tmp;
            outlen += grow;
            outbuf = output + used;
            outleft = outlen - used;
            continue;
        }

        if (flushing) {
            /* Any other flush failure: keep what has been converted. */
            break;
        }

        if (errno == EINVAL) {
            /* Incomplete multibyte sequence at the end of the input:
             * truncate it and finish normally. */
            flushing = 1;
            continue;
        }

        /* EILSEQ (or anything unexpected): the input is invalid for
         * from_charset and there is no sensible way to recover. */
        iconv_close(cd);
        free(output);
        return NULL;
    }

    iconv_close(cd);

    /* outbuf never passes output + outlen, so the reserved pad is in
     * bounds. */
    memset(outbuf, 0, NUL_PAD);
    return output;
}
/**
 * Round-trip demo: convert a UTF-8 sample into `encoding` and back again,
 * print each stage, and report whether the result matches the input.
 *
 * @return EXIT_FAILURE if either conversion fails, 0 otherwise (including
 *         when the round-tripped text differs from the input).
 */
int main(void) {
    //char input[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚„…†‡‰Š‹ŚŤŽŹ‘’“”•–—™š›śťžźˇ˘Ł¤Ą¦§¨©Ş«¬®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ";
    char input[] = "caffc8f] はれひほふ\n Author: A U Thor <author@example.com>";
    //char *encoding= "cp1250";
    const char *encoding = "ISO-2022-JP";
    char *outputCp = NULL;
    char *outputUtf8 = NULL;
    int status = EXIT_FAILURE;

    outputCp = convert("UTF-8", encoding, input);
    printf("Input: %s\n", input);
    if (!outputCp) {
        /* convert() returns NULL on failure; passing that to printf("%s")
         * or back into convert() would be undefined behavior. */
        fprintf(stderr, "Conversion UTF-8 -> %s failed\n", encoding);
        goto out;
    }
    printf("UTF-8 -> %s: %s\n", encoding, outputCp);

    outputUtf8 = convert(encoding, "UTF-8", outputCp);
    if (!outputUtf8) {
        fprintf(stderr, "Conversion %s -> UTF-8 failed\n", encoding);
        goto out;
    }
    printf("UTF-8 -> %s -> UTF-8: %s\n", encoding, outputUtf8);

    if (strcmp(outputUtf8, input) == 0) {
        printf("Input and output are identical\n");
    } else {
        printf("Output DOESN'T match with the input\n");
    }
    status = 0;

out:
    /* Both buffers are owned by the caller of convert(); free(NULL) is a
     * no-op, so no guards are needed. */
    free(outputUtf8);
    free(outputCp);
    return status;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment