Created
September 16, 2011 11:03
-
-
Save cypres/1221865 to your computer and use it in GitHub Desktop.
iconv convert
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <iconv.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/errno.h> | |
using namespace std; | |
/*
 * Convert the NUL-terminated byte string `input` from `from_charset` to
 * `to_charset` using iconv.
 *
 * Parameters:
 *   from_charset  iconv name of the encoding `input` is in.
 *   to_charset    iconv name of the encoding to produce.
 *   input         NUL-terminated source bytes.
 *
 * Returns a malloc()ed buffer holding the converted bytes followed by four
 * NUL bytes; the caller must free() it. Returns NULL if an argument is NULL,
 * the charset pair is not supported, memory cannot be allocated, or the
 * input contains a sequence iconv rejects (e.g. EILSEQ).
 *
 * An incomplete multibyte sequence at the very end of the input (EINVAL) is
 * not an error: the output is truncated before it.
 */
char *
convert(const char *from_charset, const char *to_charset, const char *input) {
    if (from_charset == NULL || to_charset == NULL || input == NULL)
        return NULL;

    iconv_t cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t) -1)
        return NULL;

    const char *inbuf = input;
    size_t inleft = strlen(input);

    /* Start with an output buffer the same size as the input; it is grown
     * on demand below. 4 extra bytes are always reserved for the
     * terminator. */
    size_t outlen = inleft;
    char *output = (char *) malloc(outlen + 4);
    if (output == NULL) {
        iconv_close(cd);
        return NULL;
    }
    char *outbuf = output;
    size_t outleft = outlen;

    /* Two phases share one loop so that both can grow the buffer on E2BIG:
     * first convert the input, then flush iconv's shift state (a call with
     * a NULL input pointer). */
    bool flushing = false;
    for (;;) {
        /* NOTE: this must be the real, global errno. Declaring a local
         * variable named `errno` hides the error code iconv reports. */
        errno = 0;
        const size_t rc = flushing
            ? iconv(cd, NULL, NULL, &outbuf, &outleft)
            : iconv(cd, (char **) &inbuf, &inleft, &outbuf, &outleft);
        const int err = errno;

        if (rc != (size_t) -1) {
            if (flushing)
                break;
            flushing = true;
            continue;
        }

        if (err == E2BIG) {
            /* Not enough room at *outbuf: grow the buffer and retry.
             * iconv has already advanced inbuf/outbuf past whatever it
             * managed to convert, so only the write position needs to be
             * re-based onto the new allocation. */
            const size_t used = (size_t) (outbuf - output);
            outlen += inleft * 2 + 8;
            char *tmp = (char *) realloc(output, outlen + 4);
            if (tmp == NULL) {
                /* realloc leaves the old block valid on failure. */
                iconv_close(cd);
                free(output);
                return NULL;
            }
            output = tmp;
            outbuf = output + used;
            outleft = outlen - used;
            continue;
        }

        if (flushing) {
            /* Flushing is best effort: keep what was converted. */
            break;
        }

        if (err == EINVAL) {
            /* Incomplete multibyte sequence at the end of the input:
             * truncate it and finish up. */
            flushing = true;
            continue;
        }

        /* EILSEQ (or anything unexpected): bad input, cannot recover. */
        iconv_close(cd);
        free(output);
        return NULL;
    }

    iconv_close(cd);

    /* Not all charsets can be NUL-terminated with a single NUL byte: UCS-2
     * needs 2 and UCS-4 needs 4, so write 4. outbuf never exceeds
     * output + outlen and the allocation is outlen + 4 bytes, so this
     * always fits. */
    memset(outbuf, 0, 4);
    return output;
}
int main() { | |
string input = string("Test æøå a"); | |
char* outoutBuffer; | |
outoutBuffer = convert("UTF-8", "ISO-8859-1", input.c_str()); | |
string output = string(outoutBuffer); | |
cout << input << endl; | |
cout << output << endl; | |
return 0; | |
} |
Thank you for your code; it helped me with my C++ project, which prints tickets on an Epson TM-T20II receipt printer.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello, thanks a lot for this nice example, the cleanest I have found so far for iconv!
Just a question about the realloc. I'm not very fluent in C, and I found that realloc can be tricky in some cases:
https://stackoverflow.com/questions/44789295/correct-use-of-realloc
Do you think it is worth using this double-pointer approach, or is it too much?