Created
August 7, 2012 07:11
-
-
Save bow-fujita/3282693 to your computer and use it in GitHub Desktop.
How to encode UTF-8 string in JSON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <stdexcept> | |
#include <vector> | |
#include <string> | |
#include <cstring> | |
#include <locale> | |
#include <boost/property_tree/json_parser.hpp> | |
using namespace std; | |
/// Converts between multibyte (narrow) strings and wide strings using the
/// codecvt<wchar_t, char, mbstate_t> facet of a named locale.
///
/// Both conversions throw std::invalid_argument when the facet cannot
/// convert the whole input.
class CodeCvt
{
public:
    /// @param locale  Name of the locale whose codecvt facet performs the
    ///                conversions. Constructing the std::locale from this
    ///                name is expected to throw std::runtime_error when the
    ///                locale is not available on the system.
    explicit CodeCvt(const char *locale = "en_US.UTF-8")
        : locale_(locale), codecvt_(std::use_facet<codecvt_type>(locale_)) {}

    /// Converts a multibyte string (in the encoding of the locale) to a
    /// wide string.
    ///
    /// @param s  Multibyte input; may be empty.
    /// @return   The wide-character equivalent of @p s.
    /// @throws std::invalid_argument if the facet reports partial, error or
    ///         noconv, or does not consume all of @p s.
    std::wstring widen(const std::string &s) const
    {
        // Nothing to convert; also avoids taking front() of an empty vector.
        if (s.empty())
        {
            return std::wstring();
        }

        const char *fb = s.data();
        const char *fe = fb + s.size();
        const char *fn = fb;

        // Every wide character consumes at least one input byte, so one
        // wchar_t per input byte is always enough room.
        std::vector<wchar_t> ws(s.size());
        wchar_t *tb = &ws.front();
        wchar_t *te = tb + ws.size();
        wchar_t *tn = tb;

        // Value-initialization yields the initial conversion state.
        std::mbstate_t st = std::mbstate_t();
        const std::codecvt_base::result res = codecvt_.in(st, fb, fe, fn, tb, te, tn);
        assert_result(res, fn, fe);
        return std::wstring(tb, tn);
    }

    /// Converts a wide string to a multibyte string in the encoding of the
    /// locale.
    ///
    /// @param ws  Wide input; may be empty.
    /// @return    The multibyte equivalent of @p ws.
    /// @throws std::invalid_argument if the facet reports partial, error or
    ///         noconv, or does not consume all of @p ws.
    std::string narrow(const std::wstring &ws) const
    {
        // Nothing to convert; also avoids taking front() of an empty vector.
        if (ws.empty())
        {
            return std::string();
        }

        const wchar_t *fb = ws.data();
        const wchar_t *fe = fb + ws.size();
        const wchar_t *fn = fb;

        // Four bytes per wide character: the longest UTF-8 encoding of a
        // single code point.
        std::vector<char> s(ws.size() * 4);
        char *tb = &s.front();
        char *te = tb + s.size();
        char *tn = tb;

        // Value-initialization yields the initial conversion state.
        std::mbstate_t st = std::mbstate_t();
        const std::codecvt_base::result res = codecvt_.out(st, fb, fe, fn, tb, te, tn);
        assert_result(res, fn, fe);
        return std::string(tb, tn);
    }

private:
    /// Turns a codecvt result into an exception unless the conversion
    /// succeeded and consumed the whole input.
    ///
    /// @param res  Result code returned by the facet.
    /// @param fn   Position in the input where the facet stopped.
    /// @param fe   End of the input.
    /// @throws std::invalid_argument for every outcome other than a
    ///         complete, successful conversion.
    template<class CharT>
    static void assert_result(std::codecvt_base::result res, const CharT *fn, const CharT *fe)
    {
        switch (res)
        {
        case std::codecvt_base::ok:
            // "ok" with input left over still means part of the string was
            // not converted.
            if (fn != fe)
            {
                throw std::invalid_argument("Conversion failure");
            }
            break;
        case std::codecvt_base::partial:
            throw std::invalid_argument("partial");
        case std::codecvt_base::error:
            throw std::invalid_argument("error");
        case std::codecvt_base::noconv:
            throw std::invalid_argument("noconv");
        }
    }

    typedef std::codecvt<wchar_t, char, std::mbstate_t> codecvt_type;

    // locale_ is declared before codecvt_ so that it is initialized first;
    // codecvt_ refers to a facet obtained from locale_.
    const std::locale locale_;
    const codecvt_type &codecvt_;
};
int main(int argc, char *argv[]) | |
{ | |
try | |
{ | |
CodeCvt cvt; | |
boost::property_tree::wptree pt; | |
pt.put(L"message", cvt.widen(argv[1])); | |
boost::property_tree::write_json(wcout, pt, false); | |
return 0; | |
} | |
catch(const exception &e) | |
{ | |
cerr << e.what() << endl; | |
return 1; | |
} | |
} |
Thank you! I spent many hours trying to solve the problem of saving UTF-8 strings in a JSON file, and this method really helped me :)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See also boost::property_treeで日本語を含むJSONが文字化けする