Create a gist now

Instantly share code, notes, and snippets.

C++ json writer

why?

This code implements a naive JSON writer in C++ complying with RFC 4627. I wrote this as I believe it is a very good example of a real-life problem involving lots of C++ constructs. This sample only supports writing JSON; it does not support heterogeneous ‘object’ serialization, and extensions are left as an exercise. See Writing json in C++ for some details.

what?

  • json_stream: a std::ofstream wrapper fulfilling RFC 4627 constraints;
  • utf8_json: some code to decode/“json encode” std::string UTF-8 buffers
  • json_test.cpp: a very simple program testing the code

how?

Some parts of the code require C++11 (std::true_type, u8""), e.g. clang++ -std=c++11 json_test.cpp && ./a.out && cat /tmp/toto

#include <string>
#include <sstream>
#include <fstream>
#include <vector>
#include <map>
#include <cmath>
#include <limits>
#include "utf8_json"
/// Trait telling whether T is serialized as a JSON collection (array or
/// object) rather than as a single value. Defaults to false.
template<typename T>
struct is_container : std::false_type {};

/// Trait telling whether T is serialized as a JSON object ({"key": value}).
/// Defaults to false, i.e. containers are written as JSON arrays.
template<typename T>
struct is_associative_container : std::false_type {};

// std::vector is a container, whatever its allocator.
template<typename T, typename A>
struct is_container< std::vector<T, A> > : std::true_type {};

// std::map is a container, whatever its comparator and allocator.
template<typename K, typename V, typename C, typename A>
struct is_container< std::map<K, V, C, A> > : std::true_type {};

// Only maps keyed by std::string can become JSON objects (JSON member names
// are strings); maps with any other key type stay plain containers.
template<typename V, typename C, typename A>
struct is_associative_container< std::map<std::string, V, C, A> > : std::true_type {};
/// Small JSON writer wrapping a std::ofstream.
///
/// Values streamed with operator<< are converted to their JSON text and
/// appended to the file. Scalars are written through std::ostringstream,
/// std::string values are escaped and quoted, containers flagged by
/// is_container are written as arrays, and containers flagged by
/// is_associative_container are written as objects.
/// If the file could not be opened, every write is silently ignored.
class json_stream {
public:
    /// Opens the file at `path` for writing.
    json_stream(const std::string& path) : _stream(path.c_str())
    {}

    /// Serializes `data` to JSON and appends it to the file (no-op when the
    /// file is not open).
    template<typename T>
    json_stream& operator<<(const T& data) {
        if (_stream.is_open()) {
            _stream << dump(data);
        }
        return *this;
    }

    /// Forwards standard manipulators such as std::endl to the underlying
    /// stream. This overload has to be public: the template overload above
    /// cannot deduce T from a manipulator, so `js << std::endl` can only
    /// resolve to this one.
    typedef std::ostream& (*ostream_manipulator)(std::ostream&);
    json_stream& operator<<(ostream_manipulator pf) {
        if (_stream.is_open()) {
            _stream << pf;
        }
        return *this;
    }

private:
    /// Entry point of the serialization; dispatches on the container traits:
    ///  * non-container types are dumped as a single value,
    ///  * associative containers are dumped as a JSON object,
    ///  * any other container is dumped as a JSON array.
    template<typename T>
    std::string dump(const T& t) const {
        return dump_value_or_container(t, typename is_container<T>::type());
    }

    // Tag dispatch: value vs. container.
    template<typename T>
    std::string dump_value_or_container(const T& t, std::false_type) const {
        return dump_value(t);
    }
    template<typename T>
    std::string dump_value_or_container(const T& t, std::true_type) const {
        return dump_simple_or_associative_container(t, typename is_associative_container<T>::type());
    }

    // Tag dispatch: array vs. object.
    template<typename T>
    std::string dump_simple_or_associative_container(const T& t, std::false_type) const {
        return dump_simple_container(t);
    }
    template<typename T>
    std::string dump_simple_or_associative_container(const T& t, std::true_type) const {
        return dump_associative_container(t);
    }

    /// Generic scalar: written with the stream's default formatting after
    /// going through sanitize().
    template<typename V>
    std::string dump_value(const V& value) const {
        std::ostringstream oss;
        oss << sanitize(value);
        return oss.str();
    }

    /// Strings are escaped and wrapped in double quotes.
    std::string dump_value(const std::string& value) const {
        return "\"" + sanitize(value) + "\"";
    }

    /// C strings (including string literals) are JSON strings too. Without
    /// this overload they would reach the generic one and be written escaped
    /// but unquoted. A null pointer is written as the JSON literal null.
    std::string dump_value(const char* value) const {
        return value ? dump_value(std::string(value)) : std::string("null");
    }

    /// Element of a map whose key is not a std::string: written as a
    /// two-element array [key, value].
    template<typename K, typename V>
    std::string dump_value(const std::pair<const K, V>& pair) const {
        std::ostringstream oss;
        oss << "[" << dump(pair.first) << ", " << dump(pair.second) << "]";
        return oss.str();
    }

    /// Member of a JSON object: "key": value.
    template<typename V>
    std::string dump_pair(const std::pair<const std::string, V>& pair) const {
        std::ostringstream oss;
        oss << dump(pair.first) << ": " << dump(pair.second);
        return oss.str();
    }

    /// Writes a container as a JSON array; an empty container yields "[]".
    template<typename C>
    std::string dump_simple_container(const C& container) const
    {
        std::ostringstream oss;
        oss << "[";
        bool first = true;
        for (typename C::const_iterator it = container.begin() ; it != container.end() ; ++ it) {
            // Separator goes before every element but the first one, so the
            // loop never has to dereference begin() unconditionally.
            if (!first) {
                oss << ", ";
            }
            first = false;
            oss << dump(*it);
        }
        oss << "]";
        return oss.str();
    }

    /// Writes a string-keyed map as a JSON object; an empty map yields "{}".
    template<typename M>
    std::string dump_associative_container(const M& map) const
    {
        std::ostringstream oss;
        oss << "{";
        bool first = true;
        for (typename M::const_iterator it = map.begin() ; it != map.end() ; ++ it) {
            if (!first) {
                oss << ", ";
            }
            first = false;
            oss << dump_pair(*it);
        }
        oss << "}";
        return oss.str();
    }

    /// Default sanitizer: the value is written unchanged.
    template<typename T>
    T sanitize(const T& t) const {
        return t;
    }

    /// Sign of `val`: -1, 0 or 1.
    template <typename T>
    int sgn(const T& val) const {
        return (T(0) < val) - (val < T(0));
    }

    /// JSON has no representation for NaN or infinities: infinities are
    /// clamped to +/- the largest finite double and NaN is replaced by 0.
    double sanitize(const double d) const {
        if (std::isfinite(d)) {
            return d;
        }
        if (std::isinf(d)) {
            return sgn(d) * std::numeric_limits<double>::max();
        }
        return 0.;
    }

    /// Floats are widened and sanitized as doubles.
    double sanitize(const float f) const {
        return sanitize(static_cast<double>(f));
    }

    /// Decodes the UTF-8 input and re-encodes it with JSON escapes applied
    /// (surrounding quotes are NOT added here).
    std::string sanitize(std::string const& input) const {
        return utf8_json::json_encode_codepoints(utf8_json::decode_utf8(input));
    }

    std::ofstream _stream;
};
#include "json_stream"
#include <map>
#include <vector>
#include <iostream>
// Smoke test: writes one JSON object to /tmp/toto whose key mixes ASCII,
// multi-byte UTF-8, control characters and an embedded NUL, and whose value
// is a float array containing NaN and -infinity.
int main() {
    json_stream js(std::string("/tmp/toto"));
    std::map< std::string, std::vector<float> > object;

    // Use float values throughout so the initializer list does not rely on
    // double -> float conversions.
    const std::vector<float> data = {
        1.f,
        std::numeric_limits<float>::quiet_NaN(),
        -std::numeric_limits<float>::infinity()
    };

    // The literal contains an embedded '\0'. Building the std::string from a
    // bare const char* would stop at that byte and drop the "+∞" tail, so the
    // length is passed explicitly (sizeof minus the terminating NUL).
    static const char raw_name[] = u8"foo+é+\n\r\b\t+\v\0+∞";
    const std::string name(raw_name, sizeof(raw_name) - 1);

    object[name] = data;
    js << object;
    return 0;
}
#include <iomanip>
#include <sstream>
#include <string>
#include <vector>
/// Helpers to decode UTF-8 byte strings into code points and to re-encode
/// those code points as the content of a JSON string (escapes applied, no
/// surrounding quotes). All functions are `inline` because this code lives in
/// a header and may be included from several translation units.
namespace utf8_json {

/// Converts a (possibly signed) char into its byte value in [0, 255].
inline unsigned int mask8(char const value) {
    return value & 0xff;
}

/// Tells whether `byte` has the 10xxxxxx layout of a UTF-8 continuation byte.
inline bool is_valid_continuation_byte(unsigned int byte) {
    return ((byte & 0xC0) == 0x80);
}

/// Moves `iterator` to the following byte and returns that byte.
/// When no byte follows (iterator is at the end or on the last byte), the
/// iterator is left untouched and 0 is returned; 0 is not a valid
/// continuation byte, so callers treat it as a malformed sequence.
inline int get_next_byte(std::string::const_iterator& iterator, std::string::const_iterator end_iterator) {
    if (iterator == end_iterator) {
        return 0;
    }
    std::string::const_iterator next = iterator;
    ++ next;
    if (next == end_iterator) {
        return 0; // nothing to read: never dereference end()
    }
    iterator = next;
    return mask8(*iterator);
}

/// Appends `count` copies of `replacement` to `output`.
inline void insert_replacement(std::vector<unsigned int>& output, unsigned int replacement, unsigned int count) {
    for(unsigned int i = 0 ; i < count ; ++ i) {
        output.push_back(replacement);
    }
}

/// Decodes a UTF-8 byte string into Unicode code points.
///
/// Malformed input never aborts decoding: every byte that belongs to an
/// ill-formed sequence produces one `replacement` code point (U+FFFD by
/// default). A byte that breaks a sequence is not swallowed; it is decoded
/// again on its own, so a valid character following a stray or truncated
/// lead byte is preserved. Overlong forms, UTF-16 surrogates (U+D800..U+DFFF)
/// and values above U+10FFFF are rejected.
///
/// @param input        bytes to decode
/// @param replacement  code point emitted for each invalid byte
/// @return the decoded code points, in input order
inline std::vector<unsigned int> decode_utf8(const std::string& input, const int replacement=0xfffd) {
    std::vector<unsigned int> codepoints;
    codepoints.reserve(input.size());

    const std::string::const_iterator end = input.end();
    std::string::const_iterator iterator = input.begin();
    while (iterator != end) {
        const unsigned int lead = mask8(*iterator);
        ++ iterator;

        if (lead < 0x80) { // ASCII
            codepoints.push_back(lead);
            continue;
        }

        // Number of continuation bytes expected, payload bits of the lead
        // byte, and the allowed range of the FIRST continuation byte. That
        // range is narrowed for the lead bytes that could otherwise encode
        // overlong forms, surrogates or values above U+10FFFF.
        unsigned int expected = 0;
        unsigned int value = 0;
        unsigned int first_min = 0x80;
        unsigned int first_max = 0xBF;
        if (lead < 0xC2) { // stray continuation byte or overlong 2-byte lead
            codepoints.push_back(replacement);
            continue;
        }
        else if (lead < 0xE0) { // 2-byte sequence
            expected = 1;
            value = lead & 0x1F;
        }
        else if (lead < 0xF0) { // 3-byte sequence
            expected = 2;
            value = lead & 0x0F;
            if (lead == 0xE0) { first_min = 0xA0; } // overlong
            if (lead == 0xED) { first_max = 0x9F; } // surrogates
        }
        else if (lead < 0xF5) { // 4-byte sequence
            expected = 3;
            value = lead & 0x07;
            if (lead == 0xF0) { first_min = 0x90; } // overlong
            if (lead == 0xF4) { first_max = 0x8F; } // > U+10FFFF
        }
        else { // 0xF5..0xFF can only start values > U+10FFFF
            codepoints.push_back(replacement);
            continue;
        }

        unsigned int consumed = 1; // bytes of this sequence read so far
        bool well_formed = true;
        for (unsigned int index = 0 ; index < expected ; ++ index) {
            if (iterator == end) { // truncated sequence
                well_formed = false;
                break;
            }
            const unsigned int byte = mask8(*iterator);
            const unsigned int low = (index == 0) ? first_min : 0x80;
            const unsigned int high = (index == 0) ? first_max : 0xBF;
            if (byte < low || byte > high) {
                // Leave the offending byte in place: the outer loop decodes
                // it on its own.
                well_formed = false;
                break;
            }
            value = (value << 6) | (byte & 0x3F);
            ++ iterator;
            ++ consumed;
        }

        if (well_formed) {
            codepoints.push_back(value);
        }
        else {
            insert_replacement(codepoints, replacement, consumed);
        }
    }
    return codepoints;
}

/// Formats `codepoint` as a JSON \uXXXX escape (lowercase hexadecimal,
/// zero-padded to four digits). Intended for code points below 0x10000.
inline std::string json_encode_control_char(unsigned int codepoint) {
    std::ostringstream oss;
    oss.fill('0');
    oss << "\\u" << std::setw(4) << std::hex << codepoint;
    return oss.str();
}

/// Encodes one code point as UTF-8.
/// Values that are not Unicode scalar values (above U+10FFFF, or UTF-16
/// surrogates U+D800..U+DFFF) are encoded as U+FFFD instead.
inline std::string utf8_encode(unsigned int codepoint) {
    if (codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
        codepoint = 0xFFFD;
    }

    std::string output;
    if (codepoint < 0x80) {
        output.push_back(static_cast<char>(codepoint));
    }
    else if (codepoint <= 0x7FF) {
        output.push_back(static_cast<char>((codepoint >> 6) + 0xC0));
        output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
    }
    else if (codepoint <= 0xFFFF) {
        output.push_back(static_cast<char>((codepoint >> 12) + 0xE0));
        output.push_back(static_cast<char>(((codepoint >> 6) & 0x3F) + 0x80));
        output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
    }
    else {
        output.push_back(static_cast<char>((codepoint >> 18) + 0xF0));
        output.push_back(static_cast<char>(((codepoint >> 12) & 0x3F) + 0x80));
        output.push_back(static_cast<char>(((codepoint >> 6) & 0x3F) + 0x80));
        output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
    }
    return output;
}

/// Builds the content of a JSON string from code points.
///
/// Quote, backslash, slash and the control characters that have a short form
/// use their two-character escape; remaining C0 controls, DEL and C1 controls
/// (U+0080..U+009F) use \uXXXX; everything else is emitted as UTF-8.
/// The surrounding double quotes are not added.
inline std::string json_encode_codepoints(std::vector<unsigned int> const& codepoints) {
    std::string json_string;
    json_string.reserve(codepoints.size());
    for(std::vector<unsigned int>::const_iterator codepoint = codepoints.begin() ; codepoint != codepoints.end() ; ++ codepoint) {
        const unsigned int value = *codepoint;
        switch (value) {
            case 8:  json_string += "\\b";  break; // backspace
            case 9:  json_string += "\\t";  break; // tab
            case 10: json_string += "\\n";  break; // line feed
            case 12: json_string += "\\f";  break; // form feed
            case 13: json_string += "\\r";  break; // carriage return
            case 34: json_string += "\\\""; break; // "
            case 47: json_string += "\\/";  break; // /
            case 92: json_string += "\\\\"; break; // backslash
            default:
                if (value < 32 || value == 127 || (value >= 128 && value <= 159)) {
                    json_string += json_encode_control_char(value);
                }
                else {
                    json_string += utf8_encode(value);
                }
                break;
        }
    }
    return json_string;
}

}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment