Skip to content

Instantly share code, notes, and snippets.

@marchelbling
Last active Apr 1, 2022
Embed
What would you like to do?
C++ json writer

why?

This code implements a naive JSON writer in C++ that complies with RFC 4627. I wrote it because I believe it is a very good example of a real-life problem involving lots of C++ constructs. This sample only supports writing JSON; heterogeneous ‘object’ serialization is not supported, and extensions are left as an exercise. See Writing json in C++ for some details.

what?

  • json_stream: a std::ofstream wrapper fulfilling RFC 4627 constraints;
  • utf8_json: some code to decode/“json encode” std::string UTF-8 buffers
  • json_test.cpp: a very simple program testing the code

how?

Some parts of the code require C++11 (std::true_type, u8""), e.g. clang++ -std=c++11 json_test.cpp && ./a.out && cat /tmp/toto

#include <string>
#include <sstream>
#include <fstream>
#include <vector>
#include <map>
#include <cmath>
#include <limits>
#include "utf8_json"
// Type traits used by json_stream to pick a serialization strategy.
//
// is_container<T>             : true for types dumped as a JSON array or object.
// is_associative_container<T> : true for containers dumped as a JSON object,
//                               i.e. maps whose key type is std::string.
//
// Both default to false, so any type not listed below is dumped as a scalar.
template<typename T>
struct is_container : std::false_type {};

template<typename T>
struct is_associative_container : std::false_type {};

// std::vector, with any allocator (the allocator does not affect iteration).
template<typename T, typename Alloc>
struct is_container< std::vector<T, Alloc> > : std::true_type {};

// std::map, with any comparator / allocator.
template<typename K, typename V, typename Compare, typename Alloc>
struct is_container< std::map<K, V, Compare, Alloc> > : std::true_type {};

// Only string-keyed maps can be written as JSON objects; maps with any other
// key type remain plain containers.
template<typename V, typename Compare, typename Alloc>
struct is_associative_container< std::map<std::string, V, Compare, Alloc> > : std::true_type {};
// Thin wrapper around std::ofstream that serializes values as JSON text.
//
// Supported inputs (selected at compile time through is_container /
// is_associative_container):
//   * arithmetic values        -> written through an std::ostringstream
//   * std::string              -> quoted, escaped via utf8_json
//   * std::pair                -> two-element JSON array
//   * simple containers        -> JSON array
//   * string-keyed maps        -> JSON object
//
// If the file could not be opened, every write is silently ignored.
class json_stream {
public:
    // Opens `path` for writing through std::ofstream.
    json_stream(const std::string& path) : _stream(path.c_str())
    {}

    // Serializes `data` as JSON and appends it to the file (no-op when the
    // underlying stream is not open).
    template<typename T>
    json_stream& operator<<(const T& data) {
        if (_stream.is_open()) {
            _stream << dump(data);
        }
        return *this;
    }

    // Forwards standard manipulators such as std::endl to the underlying
    // stream. This overload must be public: manipulators are function
    // templates, so the generic operator<< above can never deduce them and
    // client code has no other way to write `js << std::endl`.
    typedef std::ostream& (*ostream_manipulator)(std::ostream&);
    json_stream& operator<<(ostream_manipulator pf) {
        if (_stream.is_open()) {
            _stream << pf;
        }
        return *this;
    }

private:
    // Entry point of the tag dispatch:
    //   * non-container types are dumped as a simple value
    //   * containers are dumped either
    //       - as a JSON object when is_associative_container<T> is true
    //       - as a JSON array otherwise
    template<typename T>
    std::string dump(const T& t) const {
        return dump_value_or_container(t, typename is_container<T>::type());
    }

    // --- dispatch helpers -------------------------------------------------

    template<typename T>
    std::string dump_value_or_container(const T& t, std::false_type) const {
        return dump_value(t);
    }

    template<typename T>
    std::string dump_value_or_container(const T& t, std::true_type) const {
        return dump_simple_or_associative_container(t, typename is_associative_container<T>::type());
    }

    template<typename T>
    std::string dump_simple_or_associative_container(const T& t, std::false_type) const {
        return dump_simple_container(t);
    }

    template<typename T>
    std::string dump_simple_or_associative_container(const T& t, std::true_type) const {
        return dump_associative_container(t);
    }

    // --- type specific serialization --------------------------------------

    // Generic value: sanitized, then formatted with operator<< on an
    // std::ostringstream using that stream's default formatting.
    template<typename V>
    std::string dump_value(const V& value) const {
        std::ostringstream oss;
        oss << sanitize(value);
        return oss.str();
    }

    // Strings are escaped by sanitize() and wrapped in double quotes.
    std::string dump_value(const std::string& value) const {
        return "\"" + sanitize(value) + "\"";
    }

    // A pair met outside of a string-keyed map (e.g. an element of
    // std::map<int, V>) is written as a two-element array: [first, second].
    template<typename K, typename V>
    std::string dump_value(const std::pair<const K, V>& pair) const {
        std::ostringstream oss;
        oss << "[" << dump(pair.first) << ", " << dump(pair.second) << "]";
        return oss.str();
    }

    // Element of a string-keyed map, written as an object member: "key": value.
    template<typename V>
    std::string dump_pair(const std::pair<const std::string, V>& pair) const {
        std::ostringstream oss;
        oss << dump(pair.first) << ": " << dump(pair.second);
        return oss.str();
    }

    // Writes every element of `container` as a JSON array. An empty
    // container yields "[]" (begin() must not be dereferenced in that case).
    template<typename C>
    std::string dump_simple_container(const C& container) const
    {
        std::ostringstream oss;
        oss << "[";
        typename C::const_iterator it = container.begin();
        if (it != container.end()) {
            oss << dump(*it);
            for (++it; it != container.end(); ++it) {
                oss << ", " << dump(*it);
            }
        }
        oss << "]";
        return oss.str();
    }

    // Writes every entry of `map` as a JSON object. An empty map yields "{}".
    template<typename M>
    std::string dump_associative_container(const M& map) const
    {
        std::ostringstream oss;
        oss << "{";
        typename M::const_iterator it = map.begin();
        if (it != map.end()) {
            oss << dump_pair(*it);
            for (++it; it != map.end(); ++it) {
                oss << ", " << dump_pair(*it);
            }
        }
        oss << "}";
        return oss.str();
    }

    // --- sanitization -----------------------------------------------------

    // Default: the value is written unchanged.
    template<typename T>
    T sanitize(const T& t) const {
        return t;
    }

    // Returns -1, 0 or +1 according to the sign of `val`.
    template <typename T>
    int sgn(const T& val) const {
        return (T(0) < val) - (val < T(0));
    }

    // JSON has no literal for NaN or infinity: infinities are clamped to
    // +/- the largest finite double and NaN is written as 0.
    double sanitize(const double d) const {
        if (std::isfinite(d)) {
            return d;
        }
        if (std::isinf(d)) {
            return sgn(d) * std::numeric_limits<double>::max();
        }
        return 0.;
    }

    // Floats go through the double overload (so an infinite float is clamped
    // to the largest finite *double*).
    double sanitize(const float f) const {
        return sanitize(static_cast<double>(f));
    }

    // Strings are decoded as UTF-8 and re-encoded with JSON escapes.
    std::string sanitize(std::string const& input) const {
        return utf8_json::json_encode_codepoints(utf8_json::decode_utf8(input));
    }

    std::ofstream _stream;
};
#include "json_stream"
#include <map>
#include <vector>
#include <iostream>
// Smoke test: writes a single string-keyed object holding a float array to
// /tmp/toto. The key mixes ASCII, multi-byte UTF-8, control characters and an
// embedded NUL; the array holds a regular value, a NaN and an infinity.
int main() {
    json_stream js(std::string("/tmp/toto"));

    std::map< std::string, std::vector<float> > object;

    // Use float values directly rather than narrowing a double NaN.
    std::vector<float> data = { 1.f,
                                std::numeric_limits<float>::quiet_NaN(),
                                -std::numeric_limits<float>::infinity() };

    // The literal contains an embedded '\0'. Building the std::string from a
    // bare const char* would stop at that NUL and silently drop "\0+∞", so
    // the length is passed explicitly (minus the literal's terminating NUL).
    static const char raw_name[] = u8"foo+é+\n\r\b\t+\v\0+∞";
    const std::string name(raw_name, sizeof(raw_name) - 1);

    object[name] = data;
    js << object;
    return 0;
}
#include <string>
#include <sstream>
#include <iomanip>
#include <vector>
// UTF-8 decoding and JSON string escaping helpers.
//
// Every function is `inline` because this file is meant to be #included:
// non-inline definitions would violate the one-definition rule as soon as two
// translation units include it.
namespace utf8_json {

// Returns `value` as an unsigned byte in [0, 255], whether or not plain char
// is signed.
inline unsigned int mask8(char const value) {
    return value & 0xff;
}

// True when `byte` has the 10xxxxxx bit pattern of a UTF-8 continuation byte.
inline bool is_valid_continuation_byte(unsigned int byte) {
    return ((byte & 0xC0) == 0x80);
}

// Advances `iterator` to the next byte and returns that byte.
//
// When `iterator` already designates the last byte of the input (or the end
// itself), it is left untouched and 0 is returned. 0 is never a valid
// continuation byte, so callers treat that case as a malformed sequence
// without ever dereferencing `end_iterator`.
inline int get_next_byte(std::string::const_iterator& iterator, std::string::const_iterator end_iterator) {
    if (iterator != end_iterator) {
        std::string::const_iterator next = iterator;
        ++next;
        if (next != end_iterator) {
            iterator = next;
            return mask8(*iterator);
        }
    }
    return 0; // end of input: reported as an invalid continuation byte
}

// Appends `count` copies of `replacement` to `output`.
inline void insert_replacement(std::vector<unsigned int>& output, unsigned int replacement, unsigned int count) {
    output.insert(output.end(), count, replacement);
}

// Decodes a UTF-8 buffer into a sequence of code points.
//
// input       : bytes to decode (may contain embedded NULs).
// replacement : code point emitted for malformed input (default U+FFFD).
//
// Malformed input never aborts decoding. Each byte consumed while reading an
// ill-formed sequence (including the offending byte itself) yields one
// `replacement`. Rejected input: stray continuation bytes, overlong forms,
// encoded surrogates (U+D800..U+DFFF), values above U+10FFFF and sequences
// truncated by the end of the buffer.
inline std::vector<unsigned int> decode_utf8(const std::string& input, const int replacement=0xfffd) {
    std::vector<unsigned int> codepoints;
    codepoints.reserve(input.size());
    const std::string::const_iterator end = input.end();

    for (std::string::const_iterator iterator = input.begin(); iterator != end; ++iterator) {
        const std::string::const_iterator sequence_start = iterator;
        const unsigned int lead = mask8(*iterator);

        if (lead < 0x80) { // ASCII
            codepoints.push_back(lead);
            continue;
        }
        if (lead < 0xC2 || lead > 0xF4) {
            // stray continuation byte, overlong 2-byte lead, or > U+10FFFF
            insert_replacement(codepoints, replacement, 1);
            continue;
        }

        // Sequence length, payload bits of the lead byte, and the range
        // allowed for the *first* continuation byte. That range is narrower
        // than 0x80..0xBF for a few lead bytes.
        unsigned int length = 2;
        unsigned int value = lead & 0x1F;
        unsigned int second_min = 0x80;
        unsigned int second_max = 0xBF;
        if (lead >= 0xF0) {
            length = 4;
            value = lead & 0x07;
            if (lead == 0xF0) {
                second_min = 0x90; // below: overlong
            }
            else if (lead == 0xF4) {
                second_max = 0x8F; // above: > U+10FFFF
            }
        }
        else if (lead >= 0xE0) {
            length = 3;
            value = lead & 0x0F;
            if (lead == 0xE0) {
                second_min = 0xA0; // below: overlong
            }
            else if (lead == 0xED) {
                second_max = 0x9F; // above: UTF-16 surrogates
            }
        }

        bool well_formed = true;
        for (unsigned int index = 1; index < length; ++index) {
            const unsigned int unit = get_next_byte(iterator, end);
            const bool in_range = (index != 1) || (unit >= second_min && unit <= second_max);
            if (!is_valid_continuation_byte(unit) || !in_range) {
                well_formed = false;
                break;
            }
            value = (value << 6) | (unit & 0x3F);
        }

        if (well_formed) {
            codepoints.push_back(value);
        }
        else {
            // One replacement per byte actually consumed. At end of input
            // get_next_byte() does not advance, so nothing is counted twice
            // and the outer loop's increment lands exactly on end().
            const unsigned int consumed = static_cast<unsigned int>(iterator - sequence_start) + 1;
            insert_replacement(codepoints, replacement, consumed);
        }
    }
    return codepoints;
}

// Formats `codepoint` as a JSON \uXXXX escape (lowercase hex, zero padded to
// at least four digits).
inline std::string json_encode_control_char(unsigned int codepoint) {
    std::ostringstream oss;
    oss.fill('0');
    oss << "\\u" << std::setw(4) << std::hex << codepoint;
    return oss.str();
}

// Encodes one code point as UTF-8.
//
// Values that are not Unicode scalar values (above U+10FFFF, or surrogates
// U+D800..U+DFFF) are encoded as U+FFFD.
inline std::string utf8_encode(unsigned int codepoint) {
    std::string output;
    // NOTE(review): code points U+0591..U+05F3 (Hebrew block) are dropped
    // and produce an empty string. This is kept unchanged from the original;
    // the reason is not visible here. Confirm whether it is intentional.
    if (codepoint > 0x590 && codepoint < 0x5F4) {
        return output;
    }
    // not a Unicode scalar value
    if (codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
        return utf8_encode(0xfffd);
    }
    if (codepoint < 0x80) {
        output.push_back(static_cast<char>(codepoint));
    }
    else if (codepoint <= 0x7FF) {
        output.push_back(static_cast<char>((codepoint >> 6) + 0xC0));
        output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
    }
    else if (codepoint <= 0xFFFF) {
        output.push_back(static_cast<char>((codepoint >> 12) + 0xE0));
        output.push_back(static_cast<char>(((codepoint >> 6) & 0x3F) + 0x80));
        output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
    }
    else {
        output.push_back(static_cast<char>((codepoint >> 18) + 0xF0));
        output.push_back(static_cast<char>(((codepoint >> 12) & 0x3F) + 0x80));
        output.push_back(static_cast<char>(((codepoint >> 6) & 0x3F) + 0x80));
        output.push_back(static_cast<char>((codepoint & 0x3F) + 0x80));
    }
    return output;
}

// Builds the contents of a JSON string (without the surrounding quotes) from
// a sequence of code points:
//   * \b \t \n \f \r " / \ use their two-character escapes
//   * other C0 controls, DEL and C1 controls (U+0080..U+009F) become \uXXXX
//   * everything else is written as UTF-8
inline std::string json_encode_codepoints(std::vector<unsigned int> const& codepoints) {
    std::string json_string;
    json_string.reserve(codepoints.size());
    for (std::vector<unsigned int>::const_iterator it = codepoints.begin(); it != codepoints.end(); ++it) {
        const unsigned int codepoint = *it;
        switch (codepoint) {
            case 8:  json_string += "\\b";  break;
            case 9:  json_string += "\\t";  break;
            case 10: json_string += "\\n";  break;
            case 12: json_string += "\\f";  break;
            case 13: json_string += "\\r";  break;
            case 34: json_string += "\\\""; break;
            case 47: json_string += "\\/";  break;
            case 92: json_string += "\\\\"; break;
            default:
                if (codepoint < 32 || codepoint == 127 || (codepoint >= 128 && codepoint <= 159)) {
                    json_string += json_encode_control_char(codepoint);
                }
                else {
                    json_string += utf8_encode(codepoint);
                }
                break;
        }
    }
    return json_string;
}

}
@guest271314
Copy link

guest271314 commented Mar 26, 2022

Can C++ json writer be used as a substitute for <nlohmann/json.hpp> (https://discourse.mozilla.org/t/webextension-with-native-messaging-c-app-side/30821) to parse Native Messaging protocol?

@marchelbling
Copy link
Author

marchelbling commented Mar 31, 2022

I'm not familiar with the Native Messaging protocol; by the look of it, it seems rather simple, so I would think it could work. That said, this piece of code was probably not as well tested as the nlohmann/json.hpp library (though similar code was used in some production systems), so if you use it, test it thoroughly for your needs first, or use it at your own risk.

@guest271314
Copy link

guest271314 commented Apr 1, 2022

The issue was passing JSON from the Native Messaging client to the host and then calling popen() with the string. Removing the beginning and ending double quotes achieves the requirement for JSON string input from JavaScript

// Reads one length-prefixed message from stdin: a 32-bit length in native
// byte order followed by that many bytes of payload, returned as a string.
//
// Exits with EXIT_SUCCESS when the length cannot be read (e.g. stdin was
// closed) or is zero, and with EXIT_FAILURE when the payload is cut short.
std::string getMessage() {
  // Read straight into the integer: no misaligned reinterpret_cast of a char
  // buffer, and `len` is never used uninitialized when fread() fails.
  uint32_t len = 0;
  if (fread(&len, sizeof len, 1, stdin) != 1 || !len) {
    exit(EXIT_SUCCESS);
  }
  // Heap-backed buffer instead of a variable-length array: VLAs are not
  // standard C++ and a large peer-supplied length would overflow the stack.
  std::string content(len, '\0');
  if (fread(&content[0], sizeof(char), len, stdin) != len) {
    exit(EXIT_FAILURE);
  }
  return content;
}

int main() {
  while (true) {
    auto message = getMessage();
    const char* command = message.data();
    stringstream input;
    // Exclude double quotation marks from beginning and end of string
    for (int j = 1; j < message.length() - 1; j++) {
      input << message[j];
    }
    FILE* pipe = popen(input.str().c_str(), "r");
    // ...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment