Skip to content

Instantly share code, notes, and snippets.

@jbelloncastro jbelloncastro/sax_parse.cc
Last active Aug 28, 2019

Embed
What would you like to do?
nlohmann::json SAX parsing example
#ifndef SAX_H
#define SAX_H
#include <bitset>
#include <exception>
#include <limits>
#include <sstream>
#include <string_view>

#include <nlohmann/json.hpp>
// Primary template: declared only, never defined. Every supported target type
// gets its own explicit or partial specialization further down in this file.
template < class T >
struct sax_reader;
// Base reader: rejects every SAX event by throwing std::runtime_error.
// The typed specializations derive from it and redefine only the events that
// are valid for their type, so any other event ends up here and fails.
template <>
struct sax_reader<void> : public nlohmann::json_sax<nlohmann::json> {
    // Throws std::runtime_error("Error: unexpected <funcname>"); never returns.
    [[noreturn]] void fail( const char* funcname ) {
        throw std::runtime_error(std::string("Error: unexpected ") + funcname);
    }

    // Each handler reports its own name (through __func__) as the unexpected
    // event. None of them returns normally.
    bool null() { fail(__func__); }
    bool boolean(bool) { fail(__func__); }
    bool start_array(size_t) { fail(__func__); }
    bool end_array() { fail(__func__); }
    bool number_integer(number_integer_t) { fail(__func__); }
    bool number_unsigned(number_unsigned_t) { fail(__func__); }
    bool number_float(number_float_t, const std::string&) { fail(__func__); }
    bool string(std::string&) { fail(__func__); }
    bool start_object(size_t) { fail(__func__); }
    bool key( std::string& ) { fail(__func__); }
    bool end_object() { fail(__func__); }

    // Turns a parse error into a std::runtime_error whose message carries the
    // position, the last token and the text of the underlying exception.
    bool parse_error(std::size_t position,
                     const std::string& last_token,
                     const nlohmann::detail::exception& ex)
    {
        // The prefix is streamed rather than passed to the constructor: a
        // stream constructed from a string starts writing at offset 0 and
        // would overwrite "Parse error. " with the text that follows.
        std::ostringstream ss;
        ss << "Parse error. "
           << "At: " << position
           << "; last token: " << last_token
           << "; error: " << ex.what();
        throw std::runtime_error(ss.str());
    }
};
// Specialization for signed integer numbers.
// Accepts both signed and unsigned JSON integers. A value that does not fit in
// an int is rejected through fail() instead of being silently truncated.
template <>
struct sax_reader<int> final : public sax_reader<void> {
    // Last value read; 0 until an integer event has been received.
    int value() const { return _value; }

    // Stores val and returns false to signal that this reader is complete.
    bool number_integer(number_integer_t val) {
        if( val < std::numeric_limits<int>::min() ||
            val > std::numeric_limits<int>::max() ) {
            fail("number_integer (value out of range for int)");
        }
        _value = static_cast<int>(val);
        return false;
    }

    // Stores val and returns false to signal that this reader is complete.
    bool number_unsigned(number_unsigned_t val) {
        if( val > static_cast<number_unsigned_t>(std::numeric_limits<int>::max()) ) {
            fail("number_unsigned (value out of range for int)");
        }
        _value = static_cast<int>(val);
        return false;
    }
private:
    int _value = 0;
};
// Specialization for unsigned integer numbers.
// Only number_unsigned is redefined, so a number_integer event still reaches
// the throwing handler of sax_reader<void>. A value that does not fit in an
// unsigned int is rejected through fail() instead of being truncated.
template <>
struct sax_reader<unsigned int> final : public sax_reader<void> {
    // Last value read; 0 until an unsigned event has been received.
    unsigned int value() const { return _value; }

    // Stores val and returns false to signal that this reader is complete.
    bool number_unsigned(number_unsigned_t val) {
        if( val > std::numeric_limits<unsigned int>::max() ) {
            fail("number_unsigned (value out of range for unsigned int)");
        }
        _value = static_cast<unsigned int>(val);
        return false;
    }
private:
    unsigned int _value = 0;
};
// Specialization for floating point numbers.
// Integer events are accepted as well and converted to float, which may lose
// precision for large magnitudes.
template <>
struct sax_reader<float> final : public sax_reader<void> {
    // Last value read; 0 until a numeric event has been received.
    float value() const { return _value; }

    // Each handler stores the converted value and returns false to signal
    // that this reader is complete.
    bool number_integer(number_integer_t val) {
        _value = static_cast<float>(val);
        return false;
    }
    bool number_unsigned(number_unsigned_t val) {
        _value = static_cast<float>(val);
        return false;
    }
    bool number_float(number_float_t val, const std::string&) {
        _value = static_cast<float>(val);
        return false;
    }
private:
    float _value = 0.0f;
};
// Specialization for strings: keeps the text of the string event it receives.
template <>
struct sax_reader<std::string> final : public sax_reader<void> {
    // Takes over the contents of val and returns false to signal that this
    // reader is complete.
    bool string(std::string& val) {
        _value.assign(std::move(val));
        return false;
    }

    // Copy of the stored text.
    std::string value() const {
        return _value;
    }
private:
    std::string _value;
};
template < class T >
struct sax_reader<std::vector<T>> final : public sax_reader<void> {
private:
enum expect_token {
expected_start_array,
expected_end_array,
error,
expected_element
};
public:
constexpr sax_reader() = default;
size_t size() const {
return _elements.size();
}
std::vector<T> value() const {
std::vector<T> result;
for( auto& reader : _elements ) {
result.push_back( reader.value() );
}
return result;
}
template < class OutputIt >
void value( OutputIt out ) const {
for( auto& reader : _elements ) {
out = reader.value();
out++;
}
}
template < class F >
void action_element( F&& action ) {
bool needs_more = action(_next);
if( !needs_more ) {
_elements.push_back(std::move(_next));
_next = {};
_state = expected_end_array;
}
}
bool null() {
auto action = []( auto& element ) {
return element.null();
};
switch( _state ) {
case expected_end_array:
_state = expected_element;
// Fallthrough
case expected_element:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool boolean(bool val) {
auto action = [=]( auto& element ) {
return element.boolean(val);
};
switch( _state ) {
case expected_end_array:
_state = expected_element;
// Fallthrough
case expected_element:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool start_array(size_t elements) {
auto action = [=]( auto& element ) {
return element.start_array(elements);
};
switch( _state ) {
case expected_start_array:
_state = expected_element;
return true;
case expected_end_array: // array of arrays
_state = expected_element;
// Fallthrough
case expected_element:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool end_array() {
auto action = []( auto& element ) {
return element.end_array();
};
switch( _state ) {
case expected_end_array:
// We are done
return false;
case expected_element:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool number_integer(number_integer_t val) {
auto action = [=]( auto& element ) {
return element.number_integer(val);
};
switch( _state ) {
case expected_end_array:
_state = expected_element;
// Fallthrough
case expected_element:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool number_unsigned(number_unsigned_t val) {
auto action = [=]( auto& element ) {
return element.number_unsigned(val);
};
switch( _state ) {
case expected_end_array:
_state = expected_element;
// Fallthrough
case expected_element:
action_element(action);
return true;
default:
fail(__func__);
}
}
// Float
bool number_float(number_float_t val, const std::string& s) {
auto action = [&]( auto& element ) {
return element.number_float(val,s);
};
switch( _state ) {
case expected_end_array:
_state = expected_element;
// Fallthrough
case expected_element:
action_element(action);
return true;
default:
fail(__func__);
}
}
// String
bool string(std::string& val) {
auto action = [&]( auto& element ) {
return element.string(val);
};
switch( _state ) {
case expected_end_array:
_state = expected_element;
// Fallthrough
case expected_element:
action_element(action);
return true;
default:
fail(__func__);
}
}
// Object
bool start_object(size_t elements) {
auto action = [=]( auto& element ) {
return element.start_object(elements);
};
switch( _state ) {
case expected_end_array:
_state = expected_element;
// Fallthrough
case expected_element:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool key( std::string& val ) {
auto action = [&]( auto& element ) {
return element.key(val);
};
switch( _state ) {
case expected_element:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool end_object() {
auto action = []( auto& element ) {
return element.end_object();
};
switch( _state ) {
case expected_element:
action_element(action);
return true;
default:
fail(__func__);
}
}
private:
expect_token _state;
sax_reader<T> _next;
std::vector<sax_reader<T>> _elements;
};
template < class K, class V, class C, class A >
struct sax_reader<std::map<K,V,C,A>> final : public sax_reader<void> {
private:
enum expect_token {
expected_start_object,
expected_end_object,
error,
expected_key,
expected_value
};
public:
constexpr sax_reader() :
_state(expected_start_object),
_next_key(),
_next_value(),
_elements()
{
}
size_t size() const {
return _elements.size();
}
std::map<K,V,C,A> value() const {
std::map<K,V,C,A> result;
for( auto& e : _elements ) {
result.emplace( std::piecewise_construct,
std::forward_as_tuple(std::get<0>(e)),
std::forward_as_tuple(std::get<1>(e).value()) );
}
return result;
}
template < class OutputIt >
void value( OutputIt out ) const {
for( auto& reader : _elements ) {
out = reader.value();
out++;
}
}
template < class F >
void action_element( F&& action ) {
bool needs_more = action(_next_value);
if( !needs_more ) {
_elements.emplace_back(
std::piecewise_construct,
std::forward_as_tuple(std::move(_next_key)),
std::forward_as_tuple(std::move(_next_value)));
_next_value = {};
_state = expected_key;
}
}
bool null() {
auto action = []( auto& element ) {
return element.null();
};
switch( _state ) {
case expected_value:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool boolean(bool val) {
auto action = [=]( auto& element ) {
return element.boolean(val);
};
switch( _state ) {
case expected_value:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool start_array(size_t elements) {
auto action = [=]( auto& element ) {
return element.start_array(elements);
};
switch( _state ) {
case expected_value:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool end_array() {
auto action = []( auto& element ) {
return element.end_array();
};
switch( _state ) {
case expected_value:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool number_integer(number_integer_t val) {
auto action = [=]( auto& element ) {
return element.number_integer(val);
};
switch( _state ) {
case expected_value:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool number_unsigned(number_unsigned_t val) {
auto action = [=]( auto& element ) {
return element.number_unsigned(val);
};
switch( _state ) {
case expected_value:
action_element(action);
return true;
default:
fail(__func__);
}
}
// Float
bool number_float(number_float_t val, const std::string& s) {
auto action = [&]( auto& element ) {
return element.number_float(val,s);
};
switch( _state ) {
case expected_value:
action_element(action);
return true;
default:
fail(__func__);
}
}
// String
bool string(std::string& val) {
auto action = [&]( auto& element ) {
return element.string(val);
};
switch( _state ) {
case expected_value:
action_element(action);
return true;
default:
fail(__func__);
}
}
// Object
bool start_object(size_t elements) {
auto action = [=]( auto& element ) {
return element.start_object(elements);
};
switch( _state ) {
case expected_start_object:
_state = expected_key;
return true;
case expected_value:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool key( std::string& val ) {
auto action = [&]( auto& element ) {
return element.key(val);
};
switch( _state ) {
case expected_key:
_next_key = std::move(val);
_state = expected_value;
return true;
case expected_value:
action_element(action);
return true;
default:
fail(__func__);
}
}
bool end_object() {
auto action = []( auto& element ) {
return element.end_object();
};
switch( _state ) {
case expected_key:
// Completed
return false;
case expected_value:
action_element(action);
return true;
default:
fail(__func__);
}
}
private:
expect_token _state;
std::string _next_key;
sax_reader<V> _next_value;
std::vector<std::pair<std::string,sax_reader<V>>> _elements;
};
template < class... Members >
struct sax_reader<std::tuple<Members...>> : public sax_reader<void> {
private:
static constexpr size_t N = sizeof...(Members);
enum expect_token {
expected_start_object,
expected_end_object,
expected_key,
error,
expected_member
};
constexpr static expect_token expect_member( size_t index ) {
return static_cast<expect_token>(expected_member + index);
}
template < class F, size_t... Is >
void expect_switch( F&& action, std::index_sequence<Is...> ) {
std::initializer_list<int> /* unused */{
(_state == expect_member(Is)?
action(std::get<Is>(_members), _has_value[Is]), 0
: 0
)...
};
}
template < class F >
void expect_switch( F&& action ) {
return expect_switch(std::forward<F>(action),
std::index_sequence_for<Members...>());
}
public:
constexpr
sax_reader( std::array<std::string_view,N> keys ) :
_state(expected_start_object),
_has_value(),
_keys(keys),
_members()
{
}
template < size_t... Is >
std::tuple<Members...> value(std::index_sequence<Is...>) const {
return std::make_tuple(
std::get<Is>(_members).value()...
);
}
std::tuple<Members...> value() const {
return value(std::index_sequence_for<Members...>());
}
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wswitch"
bool null() {
auto action = [&]( auto& member, auto has_value ) {
bool needs_more = member.null();
has_value = !needs_more;
if( has_value ) {
_state = expected_key;
}
};
switch( _state ) {
case expect_member(0) ... expect_member(N):
expect_switch(action);
break;
default:
fail(__func__);
}
return true;
}
bool boolean(bool val) {
auto action = [&]( auto& member, auto has_value ) {
bool needs_more = member.boolean(val);
has_value = !needs_more;
if( has_value ) {
_state = expected_key;
}
};
switch( _state ) {
case expect_member(0) ... expect_member(N):
expect_switch(action);
break;
default:
fail(__func__);
}
return true;
}
bool start_array(size_t elements) {
auto action = [&]( auto& member, auto has_value ) {
bool needs_more = member.start_array(elements);
has_value = !needs_more;
if( has_value ) {
_state = expected_key;
}
};
switch( _state ) {
case expect_member(0) ... expect_member(N):
expect_switch(action);
break;
default:
fail(__func__);
}
return true;
}
bool end_array() {
auto action = [&]( auto& member, auto has_value ) {
bool needs_more = member.end_array();
has_value = !needs_more;
if( has_value ) {
_state = expected_key;
}
};
switch( _state ) {
case expect_member(0) ... expect_member(N):
expect_switch(action);
break;
default:
fail(__func__);
}
return true;
}
bool number_integer(number_integer_t val) {
auto action = [&]( auto& member, auto has_value ) {
bool needs_more = member.number_integer(val);
has_value = !needs_more;
if( has_value ) {
_state = expected_key;
}
};
switch( _state ) {
case expect_member(0) ... expect_member(N):
expect_switch(action);
break;
default:
fail(__func__);
}
return true;
}
bool number_unsigned(number_unsigned_t val) {
auto action = [&]( auto& member, auto has_value ) {
bool needs_more = member.number_unsigned(val);
has_value = !needs_more;
if( has_value ) {
_state = expected_key;
}
};
switch( _state ) {
case expect_member(0) ... expect_member(N):
expect_switch(action);
break;
default:
fail(__func__);
}
return true;
}
// Float
bool number_float(number_float_t val, const std::string& s) {
auto action = [&]( auto& member, auto has_value ) {
bool needs_more = member.number_float(val, s);
has_value = !needs_more;
if( has_value ) {
_state = expected_key;
}
};
switch( _state ) {
case expect_member(0) ... expect_member(N):
expect_switch(action);
break;
default:
fail(__func__);
}
_state = expected_key;
return true;
}
// String
bool string(std::string& val) {
auto action = [&]( auto& member, auto has_value ) {
bool needs_more = member.string(val);
has_value = !needs_more;
if( has_value ) {
_state = expected_key;
}
};
switch( _state ) {
case expect_member(0) ... expect_member(N):
expect_switch(action);
break;
default:
fail(__func__);
}
_state = expected_key;
return true;
}
// Object
bool start_object(size_t elements) {
auto action = [&]( auto& member, auto has_value ) {
bool needs_more = member.start_object(elements);
has_value = !needs_more;
if( has_value ) {
_state = expected_key;
}
};
switch( _state ) {
case expected_start_object:
_state = expected_key;
break;
case expect_member(0) ... expect_member(N):
expect_switch(action);
break;
default:
fail(__func__);
}
return true;
}
bool key( std::string& val ) {
auto action = [&]( auto& member, auto has_value ) {
bool needs_more = member.key(val);
has_value = !needs_more;
if( has_value ) {
_state = expected_key;
}
};
switch( _state ) {
case expected_key:
{
auto pos = std::find(_keys.begin(), _keys.end(), val);
if( pos != _keys.end() ) {
size_t member_i = std::distance(_keys.begin(), pos);
_state = expect_member(member_i);
return true;
}
}
// Fallthrough
// (key not found)
default:
fail(__func__);
case expect_member(0) ... expect_member(N):
expect_switch(action);
break;
}
return true;
}
bool end_object() {
auto action = [&]( auto& member, auto has_value ) {
bool needs_more = member.end_object();
has_value = !needs_more;
if( has_value ) {
_state = expected_key;
}
};
switch( _state ) {
case expected_key:
if( _has_value.all() )
return false;
// Fallthrough
default:
fail(__func__);
case expect_member(0) ... expect_member(N):
expect_switch(action);
break;
}
return true;
}
#pragma GCC diagnostic pop
private:
expect_token _state;
std::bitset<3> _has_value;
std::array<std::string_view,N> _keys;
std::tuple< sax_reader<Members>... > _members;
};
#endif // SAX_H
#include <iostream>
// Plain record with one int, one float and one string field. The
// sax_reader<A> specialization in this file fills it from the JSON keys
// "a", "b" and "c", in that order.
struct A {
int a;
float b;
std::string c;
};
// Example A: JSON object with an integer "a", a floating point "b" and a
// string "c". Used as the input of main when OBJECT_A is defined.
const char example_a[] = R"(
{ "a": 1,
"b": 2.03,
"c": "a"
}
)";
// Record that nests an A next to a float and a string field.
// NOTE(review): no sax_reader<B> specialization is visible in this file;
// example_b looks like the matching JSON input — confirm before relying on it.
struct B {
A a;
float b;
std::string c;
};
// Example B: JSON object whose "a" member is itself an object with the same
// keys as example_a, followed by a floating point "b" and a string "c".
// NOTE(review): not referenced by main in the visible code.
const char example_b[] = R"(
{
"a": { "a": 1, "b": 2.03, "c": "a" },
"b": 2.03,
"c": "a"
}
)";
// Example array: JSON array with the integers 0 to 10. Used as the input of
// main when ARRAY is defined.
const char example_array[] = R"(
[0,1,2,3,4,5,6,7,8,9,10]
)";
// Example map: JSON object with three integer members. Used as the input of
// main when MAP is defined.
const char example_map[] = R"(
{
"a": 1,
"b": 2,
"c": 3
}
)";
// Reader for struct A: binds the JSON keys "a", "b" and "c" to an int, a float
// and a string through the tuple-based reader, then packs them into an A.
template<>
struct sax_reader<A> final : public sax_reader<std::tuple<int,float,std::string>> {
private:
    using tuple_reader = sax_reader<std::tuple<int,float,std::string>>;
public:
    constexpr sax_reader() :
        tuple_reader({"a","b","c"})
    {
    }

    ~sax_reader() = default;

    // Converts the tuple produced by the base reader into an A, field by field.
    A value() const {
        const auto fields = tuple_reader::value();
        return A{ std::get<0>(fields), std::get<1>(fields), std::get<2>(fields) };
    }
};
// Writes the three fields of an A to standard output on a single line.
void print( const A& a ) {
    std::cout << "a: " << a.a;
    std::cout << ", b: " << a.b;
    std::cout << ", c: " << a.c;
    std::cout << "\n";
}
// Writes every element of the vector to standard output, each followed by
// ", ", and ends the line.
template < class T >
void print( const std::vector<T>& array ) {
    for( auto it = array.begin(); it != array.end(); ++it ) {
        std::cout << *it << ", ";
    }
    std::cout << "\n";
}
// Writes the map to standard output between braces, one "key: value," entry
// per line, in the iteration order of the map.
template < class K, class V, class C, class A >
void print( const std::map<K,V,C,A>& map ) {
    std::cout << "{\n";
    for( const auto& [name, mapped] : map ) {
        std::cout << " " << name << ": " << mapped << ",\n";
    }
    std::cout << "}\n";
}
// Selects which example main runs: OBJECT_A, ARRAY or MAP.
#define MAP

// Parses the selected example with the matching reader and prints the result.
// Returns 0 on success and 1 when parsing throws; the error text is written
// to standard error so that a failure is not silent.
int main() {
#if defined(OBJECT_A)
    std::string input = example_a;
    sax_reader<A> reader;
#elif defined(ARRAY)
    std::string input = example_array;
    sax_reader<std::vector<int>> reader;
#elif defined(MAP)
    std::string input = example_map;
    sax_reader<std::map<std::string,int>> reader;
#else
#error "Define one of OBJECT_A, ARRAY or MAP to select the example"
#endif
    try {
        // The readers in this file return false from a handler once their
        // value is complete. NOTE(review): sax_parse is expected to stop and
        // return false in that case, which is what ends this loop — confirm
        // against the nlohmann version in use.
        bool not_done = true;
        while( not_done ) {
            not_done = nlohmann::json::sax_parse(input, &reader);
        }
        print(reader.value());
        return 0;
    } catch( const std::exception& ex ) {
        std::cerr << ex.what() << "\n";
        return 1;
    } catch(...) {
        std::cerr << "Error: unknown exception while parsing\n";
        return 1;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.