Last active
August 28, 2019 09:58
-
-
Save jbelloncastro/6ba5c953c5ea71a3741a5355a6478ed6 to your computer and use it in GitHub Desktop.
nlohmann::json SAX parsing example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef SAX_H | |
#define SAX_H | |
#include <algorithm>
#include <array>
#include <bitset>
#include <cstddef>
#include <exception>
#include <iterator>
#include <limits>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>

#include <nlohmann/json.hpp>
template < class T > | |
struct sax_reader; | |
// Throws any exception no matter what kind of parsing event is | |
template <> | |
struct sax_reader<void> : public nlohmann::json_sax<nlohmann::json> { | |
[[noreturn]] void fail( const char* funcname ) { throw std::runtime_error(std::string("Error: unexpected ") + funcname); } | |
bool null() { fail(__func__); } | |
bool boolean(bool) { fail(__func__); } | |
bool start_array(size_t) { fail(__func__); } | |
bool end_array() { fail(__func__); } | |
bool number_integer(number_integer_t) { fail(__func__); } | |
bool number_unsigned(number_unsigned_t) { fail(__func__); } | |
bool number_float(number_float_t, const std::string&) { fail(__func__); } | |
bool string(std::string&) { fail(__func__); } | |
bool start_object(size_t) { fail(__func__); } | |
bool key( std::string& ) { fail(__func__); } | |
bool end_object() { fail(__func__); } | |
bool parse_error(std::size_t position, | |
const std::string& last_token, | |
const nlohmann::detail::exception& ex) | |
{ | |
std::stringstream ss("Parse error. "); | |
ss << "At: " << position | |
<< "; last token: " << last_token | |
<< "; error: " << ex.what(); | |
throw std::runtime_error(ss.str()); | |
} | |
}; | |
// Specialization for signed integer numbers | |
template <> | |
struct sax_reader<int> final : public sax_reader<void> { | |
int value() const { return _value; } | |
bool number_integer(number_integer_t val) { | |
_value = val; | |
return false; | |
} | |
bool number_unsigned(number_unsigned_t val) { | |
_value = val; | |
return false; | |
} | |
private: | |
int _value; | |
}; | |
// Specialization for unsigned integer numbers | |
template <> | |
struct sax_reader<unsigned int> final : public sax_reader<void> { | |
unsigned int value() const { return _value; } | |
bool number_unsigned(number_unsigned_t val) { | |
_value = val; | |
return false; | |
} | |
private: | |
unsigned int _value; | |
}; | |
// Specialization for floating point numbers | |
template <> | |
struct sax_reader<float> final : public sax_reader<void> { | |
float value() const { return _value; } | |
bool number_integer(number_integer_t val) { | |
_value = val; | |
return false; | |
} | |
bool number_unsigned(number_unsigned_t val) { | |
_value = val; | |
return false; | |
} | |
bool number_float(number_float_t val, const std::string&) { | |
_value = val; | |
return false; | |
} | |
private: | |
float _value; | |
}; | |
// Specialization for strings | |
template <> | |
struct sax_reader<std::string> final : public sax_reader<void> { | |
std::string value() const { return _value; } | |
bool string(std::string& val) { | |
_value = std::move(val); | |
return false; | |
} | |
private: | |
std::string _value; | |
}; | |
template < class T > | |
struct sax_reader<std::vector<T>> final : public sax_reader<void> { | |
private: | |
enum expect_token { | |
expected_start_array, | |
expected_end_array, | |
error, | |
expected_element | |
}; | |
public: | |
constexpr sax_reader() = default; | |
size_t size() const { | |
return _elements.size(); | |
} | |
std::vector<T> value() const { | |
std::vector<T> result; | |
for( auto& reader : _elements ) { | |
result.push_back( reader.value() ); | |
} | |
return result; | |
} | |
template < class OutputIt > | |
void value( OutputIt out ) const { | |
for( auto& reader : _elements ) { | |
out = reader.value(); | |
out++; | |
} | |
} | |
template < class F > | |
void action_element( F&& action ) { | |
bool needs_more = action(_next); | |
if( !needs_more ) { | |
_elements.push_back(std::move(_next)); | |
_next = {}; | |
_state = expected_end_array; | |
} | |
} | |
bool null() { | |
auto action = []( auto& element ) { | |
return element.null(); | |
}; | |
switch( _state ) { | |
case expected_end_array: | |
_state = expected_element; | |
// Fallthrough | |
case expected_element: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool boolean(bool val) { | |
auto action = [=]( auto& element ) { | |
return element.boolean(val); | |
}; | |
switch( _state ) { | |
case expected_end_array: | |
_state = expected_element; | |
// Fallthrough | |
case expected_element: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool start_array(size_t elements) { | |
auto action = [=]( auto& element ) { | |
return element.start_array(elements); | |
}; | |
switch( _state ) { | |
case expected_start_array: | |
_state = expected_element; | |
return true; | |
case expected_end_array: // array of arrays | |
_state = expected_element; | |
// Fallthrough | |
case expected_element: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool end_array() { | |
auto action = []( auto& element ) { | |
return element.end_array(); | |
}; | |
switch( _state ) { | |
case expected_end_array: | |
// We are done | |
return false; | |
case expected_element: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool number_integer(number_integer_t val) { | |
auto action = [=]( auto& element ) { | |
return element.number_integer(val); | |
}; | |
switch( _state ) { | |
case expected_end_array: | |
_state = expected_element; | |
// Fallthrough | |
case expected_element: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool number_unsigned(number_unsigned_t val) { | |
auto action = [=]( auto& element ) { | |
return element.number_unsigned(val); | |
}; | |
switch( _state ) { | |
case expected_end_array: | |
_state = expected_element; | |
// Fallthrough | |
case expected_element: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
// Float | |
bool number_float(number_float_t val, const std::string& s) { | |
auto action = [&]( auto& element ) { | |
return element.number_float(val,s); | |
}; | |
switch( _state ) { | |
case expected_end_array: | |
_state = expected_element; | |
// Fallthrough | |
case expected_element: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
// String | |
bool string(std::string& val) { | |
auto action = [&]( auto& element ) { | |
return element.string(val); | |
}; | |
switch( _state ) { | |
case expected_end_array: | |
_state = expected_element; | |
// Fallthrough | |
case expected_element: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
// Object | |
bool start_object(size_t elements) { | |
auto action = [=]( auto& element ) { | |
return element.start_object(elements); | |
}; | |
switch( _state ) { | |
case expected_end_array: | |
_state = expected_element; | |
// Fallthrough | |
case expected_element: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool key( std::string& val ) { | |
auto action = [&]( auto& element ) { | |
return element.key(val); | |
}; | |
switch( _state ) { | |
case expected_element: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool end_object() { | |
auto action = []( auto& element ) { | |
return element.end_object(); | |
}; | |
switch( _state ) { | |
case expected_element: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
private: | |
expect_token _state; | |
sax_reader<T> _next; | |
std::vector<sax_reader<T>> _elements; | |
}; | |
template < class K, class V, class C, class A > | |
struct sax_reader<std::map<K,V,C,A>> final : public sax_reader<void> { | |
private: | |
enum expect_token { | |
expected_start_object, | |
expected_end_object, | |
error, | |
expected_key, | |
expected_value | |
}; | |
public: | |
constexpr sax_reader() : | |
_state(expected_start_object), | |
_next_key(), | |
_next_value(), | |
_elements() | |
{ | |
} | |
size_t size() const { | |
return _elements.size(); | |
} | |
std::map<K,V,C,A> value() const { | |
std::map<K,V,C,A> result; | |
for( auto& e : _elements ) { | |
result.emplace( std::piecewise_construct, | |
std::forward_as_tuple(std::get<0>(e)), | |
std::forward_as_tuple(std::get<1>(e).value()) ); | |
} | |
return result; | |
} | |
template < class OutputIt > | |
void value( OutputIt out ) const { | |
for( auto& reader : _elements ) { | |
out = reader.value(); | |
out++; | |
} | |
} | |
template < class F > | |
void action_element( F&& action ) { | |
bool needs_more = action(_next_value); | |
if( !needs_more ) { | |
_elements.emplace_back( | |
std::piecewise_construct, | |
std::forward_as_tuple(std::move(_next_key)), | |
std::forward_as_tuple(std::move(_next_value))); | |
_next_value = {}; | |
_state = expected_key; | |
} | |
} | |
bool null() { | |
auto action = []( auto& element ) { | |
return element.null(); | |
}; | |
switch( _state ) { | |
case expected_value: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool boolean(bool val) { | |
auto action = [=]( auto& element ) { | |
return element.boolean(val); | |
}; | |
switch( _state ) { | |
case expected_value: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool start_array(size_t elements) { | |
auto action = [=]( auto& element ) { | |
return element.start_array(elements); | |
}; | |
switch( _state ) { | |
case expected_value: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool end_array() { | |
auto action = []( auto& element ) { | |
return element.end_array(); | |
}; | |
switch( _state ) { | |
case expected_value: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool number_integer(number_integer_t val) { | |
auto action = [=]( auto& element ) { | |
return element.number_integer(val); | |
}; | |
switch( _state ) { | |
case expected_value: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool number_unsigned(number_unsigned_t val) { | |
auto action = [=]( auto& element ) { | |
return element.number_unsigned(val); | |
}; | |
switch( _state ) { | |
case expected_value: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
// Float | |
bool number_float(number_float_t val, const std::string& s) { | |
auto action = [&]( auto& element ) { | |
return element.number_float(val,s); | |
}; | |
switch( _state ) { | |
case expected_value: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
// String | |
bool string(std::string& val) { | |
auto action = [&]( auto& element ) { | |
return element.string(val); | |
}; | |
switch( _state ) { | |
case expected_value: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
// Object | |
bool start_object(size_t elements) { | |
auto action = [=]( auto& element ) { | |
return element.start_object(elements); | |
}; | |
switch( _state ) { | |
case expected_start_object: | |
_state = expected_key; | |
return true; | |
case expected_value: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool key( std::string& val ) { | |
auto action = [&]( auto& element ) { | |
return element.key(val); | |
}; | |
switch( _state ) { | |
case expected_key: | |
_next_key = std::move(val); | |
_state = expected_value; | |
return true; | |
case expected_value: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
bool end_object() { | |
auto action = []( auto& element ) { | |
return element.end_object(); | |
}; | |
switch( _state ) { | |
case expected_key: | |
// Completed | |
return false; | |
case expected_value: | |
action_element(action); | |
return true; | |
default: | |
fail(__func__); | |
} | |
} | |
private: | |
expect_token _state; | |
std::string _next_key; | |
sax_reader<V> _next_value; | |
std::vector<std::pair<std::string,sax_reader<V>>> _elements; | |
}; | |
template < class... Members > | |
struct sax_reader<std::tuple<Members...>> : public sax_reader<void> { | |
private: | |
static constexpr size_t N = sizeof...(Members); | |
enum expect_token { | |
expected_start_object, | |
expected_end_object, | |
expected_key, | |
error, | |
expected_member | |
}; | |
constexpr static expect_token expect_member( size_t index ) { | |
return static_cast<expect_token>(expected_member + index); | |
} | |
template < class F, size_t... Is > | |
void expect_switch( F&& action, std::index_sequence<Is...> ) { | |
std::initializer_list<int> /* unused */{ | |
(_state == expect_member(Is)? | |
action(std::get<Is>(_members), _has_value[Is]), 0 | |
: 0 | |
)... | |
}; | |
} | |
template < class F > | |
void expect_switch( F&& action ) { | |
return expect_switch(std::forward<F>(action), | |
std::index_sequence_for<Members...>()); | |
} | |
public: | |
constexpr | |
sax_reader( std::array<std::string_view,N> keys ) : | |
_state(expected_start_object), | |
_has_value(), | |
_keys(keys), | |
_members() | |
{ | |
} | |
template < size_t... Is > | |
std::tuple<Members...> value(std::index_sequence<Is...>) const { | |
return std::make_tuple( | |
std::get<Is>(_members).value()... | |
); | |
} | |
std::tuple<Members...> value() const { | |
return value(std::index_sequence_for<Members...>()); | |
} | |
#pragma GCC diagnostic push | |
#pragma GCC diagnostic ignored "-Wswitch" | |
bool null() { | |
auto action = [&]( auto& member, auto has_value ) { | |
bool needs_more = member.null(); | |
has_value = !needs_more; | |
if( has_value ) { | |
_state = expected_key; | |
} | |
}; | |
switch( _state ) { | |
case expect_member(0) ... expect_member(N): | |
expect_switch(action); | |
break; | |
default: | |
fail(__func__); | |
} | |
return true; | |
} | |
bool boolean(bool val) { | |
auto action = [&]( auto& member, auto has_value ) { | |
bool needs_more = member.boolean(val); | |
has_value = !needs_more; | |
if( has_value ) { | |
_state = expected_key; | |
} | |
}; | |
switch( _state ) { | |
case expect_member(0) ... expect_member(N): | |
expect_switch(action); | |
break; | |
default: | |
fail(__func__); | |
} | |
return true; | |
} | |
bool start_array(size_t elements) { | |
auto action = [&]( auto& member, auto has_value ) { | |
bool needs_more = member.start_array(elements); | |
has_value = !needs_more; | |
if( has_value ) { | |
_state = expected_key; | |
} | |
}; | |
switch( _state ) { | |
case expect_member(0) ... expect_member(N): | |
expect_switch(action); | |
break; | |
default: | |
fail(__func__); | |
} | |
return true; | |
} | |
bool end_array() { | |
auto action = [&]( auto& member, auto has_value ) { | |
bool needs_more = member.end_array(); | |
has_value = !needs_more; | |
if( has_value ) { | |
_state = expected_key; | |
} | |
}; | |
switch( _state ) { | |
case expect_member(0) ... expect_member(N): | |
expect_switch(action); | |
break; | |
default: | |
fail(__func__); | |
} | |
return true; | |
} | |
bool number_integer(number_integer_t val) { | |
auto action = [&]( auto& member, auto has_value ) { | |
bool needs_more = member.number_integer(val); | |
has_value = !needs_more; | |
if( has_value ) { | |
_state = expected_key; | |
} | |
}; | |
switch( _state ) { | |
case expect_member(0) ... expect_member(N): | |
expect_switch(action); | |
break; | |
default: | |
fail(__func__); | |
} | |
return true; | |
} | |
bool number_unsigned(number_unsigned_t val) { | |
auto action = [&]( auto& member, auto has_value ) { | |
bool needs_more = member.number_unsigned(val); | |
has_value = !needs_more; | |
if( has_value ) { | |
_state = expected_key; | |
} | |
}; | |
switch( _state ) { | |
case expect_member(0) ... expect_member(N): | |
expect_switch(action); | |
break; | |
default: | |
fail(__func__); | |
} | |
return true; | |
} | |
// Float | |
bool number_float(number_float_t val, const std::string& s) { | |
auto action = [&]( auto& member, auto has_value ) { | |
bool needs_more = member.number_float(val, s); | |
has_value = !needs_more; | |
if( has_value ) { | |
_state = expected_key; | |
} | |
}; | |
switch( _state ) { | |
case expect_member(0) ... expect_member(N): | |
expect_switch(action); | |
break; | |
default: | |
fail(__func__); | |
} | |
_state = expected_key; | |
return true; | |
} | |
// String | |
bool string(std::string& val) { | |
auto action = [&]( auto& member, auto has_value ) { | |
bool needs_more = member.string(val); | |
has_value = !needs_more; | |
if( has_value ) { | |
_state = expected_key; | |
} | |
}; | |
switch( _state ) { | |
case expect_member(0) ... expect_member(N): | |
expect_switch(action); | |
break; | |
default: | |
fail(__func__); | |
} | |
_state = expected_key; | |
return true; | |
} | |
// Object | |
bool start_object(size_t elements) { | |
auto action = [&]( auto& member, auto has_value ) { | |
bool needs_more = member.start_object(elements); | |
has_value = !needs_more; | |
if( has_value ) { | |
_state = expected_key; | |
} | |
}; | |
switch( _state ) { | |
case expected_start_object: | |
_state = expected_key; | |
break; | |
case expect_member(0) ... expect_member(N): | |
expect_switch(action); | |
break; | |
default: | |
fail(__func__); | |
} | |
return true; | |
} | |
bool key( std::string& val ) { | |
auto action = [&]( auto& member, auto has_value ) { | |
bool needs_more = member.key(val); | |
has_value = !needs_more; | |
if( has_value ) { | |
_state = expected_key; | |
} | |
}; | |
switch( _state ) { | |
case expected_key: | |
{ | |
auto pos = std::find(_keys.begin(), _keys.end(), val); | |
if( pos != _keys.end() ) { | |
size_t member_i = std::distance(_keys.begin(), pos); | |
_state = expect_member(member_i); | |
return true; | |
} | |
} | |
// Fallthrough | |
// (key not found) | |
default: | |
fail(__func__); | |
case expect_member(0) ... expect_member(N): | |
expect_switch(action); | |
break; | |
} | |
return true; | |
} | |
bool end_object() { | |
auto action = [&]( auto& member, auto has_value ) { | |
bool needs_more = member.end_object(); | |
has_value = !needs_more; | |
if( has_value ) { | |
_state = expected_key; | |
} | |
}; | |
switch( _state ) { | |
case expected_key: | |
if( _has_value.all() ) | |
return false; | |
// Fallthrough | |
default: | |
fail(__func__); | |
case expect_member(0) ... expect_member(N): | |
expect_switch(action); | |
break; | |
} | |
return true; | |
} | |
#pragma GCC diagnostic pop | |
private: | |
expect_token _state; | |
std::bitset<3> _has_value; | |
std::array<std::string_view,N> _keys; | |
std::tuple< sax_reader<Members>... > _members; | |
}; | |
#endif // SAX_H | |
#include <iostream> | |
// Example aggregate filled from the JSON object in example_a.
// Scalar members are zero-initialized so a default-constructed A is fully defined.
struct A {
    int a = 0;
    float b = 0.0f;
    std::string c;
};
// Example A: a flat object matching struct A (keys "a", "b", "c").
const char example_a[] = R"(
{ "a": 1,
  "b": 2.03,
  "c": "a"
}
)";
struct B { | |
A a; | |
float b; | |
std::string c; | |
}; | |
// Example B: an object whose "a" member is itself an object shaped like example_a.
const char example_b[] = R"(
{
  "a": { "a": 1, "b": 2.03, "c": "a" },
  "b": 2.03,
  "c": "a"
}
)";
// Example array: eleven consecutive non-negative integers.
const char example_array[] = R"(
[0,1,2,3,4,5,6,7,8,9,10]
)";
// Example map: string keys mapped to integers.
const char example_map[] = R"(
{
  "a": 1,
  "b": 2,
  "c": 3
}
)";
template<> | |
struct sax_reader<A> final : public sax_reader<std::tuple<int,float,std::string>> { | |
constexpr sax_reader() : | |
sax_reader<std::tuple<int,float,std::string>>({"a","b","c"}) | |
{ | |
} | |
~sax_reader() = default; | |
A value() const { | |
return std::apply([](auto... args) { | |
return A{args...}; | |
}, | |
sax_reader<std::tuple<int,float,std::string>>::value()); | |
} | |
}; | |
// Prints the three fields of an A on one line to std::cout.
void print( const A& a ) {
    std::cout << "a: " << a.a
              << ", b: " << a.b
              << ", c: " << a.c
              << "\n";
}
// Prints every element of the vector to std::cout, each followed by ", ",
// then a newline.
template < class T >
void print( const std::vector<T>& array ) {
    for( const auto& element : array ) {
        std::cout << element << ", ";
    }
    std::cout << "\n";
}
// Prints the map to std::cout as a brace-delimited block with one
// "key: value," line per entry, in the map's iteration order.
template < class K, class V, class C, class A >
void print( const std::map<K,V,C,A>& map ) {
    std::cout << "{\n";
    for( const auto& entry : map ) {
        std::cout << " " << entry.first << ": " << entry.second << ",\n";
    }
    std::cout << "}\n";
}
#define MAP | |
int main() { | |
#if defined(OBJECT_A) | |
std::string input = example_a; | |
sax_reader<A> reader; | |
#elif defined(ARRAY) | |
std::string input = example_array; | |
sax_reader<std::vector<int>> reader; | |
#elif defined(MAP) | |
std::string input = example_map; | |
sax_reader<std::map<std::string,int>> reader; | |
#endif | |
try { | |
bool not_done = true; | |
while( not_done ) { | |
not_done = nlohmann::json::sax_parse(input, &reader); | |
} | |
print(reader.value()); | |
return 0; | |
} catch(...) { | |
return 1; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment