Skip to content

Instantly share code, notes, and snippets.

@alebastr
Created January 2, 2020 21:43
Show Gist options
  • Save alebastr/5749f30a9bd0fb4ba21a22e0d51dbbe4 to your computer and use it in GitHub Desktop.
Save alebastr/5749f30a9bd0fb4ba21a22e0d51dbbe4 to your computer and use it in GitHub Desktop.
Boost::Spirit toy JSON Parser
#include <stddef.h>
#include <stdint.h>

#include <iomanip>
#include <iostream>
#include <list>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
//#define BOOST_SPIRIT_DEBUG
/* Required */
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/optional.hpp>
#include <boost/phoenix/object/construct.hpp>
#include <boost/ptr_container/ptr_map.hpp>
#include <boost/spirit/include/qi_core.hpp>
#include <boost/variant/recursive_variant.hpp>
/*
* Type name and index implementations.
* GCC 4.4 does not have std::type_index
*/
#include <typeinfo>
// for boost::units::detail::demangle
#include <boost/units/detail/utility.hpp>
#ifdef HAVE_TYPE_INDEX
#include <typeindex>
typedef std::type_index type_index_t;
#else
typedef std::string type_index_t;
#endif
/*
 * Run-time identity of a type: a readable name and a key usable in
 * associative containers.
 */
template<typename T>
struct TypeHelper {
    /* Demangled spelling of T as produced by boost::units::detail::demangle */
    static std::string name()
    {
        const std::type_info& info = typeid(T);
        return boost::units::detail::demangle(info.name());
    }

    /*
     * Key identifying T: std::type_index when HAVE_TYPE_INDEX is defined,
     * otherwise the demangled name.
     */
    static type_index_t index()
    {
#ifdef HAVE_TYPE_INDEX
        return type_index_t(typeid(T));
#else
        return name();
#endif
    }
};
namespace qi = boost::spirit::qi;
namespace JSON {
/*
 * Common base of every ValueGrammar that shares an iterator and a skipper,
 * so grammars for different value types can live in one container.
 */
template<typename Iterator, typename Skipper>
struct AnyValueGrammar {
    /* Virtual so that a grammar can be destroyed through this base type */
    virtual ~AnyValueGrammar() {}
};
/*
 * Fallback grammar, selected when no specialisation exists for Type.
 * It compiles for any type, but constructing it throws std::runtime_error
 * naming the unsupported type.
 */
template<
    typename Iterator,
    typename Type,
    typename Skipper = qi::blank_type,
    typename Enable = void
>
struct ValueGrammar
    : qi::grammar<Iterator, Type(), Skipper>
    , AnyValueGrammar<Iterator, Skipper>
{
    qi::rule<Iterator, Type(), Skipper> start;

    ValueGrammar()
        : ValueGrammar::base_type(start)
    {
        const std::string reason =
            "ValueGrammar does not have matching implementation for " +
            TypeHelper<Type>::name();
        throw std::runtime_error(reason);
    }
};
/*
* Cache and keep ownership for instances of ValueGrammar
*/
template<typename _Iterator, typename _Skipper>
struct ValueGrammarCache {
typedef ValueGrammarCache<_Iterator, _Skipper> container_t;
template<typename _Type>
static ValueGrammar<_Iterator, _Type, _Skipper>& get()
{
typedef ValueGrammar<_Iterator, _Type, _Skipper> grammar_t;
type_index_t tid = TypeHelper<_Type>::index();
#if defined(BOOST_SPIRIT_DEBUG)
std::cout << "Lookup cached parser for " <<
TypeHelper<_Type>::name() << std::endl;
#endif
if (container_t::cache.count(tid) == 0) {
#if defined(BOOST_SPIRIT_DEBUG)
std::cout << "Constructing new parser for " <<
TypeHelper<_Type>::name() << std::endl;
#endif
container_t::cache.insert(tid, new grammar_t());
}
return static_cast<grammar_t&>(container_t::cache[tid]);
}
private:
static boost::ptr_map<type_index_t,
AnyValueGrammar<_Iterator, _Skipper> > cache;
};
template<typename _Iterator, typename _Skipper>
boost::ptr_map<type_index_t, AnyValueGrammar<_Iterator, _Skipper> >
ValueGrammarCache<_Iterator, _Skipper>::cache =
boost::ptr_map<type_index_t, AnyValueGrammar<_Iterator, _Skipper> >();
/*
* Convenient wrappers for quick parsing
*/
template<typename _Result, typename _Iterator>
void Parse(_Result& value, _Iterator& begin, const _Iterator& end,
/* strict parsing should consume whole input */
bool strict = false)
{
typedef ValueGrammarCache<_Iterator, qi::space_type> cache_t;
bool result = qi::phrase_parse(begin, end,
cache_t::template get<_Result>(), qi::space, value);
if (!result || (strict && begin != end)) {
throw std::runtime_error(
"Cannot parse " + TypeHelper<_Result>::name());
}
}
/*
 * Parse one _Result from [begin, end) and return it by value.
 * `begin` is advanced past the consumed input.
 * With strict set, input left over after the value counts as a failure.
 * Throws std::runtime_error (from the in-place overload) when parsing fails.
 */
template<typename _Result, typename _Iterator>
_Result Parse(_Iterator& begin, const _Iterator& end,
    /* strict parsing should consume whole input */
    bool strict = false)
{
    /*
     * Value-initialise the result: a struct grammar only assigns the fields
     * that are present in the input, so a plain `_Result value;` would leave
     * absent scalar members indeterminate and then copy them on return.
     */
    _Result value = _Result();
    Parse<_Result>(value, begin, end, strict);
    return value;
}
/*
 * Parse one _Result from a string and return it by value.
 * Strict by default: the whole string must be consumed.
 * Throws std::runtime_error (from the in-place overload) when parsing fails.
 */
template<typename _Result>
_Result Parse(const std::string& input, bool strict = true)
{
    std::string::const_iterator iter = input.begin();
    /*
     * Value-initialise the result: a struct grammar only assigns the fields
     * that are present in the input, so a plain `_Result value;` would leave
     * absent scalar members indeterminate and then copy them on return.
     */
    _Result value = _Result();
    Parse<_Result>(value, iter, input.end(), strict);
    return value;
}
} // namespace JSON
#include <boost/spirit/include/phoenix.hpp>
namespace JSON {
namespace ph = boost::phoenix;
/* Integer alternative of JsonValue_t */
typedef long long JsonInt_t;
/* Empty tag type standing for the JSON literal null */
struct JsonNull_t { };
/*
* Parse to Boost.Variant tree
*
* JsonValue_t holds exactly one of the alternatives listed below.
* The vector and map alternatives contain JsonValue_t again
* (boost::recursive_variant_ is the placeholder for the variant itself).
*/
typedef boost::make_recursive_variant<
JsonInt_t
, JsonNull_t
, bool
, double
, std::string
, std::vector<boost::recursive_variant_>
, std::map<std::string, boost::recursive_variant_>
>::type JsonValue_t;
/*
* Schema-less grammar: parses any JSON value into a JsonValue_t tree.
* null, bool, integer and string are delegated to the cached per-type
* grammars; arrays and objects recurse through the `value` rule.
*/
template<typename _Iterator, typename _Skipper>
struct ValueGrammar<_Iterator, JsonValue_t, _Skipper>
: qi::grammar<_Iterator, JsonValue_t(), _Skipper>
, AnyValueGrammar<_Iterator, _Skipper>
{
typedef ValueGrammarCache<_Iterator, _Skipper> cache_t;
/* '[' value, value, ... ']' */
qi::rule<_Iterator, std::vector<JsonValue_t>(), _Skipper> Array;
/* '{' "key" : value, ... '}' */
qi::rule<_Iterator, std::map<std::string, JsonValue_t>(), _Skipper> Object;
/* Start rule: one JSON value of any kind */
qi::rule<_Iterator, JsonValue_t(), _Skipper> value;
ValueGrammar()
: ValueGrammar::base_type(value)
{
/* Alternatives are tried in the order they are listed */
value
= cache_t::template get<JsonNull_t>()
| cache_t::template get<bool>()
/*
* No type hint, therefore we can't distinguish double and int.
* Forbid parsing numbers without dot as double
* (strict_real_policies); such numbers fall through to JsonInt_t.
*/
| qi::real_parser<double, qi::strict_real_policies<double> >()
| cache_t::template get<JsonInt_t>()
| cache_t::template get<std::string>()
| Array
| Object
;
/*
* '>' is the expectation operator: once the opening bracket has matched,
* a mismatch in the rest is reported as an error instead of backtracking.
* The list itself is optional, so "[]" and "{}" are accepted.
*/
Array = '[' > -(value % ',') > ']';
Object
= '{'
> -(( cache_t::template get<std::string>()
> ':'
> value
) % ',')
> '}'
;
using namespace qi::labels;
/*
* On an expectation failure print what was expected (_4) and the input
* from the error position (_3) to the end (_2), then fail the parse.
*/
qi::on_error<qi::fail>
(
value
, std::cout
<< ph::val("Error! Expecting ")
<< _4
<< ph::val(" here: \"")
<< ph::construct<std::string>(_3, _2)
<< ph::val("\"")
<< std::endl
);
}
};
} // namespace JSON
/*
* Type-specific part
*/
namespace JSON {
/*
* Grammar for a JSON string literal: a double-quoted sequence of
* characters and escape sequences. The attribute is the text between
* the quotes with escapes replaced.
*/
template<typename _Iterator, typename _Skipper>
struct ValueGrammar<_Iterator, std::string, _Skipper>
: qi::grammar<_Iterator, std::string(), _Skipper>
, AnyValueGrammar<_Iterator, _Skipper>
{
/* Two-character escape sequence -> the character it stands for */
qi::symbols<const char, const char> escape_sequences;
/* One character of string content; declared without a skipper */
qi::rule<_Iterator, char()> json_char;
/* The complete quoted literal */
qi::rule<_Iterator, std::string(), _Skipper> start;
ValueGrammar()
: ValueGrammar::base_type(start)
{
escape_sequences.add
("\\\"",'\"')
("\\\\",'\\')
("\\/", '/')
("\\b", '\b')
("\\f", '\f')
("\\n", '\n')
("\\r", '\r')
("\\t", '\t')
;
json_char
= escape_sequences
// FIXME: convert to UTF-8
// NOTE(review): the \uXXXX parser yields a uint16_t while this rule's
// attribute is a single char, so values above 0xFF presumably do not
// survive the conversion -- confirm before relying on \u escapes.
| "\\u" >> qi::uint_parser<uint16_t, 16, 4, 4>()
/* any other character except backslash, double quote and control characters */
| (qi::char_ - '\\' - '"' - qi::cntrl)
;
/* lexeme: the skipper is switched off between the quotes */
start = qi::lexeme['"' > *json_char > '"'];
}
};
// a hack for multiple template parameters within container type
#define COMMA ,

/*
 * PrimitiveParser(Type, Body) defines the ValueGrammar specialisation for a
 * scalar Type. Body is the constructor body; it must assign the `start` rule.
 */
#define PrimitiveParser(_Type, _Body) \
    template<typename _Iterator, typename _Skipper> \
    struct ValueGrammar<_Iterator, _Type, _Skipper> \
        : qi::grammar<_Iterator, _Type(), _Skipper> \
        , AnyValueGrammar<_Iterator, _Skipper> \
    { \
        qi::rule<_Iterator, _Type(), _Skipper> start; \
        ValueGrammar() : ValueGrammar::base_type(start) \
        _Body \
    }

/*
 * ContainerParser(ContainerType, Body) defines the ValueGrammar
 * specialisation for a container template. ContainerType must be spelled in
 * terms of the template parameter _Type (the element type); Body may use
 * cache_t to obtain the element grammar.
 */
#define ContainerParser(_ContainerType, _Body) \
    template<typename _Iterator,typename _Type,typename _Skipper> \
    struct ValueGrammar<_Iterator, _ContainerType, _Skipper> \
        : qi::grammar<_Iterator, _ContainerType(), _Skipper> \
        , AnyValueGrammar<_Iterator, _Skipper> \
    { \
        typedef ValueGrammarCache<_Iterator, _Skipper> cache_t; \
        qi::rule<_Iterator, _ContainerType(), _Skipper> start; \
        ValueGrammar() : ValueGrammar::base_type(start) \
        _Body \
    }

PrimitiveParser(bool, { start = qi::bool_; });

/*
 * Integer grammars are keyed on the fundamental types, not on the
 * <stdint.h> typedefs. Those typedefs map to different fundamental types
 * per platform: where int64_t is `long long`, a separate grammar for
 * JsonInt_t (also `long long`) would define the same specialisation twice,
 * and `unsigned long` (often size_t) would have no grammar at all.
 * Each fundamental type listed once covers the fixed-width typedefs,
 * size_t and JsonInt_t, assuming the usual mapping of int8_t/uint8_t to
 * signed/unsigned char.
 */
PrimitiveParser(unsigned char,      { start = qi::uint_parser<unsigned char>(); });
PrimitiveParser(unsigned short,     { start = qi::uint_parser<unsigned short>(); });
PrimitiveParser(unsigned int,       { start = qi::uint_parser<unsigned int>(); });
PrimitiveParser(unsigned long,      { start = qi::uint_parser<unsigned long>(); });
PrimitiveParser(unsigned long long, { start = qi::uint_parser<unsigned long long>(); });
PrimitiveParser(signed char,        { start = qi::int_parser<signed char>(); });
PrimitiveParser(short,              { start = qi::int_parser<short>(); });
PrimitiveParser(int,                { start = qi::int_parser<int>(); });
PrimitiveParser(long,               { start = qi::int_parser<long>(); });
PrimitiveParser(long long,          { start = qi::int_parser<long long>(); });

/* Parentheses protect the comma in the template argument list from the macro */
PrimitiveParser(double, {
    start = (qi::real_parser<double, qi::real_policies<double> >());
});

PrimitiveParser(JsonNull_t, {
    start = qi::lit("null") [qi::_val = JsonNull_t()];
});

/* JSON object with homogeneous values: { "key": value, ... } */
ContainerParser(std::map<std::string COMMA _Type>, {
    start
        = '{'
        > -(( cache_t::template get<std::string>()
            > ':'
            > cache_t::template get<_Type>()
            ) % ',')
        > '}'
        ;
});

/* JSON array with homogeneous elements: [ value, ... ] */
ContainerParser(std::vector<_Type>, {
    start = '[' > -(cache_t::template get<_Type>() % ',') > ']';
});

/* Either null (empty optional) or a value of the wrapped type */
ContainerParser(boost::optional<_Type>, {
    start
        = cache_t::template get<JsonNull_t>() [qi::_val = boost::none]
        | cache_t::template get<_Type>()
            [
                qi::_val =
                    boost::phoenix::construct<boost::optional<_Type> >(qi::_1)
            ]
        ;
});

#undef COMMA
#undef PrimitiveParser
#undef ContainerParser
} // namespace JSON
#include <boost/mpl/range_c.hpp>
#include <boost/fusion/include/accumulate.hpp>
#include <boost/fusion/include/at_c.hpp>
#include <boost/fusion/include/for_each.hpp>
#include <boost/fusion/include/is_sequence.hpp>
#include <boost/fusion/include/mpl.hpp>
#include <boost/phoenix/fusion.hpp>
#include <boost/spirit/include/qi_eps.hpp>
#include <boost/type_traits/remove_reference.hpp>
#include <boost/utility/enable_if.hpp>
namespace JSON {
namespace fusion = boost::fusion;
namespace mpl = boost::mpl;
namespace ph = boost::phoenix;
/*
* Parse structs adapted to boost::fusion sequence
*
* A JSON object is matched member by member: every adapted field gets a
* rule that recognises "field-name" : value and stores the value into that
* field. Keys that match no field are parsed and discarded.
*/
template<typename _Iterator, typename _Type, typename _Skipper>
struct ValueGrammar<_Iterator, _Type, _Skipper,
typename boost::enable_if<
typename fusion::traits::is_sequence<_Type>::type>::type>
: qi::grammar<_Iterator, _Type(), _Skipper>
, AnyValueGrammar<_Iterator, _Skipper>
{
typedef struct ValueGrammar<_Iterator, _Type, _Skipper> this_t;
typedef ValueGrammarCache<_Iterator, _Skipper> cache_t;
/*
* Parse struct field and assign value to a field of qi::_r1 passed from parent rule.
*/
typedef qi::rule<_Iterator, void(_Type&), _Skipper> member_rule_t;
/*
* boost::spirit does not keep ownership of rules in a composite rule.
* Therefore if one wants to generate some composite rule at runtime,
* all intermediate rules should be stored as well.
*
* fusion::fold does not allow us to move result of previous iteration, so it's useless here
*
* NOTE(review): presumably a std::list rather than a vector so that rules
* already referenced by later rules never move -- confirm.
*/
std::list<member_rule_t> members;
/*
* Functor called once per member index. Each call appends a rule of the
* form "previous rule | this field", so members.back() always matches any
* of the fields registered so far.
*/
struct CreateRule {
this_t& self;
CreateRule(this_t& o) : self(o) {}
template<typename Index>
void operator() (Index /* unused */) const
{
/* Type of the Index-th member with the reference stripped */
typedef typename boost::remove_reference<
typename fusion::result_of::at_c<_Type, Index::value>::type
>::type value_t;
/* Name of the Index-th member as registered with fusion */
std::string field_name =
fusion::extension::struct_member_name<_Type, Index::value>::call();
#if defined(BOOST_SPIRIT_DEBUG)
std::cout << field_name << ": " <<
TypeHelper<value_t>::name() << std::endl;
#endif
self.members.push_back(
self.members.back()(qi::_r1)
| ( qi::omit
[
qi::lexeme['"' >> qi::lit(field_name) >> '"']
]
> ':'
> cache_t::template get<value_t>()
[
/* store the parsed value into the Index-th member of the struct */
ph::at_c<Index::value>(qi::_r1) = qi::_1
]
)
);
}
};
qi::rule<_Iterator, _Type(), _Skipper> start;
ValueGrammar()
: ValueGrammar::base_type(start)
{
/* Compile-time sequence 0 .. (number of members - 1) */
typedef mpl::range_c<size_t, 0,
fusion::result_of::size<_Type>::value> MemberIndices;
/* Initial value, O(1) and never matches */
members.push_back(qi::eps(false));
// [&](auto i) {...} is available only in C++14
fusion::for_each(MemberIndices(), CreateRule(*this));
/*
* The struct being filled (qi::_val) is handed to the member rules as
* their inherited attribute.
*/
start
= '{'
> -(( members.back()(qi::_val)
/* Ignore unknown fields */
| qi::omit
[
cache_t::template get<std::string>()
> ':'
> cache_t::template get<JsonValue_t>()
]
) % ',')
> '}'
;
using namespace qi::labels;
/*
* On an expectation failure print what was expected (_4) and the input
* from the error position (_3) to the end (_2), then fail the parse.
*/
qi::on_error<qi::fail>
(
start
, std::cout
<< ph::val("Error! Expecting ")
<< _4
<< ph::val(" here: \"")
<< ph::construct<std::string>(_3, _2)
<< ph::val("\"")
<< std::endl
);
}
};
} // namespace JSON
/*******************************
* Tests *
*******************************/
#define BOOST_TEST_MODULE JSON
#include <boost/test/included/unit_test.hpp>
#include <fstream>
#include <boost/fusion/include/adapted.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/optional/optional_io.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/variant/get.hpp>
/* Test fixtures: structs exposed to the parser through Boost.Fusion */
namespace Model {
/* Flat struct with two strings and a number */
struct ExampleStruct {
std::string Name;
std::string Value;
size_t Length;
};
}
/* Adapt the struct defined above as a fusion sequence */
BOOST_FUSION_ADAPT_STRUCT(
struct Model::ExampleStruct,
(std::string, Name)
(std::string, Value)
(size_t, Length)
)
/* Alias without a comma, so the type can be passed as one macro argument */
typedef std::map<std::string, double> double_map_t;
/*
* Define Model::ComplexContainer and adapt it in one step. It combines a
* nested struct, a map, vectors, a schema-less subtree and optionals.
*/
BOOST_FUSION_DEFINE_STRUCT(
(Model), ComplexContainer,
(std::string, Name)
(Model::ExampleStruct, SingleStruct)
(double_map_t, Map)
(std::vector<std::string>, Strings)
(std::vector<Model::ExampleStruct>, Structs)
(JSON::JsonValue_t, AnyData)
(boost::optional<size_t>, Optional)
(boost::optional<size_t>, EmptyOptional)
)
BOOST_AUTO_TEST_SUITE(JSON)
/* A quoted JSON string parses to its unquoted contents */
BOOST_AUTO_TEST_CASE(String)
{
    const std::string json("\"Hello World!\"");
    const std::string parsed = JSON::Parse<std::string>(json);

    BOOST_CHECK_EQUAL(parsed, "Hello World!");
}
/* An array of strings parses to a vector, regardless of spacing */
BOOST_AUTO_TEST_CASE(Array)
{
    typedef std::vector<std::string> strings_t;

    const std::string json("[ \"Hello World!\", \"test\",\"no-space\" ]");
    const strings_t items = JSON::Parse<strings_t>(json);

    BOOST_CHECK_EQUAL(items.size(), 3);
    BOOST_CHECK_EQUAL(items[0], "Hello World!");
    BOOST_CHECK_EQUAL(items[1], "test");
    BOOST_CHECK_EQUAL(items[2], "no-space");
}
/* "[]" parses to an empty vector */
BOOST_AUTO_TEST_CASE(EmptyArray)
{
    typedef std::vector<std::string> strings_t;

    const std::string json("[]");
    const strings_t items = JSON::Parse<strings_t>(json);

    BOOST_CHECK_EQUAL(items.size(), 0);
}
/* Schema-less parsing of a string yields the std::string alternative */
BOOST_AUTO_TEST_CASE(StringValue)
{
    const std::string json("\"Hello World!\"");
    JSON::JsonValue_t tree = JSON::Parse<JSON::JsonValue_t>(json);

    BOOST_CHECK_EQUAL(boost::get<std::string>(tree), "Hello World!");
}
/* Schema-less parsing of an array yields the vector alternative */
BOOST_AUTO_TEST_CASE(ArrayValue)
{
    typedef std::vector<JSON::JsonValue_t> array_t;

    const std::string json("[\"Hello World!\", \"test\",\"no-space\"]");
    JSON::JsonValue_t tree = JSON::Parse<JSON::JsonValue_t>(json);
    array_t items = boost::get<array_t>(tree);

    BOOST_CHECK_EQUAL(items.size(), 3);
    BOOST_CHECK_EQUAL(boost::get<std::string>(items[0]), "Hello World!");
    BOOST_CHECK_EQUAL(boost::get<std::string>(items[1]), "test");
    BOOST_CHECK_EQUAL(boost::get<std::string>(items[2]), "no-space");
}
/* Nineteen levels of array nesting must parse without an exception */
BOOST_AUTO_TEST_CASE(NestedArrayValue)
{
    const std::string json(
        "[[[[[[[[[[[[[[[[[[[\"Not too deep\"]]]]]]]]]]]]]]]]]]]");

    BOOST_CHECK_NO_THROW(JSON::Parse<JSON::JsonValue_t>(json));
}
/* Schema-less parsing of an object yields the map alternative */
BOOST_AUTO_TEST_CASE(ObjectValue)
{
    typedef std::map<std::string, JSON::JsonValue_t> object_t;

    const std::string json(
        "{\"hello\":\"Hello World!\", \"test\":\"test\",\"no-space\" : 1}");
    JSON::JsonValue_t tree = JSON::Parse<JSON::JsonValue_t>(json);
    /* A mutable copy: the lookups below use the non-const operator[] */
    object_t fields = boost::get<object_t>(tree);

    BOOST_CHECK_EQUAL(fields.size(), 3);
    BOOST_CHECK_EQUAL(boost::get<std::string>(fields["hello"]), "Hello World!");
    BOOST_CHECK_EQUAL(boost::get<std::string>(fields["test"]), "test");
    BOOST_CHECK_EQUAL(boost::get<JSON::JsonInt_t>(fields["no-space"]), 1);
}
/* An object whose keys are the member names fills the adapted struct */
BOOST_AUTO_TEST_CASE(ExampleStruct)
{
    const std::string json(
        "{\"Name\":\"Hello World!\", \"Value\":\"test\",\"Length\" : 1}");
    const Model::ExampleStruct parsed =
        JSON::Parse<Model::ExampleStruct>(json);

    BOOST_CHECK_EQUAL(parsed.Name, "Hello World!");
    BOOST_CHECK_EQUAL(parsed.Value, "test");
    BOOST_CHECK_EQUAL(parsed.Length, 1);
}
/*
 * One document exercising nested structs, maps, vectors, optionals and a
 * schema-less subtree. Keys appear in a different order than the members.
 */
BOOST_AUTO_TEST_CASE(ComplexContainer)
{
    typedef std::map<std::string, JSON::JsonValue_t> object_t;

    const std::string json =
        "{\n"
        " \"Name\":\"ComplexContainer\",\n"
        " \"SingleStruct\": {\n"
        " \"Name\":\"Contained\", \"Value\":\"test\",\"Length\" : 1\n"
        " },\n"
        " \"Map\": {\n"
        " \"First\": 1.0, \"Second\": 2.0, \"Third\" : 3.0\n"
        " },\n"
        " \"Strings\": [\"First\", \"Second\", \"Third\"],\n"
        " \"Structs\": [\n"
        " {\"Name\":\"First\", \"Length\" : 100}\n"
        " ],\n"
        " \"EmptyOptional\": null,\n"
        " \"Optional\": 31337,\n"
        " \"AnyData\": { \"test\":\"value\" }\n"
        "}";

    /* The document must also parse without any knowledge of the types */
    JSON::JsonValue_t untyped = JSON::Parse<JSON::JsonValue_t>(json);

    Model::ComplexContainer parsed =
        JSON::Parse<Model::ComplexContainer>(json);
    BOOST_CHECK_EQUAL(parsed.Name, "ComplexContainer");

    /* SingleStruct */
    BOOST_CHECK_EQUAL(parsed.SingleStruct.Name, "Contained");
    BOOST_CHECK_EQUAL(parsed.SingleStruct.Value, "test");
    BOOST_CHECK_EQUAL(parsed.SingleStruct.Length, 1);

    /* Map */
    BOOST_CHECK_EQUAL(parsed.Map.size(), 3);
    BOOST_CHECK_EQUAL(parsed.Map["First"], 1.0);
    BOOST_CHECK_EQUAL(parsed.Map["Second"], 2.0);
    BOOST_CHECK_EQUAL(parsed.Map["Third"], 3.0);

    /* Strings */
    BOOST_CHECK_EQUAL(parsed.Strings.size(), 3);
    BOOST_CHECK_EQUAL(parsed.Strings[0], "First");
    BOOST_CHECK_EQUAL(parsed.Strings[1], "Second");
    BOOST_CHECK_EQUAL(parsed.Strings[2], "Third");

    /* Structs */
    BOOST_CHECK_EQUAL(parsed.Structs.size(), 1);
    BOOST_CHECK_EQUAL(parsed.Structs[0].Name, "First");
    /* "Value" is absent from the input; default handling could be better */
    BOOST_CHECK_EQUAL(parsed.Structs[0].Value, "");
    BOOST_CHECK_EQUAL(parsed.Structs[0].Length, 100);

    /* Optionals */
    BOOST_CHECK_EQUAL(parsed.EmptyOptional, boost::none);
    BOOST_CHECK_EQUAL(parsed.Optional, boost::optional<size_t>(31337));

    /* JSON subtree without schema */
    object_t subtree = boost::get<object_t>(parsed.AnyData);
    BOOST_CHECK_EQUAL(boost::get<std::string>(subtree["test"]), "value");
}
/* Every fixed-width integer type has a grammar and round-trips a literal */
BOOST_AUTO_TEST_CASE(Integers)
{
    /* Negative literals into signed types */
    const int8_t neg8 = JSON::Parse<int8_t>("-8");
    BOOST_CHECK_EQUAL(neg8, -8);
    const int16_t neg16 = JSON::Parse<int16_t>("-16");
    BOOST_CHECK_EQUAL(neg16, -16);
    const int32_t neg32 = JSON::Parse<int32_t>("-32");
    BOOST_CHECK_EQUAL(neg32, -32);
    const int64_t neg64 = JSON::Parse<int64_t>("-64");
    BOOST_CHECK_EQUAL(neg64, -64);

    /* Positive literals into signed types */
    const int8_t pos8 = JSON::Parse<int8_t>("8");
    BOOST_CHECK_EQUAL(pos8, 8);
    const int16_t pos16 = JSON::Parse<int16_t>("16");
    BOOST_CHECK_EQUAL(pos16, 16);
    const int32_t pos32 = JSON::Parse<int32_t>("32");
    BOOST_CHECK_EQUAL(pos32, 32);
    const int64_t pos64 = JSON::Parse<int64_t>("64");
    BOOST_CHECK_EQUAL(pos64, 64);

    /* Unsigned types */
    const uint8_t u8 = JSON::Parse<uint8_t>("8");
    BOOST_CHECK_EQUAL(u8, 8);
    const uint16_t u16 = JSON::Parse<uint16_t>("16");
    BOOST_CHECK_EQUAL(u16, 16);
    const uint32_t u32 = JSON::Parse<uint32_t>("32");
    BOOST_CHECK_EQUAL(u32, 32);
    const uint64_t u64 = JSON::Parse<uint64_t>("64");
    BOOST_CHECK_EQUAL(u64, 64);
}
/*
 * Parsing into an existing struct overwrites only the fields present in
 * the input; the others keep their previous values.
 */
BOOST_AUTO_TEST_CASE(ExistingValues)
{
    const std::string json("{\"Name\":\"Changed\", \"Length\" : 4}");

    Model::ExampleStruct target;
    target.Name = "Name";
    target.Value = "Value";
    target.Length = 1;

    std::string::const_iterator cursor = json.begin();
    const std::string::const_iterator stop = json.end();
    JSON::Parse<Model::ExampleStruct>(target, cursor, stop, true);

    BOOST_CHECK_EQUAL(target.Name, "Changed");
    BOOST_CHECK_EQUAL(target.Value, "Value");
    BOOST_CHECK_EQUAL(target.Length, 4);
}
/* http://www.json.org/JSON_checker/test.zip
 * Runs the parser over tests/json/pass1..3.json (must parse) and
 * tests/json/fail1..33.json (must be rejected).
 * Failing tests:
 * fail1.json - allowed in updated RFC
 * fail13.json - need to forbid leading zero in int_parser
 * fail18.json - no max depth limitation
 */
BOOST_AUTO_TEST_CASE_EXPECTED_FAILURES(JSON_checker, 3)
BOOST_AUTO_TEST_CASE(JSON_checker)
{
    for (size_t i = 1; i <= 3; ++i) {
        const std::string file = "tests/json/pass" +
            boost::lexical_cast<std::string>(i) + ".json";
        BOOST_TEST_MESSAGE(file);
        /* c_str(): the std::string constructor overload is C++11-only */
        std::ifstream in(file.c_str());
        BOOST_REQUIRE_MESSAGE(in.is_open(), "cannot open " + file);
        /* keep whitespace in the stream; the grammar's skipper handles it */
        in.unsetf(std::ios::skipws);
        boost::spirit::istream_iterator iter(in);
        BOOST_CHECK_NO_THROW(JSON::Parse<JSON::JsonValue_t>(iter,
            boost::spirit::istream_iterator(), true));
    }
    for (size_t i = 1; i <= 33; ++i) {
        const std::string file = "tests/json/fail" +
            boost::lexical_cast<std::string>(i) + ".json";
        BOOST_TEST_MESSAGE(file);
        std::ifstream in(file.c_str());
        /*
         * Without this check a missing fixture reads as empty input, which
         * is rejected, so the "must fail" check would pass without testing
         * anything.
         */
        BOOST_REQUIRE_MESSAGE(in.is_open(), "cannot open " + file);
        in.unsetf(std::ios::skipws);
        boost::spirit::istream_iterator iter(in);
        BOOST_CHECK_THROW(JSON::Parse<JSON::JsonValue_t>(iter,
            boost::spirit::istream_iterator(), true), std::exception);
    }
}
BOOST_AUTO_TEST_SUITE_END()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment