Last active
October 28, 2019 13:25
-
-
Save jefftrull/1544797 to your computer and use it in GitHub Desktop.
testcase for useful expectation failure error messages when using spirit::lex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// testcase for nice error messages in lexer-based parser | |
#define BOOST_SPIRIT_USE_PHOENIX_V3 | |
#include <iostream>
#include <sstream>
#include <boost/lexical_cast.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/lex_lexertl_position_token.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
// Token identifiers handed to the lexer.  Ids must not start at 0,
// which lexertl reserves for end-of-input, hence the 1000 base.
enum TokenIds {
    T_ALPHA = 1000,   // can't start at 0 == EOF
    T_BETA  = 1001,
    T_GAMMA = 1002,
    T_DELTA = 1003,
    T_OMEGA = 1004,
    T_ANY   = 1005    // catchall token, used mostly for error reporting
};
// Lexer definition: maps regex patterns to the token ids above.
// Lexer is the concrete lexertl engine type (here an actor_lexer,
// required because the whitespace rule has a semantic action).
// NOTE: registration order matters — earlier rules win on a tie.
template <typename Lexer>
struct Tokens : boost::spirit::lex::lexer<Lexer>
{
    Tokens()
    {
        namespace lex = boost::spirit::lex;
        // keyword tokens; each pattern is a literal word
        this->self =
            lex::string("ALPHA", T_ALPHA)
            | lex::string("BETA", T_BETA)
            | lex::string("GAMMA", T_GAMMA)
            | lex::string("DELTA", T_DELTA)
            | lex::string("OMEGA", T_OMEGA)
            ;
        // whitespace: matched but suppressed via pass_ignore, so the
        // parser never sees whitespace tokens
        this->self += lex::string("[ \\t\\n]+")
            [
                lex::_pass = lex::pass_flags::pass_ignore
            ];
        // catchall (mostly for error processing): any single character
        // nothing above matched still becomes a token (T_ANY) instead
        // of making the lexer fail outright
        this->self += lex::string(".", T_ANY);
    }
};
using namespace boost::spirit::qi; | |
// error handling function | |
struct error_info_impl | |
{ | |
// required by phoenix::function; gives signature | |
template <typename Signature> | |
struct result | |
{ | |
typedef std::string type; | |
}; | |
template<typename Iterator, typename What> | |
std::string operator()(Iterator const& actual_token_iter, | |
What const& what) const | |
{ | |
return "Error! Expecting: " + boost::lexical_cast<std::string>(what) + | |
" but saw: " + std::string(actual_token_iter->matched().begin(), | |
actual_token_iter->matched().end()); | |
} | |
}; | |
// Make it Phoenix-compatible (i.e., lazy) | |
boost::phoenix::function<error_info_impl> error_info; | |
// Grammar over the lexer's token stream.  Iterator is the lexer's
// token iterator type; Lexer is the token-definition type.
template <typename Iterator, typename Lexer>
struct Parser : boost::spirit::qi::grammar<Iterator>
{
    template <typename TokenDef>
    Parser(TokenDef const& tok) : Parser::base_type(start)
    {
        using namespace boost::spirit::qi;
        namespace qi = boost::spirit::qi;
        using boost::phoenix::val;        // for error handling
        using boost::phoenix::construct;  // for error handling

        // operator> is the expectation operator: once ALPHA (or DELTA)
        // has matched, the next token MUST match or an expectation
        // failure is raised (which on_error below catches)
        alpha_beta = qi::raw_token(T_ALPHA) > qi::raw_token(T_BETA) ;
        delta_gamma = qi::raw_token(T_DELTA) > qi::raw_token(T_GAMMA) ;
        start = *(alpha_beta | delta_gamma);

        // rule names appear in the "Expecting: ..." diagnostic text
        alpha_beta.name("AB");
        delta_gamma.name("DG");

        // fail policy: report and let the parse fail.  In the handler,
        // _3 is the iterator at the error position, _4 describes what
        // was expected; the whole << chain is a lazy phoenix expression
        on_error<fail>
        (
            start
          , std::cerr << error_info(boost::spirit::_3, boost::spirit::_4) << std::endl
        );
    }
    rule<Iterator> alpha_beta, delta_gamma, start;
};
// typedef for stream iterator we will use — boost::spirit's
// istream_iterator is a multi_pass adaptor, letting the lexer
// backtrack over a single-pass input stream
typedef boost::spirit::istream_iterator StreamIter;
// lexer needs the iterator type and a list of token attribute types;
// position_token additionally records each token's input position
typedef boost::spirit::lex::lexertl::position_token<StreamIter,
    boost::mpl::vector<int, double, std::string> > Token;
// actor_lexer is required because the whitespace rule has a semantic action
typedef boost::spirit::lex::lexertl::actor_lexer<Token> Lexer;
// Global lexer/parser instances; the parser is constructed from the
// lexer's token definitions (same translation unit, so the static
// initialization order here is well defined).
Tokens<Lexer> tokens;
Parser<Tokens<Lexer>::iterator_type, Tokens<Lexer>::lexer_def > myParser(tokens);
int main() { | |
std::stringstream testdata("ALPHA BETA DELTA BETA"); | |
testdata.unsetf(std::ios::skipws); | |
StreamIter beg = StreamIter(testdata), end; | |
Tokens<Lexer>::iterator_type it = tokens.begin(beg, end); | |
Tokens<Lexer>::iterator_type lex_end = tokens.end(); | |
if (!parse(it, lex_end, myParser)) { | |
std::cerr << "parsing failed\n"; | |
return 1; | |
} | |
if (beg != end) { | |
std::cerr << "not all input consumed\n"; | |
return 1; | |
} | |
} |
So it seems! I can't recall if the intent of this gist was to demonstrate that problem or whether I erroneously thought it was working... I updated it to show my latest code, which doesn't have that issue.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The error message seems to contain gibberish where the quoted extract from the input should be:
Error! Expecting "raw_token(1002)" here: "�"
parsing failed