Skip to content

Instantly share code, notes, and snippets.

@agrippa1994
Forked from jefftrull/js_spirit2.cpp
Created January 28, 2016 12:22
Show Gist options
  • Save agrippa1994/b63ce4ddcdbff0f11017 to your computer and use it in GitHub Desktop.
Save agrippa1994/b63ce4ddcdbff0f11017 to your computer and use it in GitHub Desktop.
Attempted solution to "Boost Spirit vs. Flex/Bison" comparison
// Attempt at making a Boost grammar that successfully parses Jason Shankel's predicate logic language
// #define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/optional.hpp>
// start with the AST
// ideally reflects the grammar in a natural way
// a predicate is something like name{p1, p2...}
// this type can also be used for keywords like #exists{p}
struct pred {
std::string id_;
std::vector<std::string> paramlist_;
};
BOOST_FUSION_ADAPT_STRUCT(
pred,
(std::string, id_)
(std::vector<std::string>, paramlist_)
)
struct fact;
typedef std::vector<fact> prodterm;
typedef std::vector<prodterm> sumterm;
typedef boost::variant<
pred,
boost::recursive_wrapper<sumterm>
> term;
struct condneg {
bool negated_;
term term_;
condneg() : negated_(false) {}
};
BOOST_FUSION_ADAPT_STRUCT(
condneg,
(bool, negated_)
(term, term_)
)
struct fact {
boost::optional<pred> kwdExpr_; // #predname{params}:, if present
condneg condneg_;
};
BOOST_FUSION_ADAPT_STRUCT(
fact,
(boost::optional<pred>, kwdExpr_)
(condneg, condneg_)
)
struct statement {
pred pred_;
sumterm sumterm_;
};
BOOST_FUSION_ADAPT_STRUCT(
statement,
(pred, pred_)
(sumterm, sumterm_)
)
// now the grammar, with synthesized attributes to the above, where possible
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
template <typename Iterator>
struct predicate_logic : qi::grammar<Iterator, statement(), ascii::space_type>
{
predicate_logic() : predicate_logic::base_type(statement_)
{
using boost::spirit::eps;
using boost::spirit::lexeme;
using boost::spirit::lit;
using boost::spirit::_1;
using boost::spirit::_val;
using boost::phoenix::at_c;
using ascii::char_;
// Note: I decided to rewrite the grammar here to reflect my understanding of the desired result
identifier_ = lexeme[+(char_("a-zA-Z_0-9"))];
identifierList_ = identifier_ >> *(',' >> identifier_) ;
kwdLabel_ = lexeme['#' >> +char_("a-zA-Z_0-9")];
kwdExpr_ = kwdLabel_ >> '{' >> identifierList_ >> '}' >> ':' ;
pred_ = identifier_ >> '{' >> identifierList_ >> '}' ;
term_ = pred_ | ( '(' >> sumterm_ >> ')' ) ;
condneg_ = -lit('!')[at_c<0>(_val) = true] >> term_[at_c<1>(_val) = _1];
sumterm_ = prodterm_ >> *('|' >> prodterm_) ;
prodterm_ = fact_ >> *('&' >> fact_) ;
// keyword predicate (i.e., #exists{A}:)
// must decide its operator prededence by choosing what rule the following term obeys
// IMO intuitively it should bind only to first following term: (possibly negated) predicate or parenthesized expression
fact_ = -kwdExpr_ >> condneg_ ;
statement_ = pred_ >> ":=" >> sumterm_ ;
query_ = ":?" >> sumterm_ ; // unused for now
// debugging
BOOST_SPIRIT_DEBUG_NODE(identifier_);
BOOST_SPIRIT_DEBUG_NODE(identifierList_);
BOOST_SPIRIT_DEBUG_NODE(kwdLabel_);
BOOST_SPIRIT_DEBUG_NODE(kwdExpr_);
BOOST_SPIRIT_DEBUG_NODE(pred_);
BOOST_SPIRIT_DEBUG_NODE(term_);
BOOST_SPIRIT_DEBUG_NODE(condneg_);
BOOST_SPIRIT_DEBUG_NODE(sumterm_);
BOOST_SPIRIT_DEBUG_NODE(prodterm_);
BOOST_SPIRIT_DEBUG_NODE(fact_);
BOOST_SPIRIT_DEBUG_NODE(statement_);
BOOST_SPIRIT_DEBUG_NODE(query_);
}
qi::rule<Iterator, std::string(), ascii::space_type> identifier_, kwdLabel_;
qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> identifierList_;
qi::rule<Iterator, pred(), ascii::space_type> kwdExpr_, pred_;
qi::rule<Iterator, term(), ascii::space_type> term_;
qi::rule<Iterator, condneg(), ascii::space_type> condneg_;
qi::rule<Iterator, prodterm(), ascii::space_type> prodterm_;
qi::rule<Iterator, sumterm(), ascii::space_type> sumterm_;
qi::rule<Iterator, fact(), ascii::space_type> fact_;
qi::rule<Iterator, statement(), ascii::space_type> statement_;
qi::rule<Iterator, sumterm(), ascii::space_type> query_;
};
bool testparse(const std::string& data) {
typedef std::string::const_iterator iterator_type;
predicate_logic<iterator_type> grammar;
statement result;
iterator_type beg = data.begin(), end = data.end();
if (!phrase_parse(beg, end, grammar, ascii::space_type(), result)) {
std::cerr << "parse failed!\n";
return false;
}
if (beg != end) {
std::cerr << "not all input was consumed; remaining: ";
std::copy(beg, end, std::ostream_iterator<char>(std::cerr));
return false;
}
std::cout << "parse successful\n";
return true;
}
int main() {
// example data given does not match grammar description - updated as it seemed best to me
// original data:
// std::string data("alive{X}:=(plant(X)|animal(X))|#exists(Y):(Y(X)&alive(Y))");
std::string data("alive{X}:=(plant{X}|animal{X})|#exists{Y}:(Y{X}&alive{Y})");
if (!testparse(data))
return 1;
data = std::string("dead{X,Y}:=mineral{X}|extraterrestrial{Y}|darkmatter{Z}");
if (!testparse(data))
return 1;
// this should be equivalent to "alive{X}:=(#exists{X}:Y{X})&!somepred{B}
data = std::string("alive{X}:=#exists{X}:Y{X}&!somepred{B}");
if (!testparse(data))
return 1;
// grouping with paren
data = std::string("alive{X}:=#exists{X}:(Y{X}&!somepred{B})");
if (!testparse(data))
return 1;
// deliberately separate, should be (#exists{X}:Y{X})|!somepred{B}
data = std::string("alive{X}:=#exists{X}:Y{X}|!somepred{B}");
if (!testparse(data))
return 1;
// a couple of failing parses, in the name of due diligence
data = std::string("alive{X}:=B|somepred{A}"); // "B" lacks predicate
if (testparse(data))
return 1;
data = std::string("alive{X}:=somepred{A}&"); // missing product term
if (testparse(data))
return 1;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment