jefftrull/js_spirit2.cpp

## js_spirit2.cpp
// Attempt at making a Boost grammar that successfully parses Jason Shankel's predicate logic language
// #define BOOST_SPIRIT_DEBUG

#include <boost/spirit/include/qi.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/optional.hpp>

// start with the AST
// ideally reflects the grammar in a natural way

// a predicate is something like name{p1, p2...}
// this type can also be used for keywords like #exists{p}
struct pred {
  std::string              id_;
  std::vector<std::string> paramlist_;
};
BOOST_FUSION_ADAPT_STRUCT(
  pred,
  (std::string, id_)
  (std::vector<std::string>, paramlist_)
)

struct fact;
typedef std::vector<fact> prodterm;
typedef std::vector<prodterm> sumterm;

typedef boost::variant<
  pred,
  boost::recursive_wrapper<sumterm>
  > term;

struct condneg {
  bool negated_;
  term term_;
  condneg() : negated_(false) {}
};
BOOST_FUSION_ADAPT_STRUCT(
  condneg,
  (bool, negated_)
  (term, term_)
)

struct fact {
  boost::optional<pred>   kwdExpr_;    // #predname{params}:, if present
  condneg                 condneg_;
};
BOOST_FUSION_ADAPT_STRUCT(
  fact,
  (boost::optional<pred>, kwdExpr_)
  (condneg, condneg_)
)

struct statement {
  pred    pred_;
  sumterm sumterm_;
};
BOOST_FUSION_ADAPT_STRUCT(
  statement,
  (pred, pred_)
  (sumterm, sumterm_)
)

// now the grammar, with synthesized attributes to the above, where possible

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

template <typename Iterator>
struct predicate_logic : qi::grammar<Iterator, statement(), ascii::space_type>
{
  predicate_logic() : predicate_logic::base_type(statement_)
  {
    using boost::spirit::eps;
    using boost::spirit::lexeme;
    using boost::spirit::lit;
    using boost::spirit::_1;
    using boost::spirit::_val;
    using boost::phoenix::at_c;
    using ascii::char_;

    // Note: I decided to rewrite the grammar here to reflect my understanding of the desired result

    identifier_     = lexeme[+(char_("a-zA-Z_0-9"))];
    identifierList_ = identifier_ >> *(',' >> identifier_) ;
    kwdLabel_       = lexeme['#' >> +char_("a-zA-Z_0-9")];
    kwdExpr_        = kwdLabel_ >> '{' >> identifierList_ >> '}' >> ':' ;
    pred_           = identifier_ >> '{' >> identifierList_ >> '}' ;
    term_           = pred_ | ( '(' >> sumterm_ >> ')' ) ;
    condneg_        = -lit('!')[at_c<0>(_val) = true] >> term_[at_c<1>(_val) = _1];
    sumterm_        = prodterm_ >> *('|' >> prodterm_) ;
    prodterm_       = fact_ >> *('&' >> fact_) ;
    // keyword predicate (i.e., #exists{A}:)
    // must decide its operator prededence by choosing what rule the following term obeys
    // IMO intuitively it should bind only to first following term: (possibly negated) predicate or parenthesized expression
    fact_           = -kwdExpr_ >> condneg_ ;

    statement_      = pred_ >> ":=" >> sumterm_ ;
    query_          = ":?" >> sumterm_ ;                // unused for now

    // debugging
    BOOST_SPIRIT_DEBUG_NODE(identifier_);
    BOOST_SPIRIT_DEBUG_NODE(identifierList_);
    BOOST_SPIRIT_DEBUG_NODE(kwdLabel_);
    BOOST_SPIRIT_DEBUG_NODE(kwdExpr_);
    BOOST_SPIRIT_DEBUG_NODE(pred_);
    BOOST_SPIRIT_DEBUG_NODE(term_);
    BOOST_SPIRIT_DEBUG_NODE(condneg_);
    BOOST_SPIRIT_DEBUG_NODE(sumterm_);
    BOOST_SPIRIT_DEBUG_NODE(prodterm_);
    BOOST_SPIRIT_DEBUG_NODE(fact_);
    BOOST_SPIRIT_DEBUG_NODE(statement_);
    BOOST_SPIRIT_DEBUG_NODE(query_);

  }

  qi::rule<Iterator, std::string(), ascii::space_type> identifier_, kwdLabel_;
  qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> identifierList_;
  qi::rule<Iterator, pred(), ascii::space_type> kwdExpr_, pred_;
  qi::rule<Iterator, term(), ascii::space_type> term_;
  qi::rule<Iterator, condneg(), ascii::space_type> condneg_;
  qi::rule<Iterator, prodterm(), ascii::space_type> prodterm_;
  qi::rule<Iterator, sumterm(), ascii::space_type> sumterm_;
  qi::rule<Iterator, fact(), ascii::space_type> fact_;
  qi::rule<Iterator, statement(), ascii::space_type> statement_;
  qi::rule<Iterator, sumterm(), ascii::space_type> query_;

};

bool testparse(const std::string& data) {
  typedef std::string::const_iterator iterator_type;
  predicate_logic<iterator_type> grammar;
  statement result;
  iterator_type beg = data.begin(), end = data.end();
  if (!phrase_parse(beg, end, grammar, ascii::space_type(), result)) {
    std::cerr << "parse failed!\n";
    return false;
  }
  if (beg != end) {
    std::cerr << "not all input was consumed; remaining: ";
    std::copy(beg, end, std::ostream_iterator<char>(std::cerr));
    return false;
  }
  std::cout << "parse successful\n";
  return true;
}

int main() {
  // example data given does not match grammar description - updated as it seemed best to me
  // original data:
  //  std::string data("alive{X}:=(plant(X)|animal(X))|#exists(Y):(Y(X)&alive(Y))");
  std::string data("alive{X}:=(plant{X}|animal{X})|#exists{Y}:(Y{X}&alive{Y})");
  if (!testparse(data))
    return 1;

  data = std::string("dead{X,Y}:=mineral{X}|extraterrestrial{Y}|darkmatter{Z}");
  if (!testparse(data))
    return 1;

  // this should be equivalent to "alive{X}:=(#exists{X}:Y{X})&!somepred{B}
  data = std::string("alive{X}:=#exists{X}:Y{X}&!somepred{B}");
  if (!testparse(data))
    return 1;

  // grouping with paren
  data = std::string("alive{X}:=#exists{X}:(Y{X}&!somepred{B})");
  if (!testparse(data))
    return 1;

  // deliberately separate, should be (#exists{X}:Y{X})|!somepred{B}
  data = std::string("alive{X}:=#exists{X}:Y{X}|!somepred{B}");
  if (!testparse(data))
    return 1;

  // a couple of failing parses, in the name of due diligence
  data = std::string("alive{X}:=B|somepred{A}");   // "B" lacks predicate
  if (testparse(data))
      return 1;

  data = std::string("alive{X}:=somepred{A}&");    // missing product term
  if (testparse(data))
      return 1;

  return 0;
}
	// Attempt at making a Boost grammar that successfully parses Jason Shankel's predicate logic language
	// #define BOOST_SPIRIT_DEBUG

	#include <boost/spirit/include/qi.hpp>
	#include <boost/variant/recursive_variant.hpp>
	#include <boost/fusion/include/adapt_struct.hpp>
	#include <boost/spirit/include/phoenix.hpp>
	#include <boost/optional.hpp>

	// start with the AST
	// ideally reflects the grammar in a natural way

	// a predicate is something like name{p1, p2...}
	// this type can also be used for keywords like #exists{p}
	struct pred {
	std::string id_;
	std::vector<std::string> paramlist_;
	};
	BOOST_FUSION_ADAPT_STRUCT(
	pred,
	(std::string, id_)
	(std::vector<std::string>, paramlist_)
	)

	struct fact;
	typedef std::vector<fact> prodterm;
	typedef std::vector<prodterm> sumterm;

	typedef boost::variant<
	pred,
	boost::recursive_wrapper<sumterm>
	> term;

	struct condneg {
	bool negated_;
	term term_;
	condneg() : negated_(false) {}
	};
	BOOST_FUSION_ADAPT_STRUCT(
	condneg,
	(bool, negated_)
	(term, term_)
	)

	struct fact {
	boost::optional<pred> kwdExpr_; // #predname{params}:, if present
	condneg condneg_;
	};
	BOOST_FUSION_ADAPT_STRUCT(
	fact,
	(boost::optional<pred>, kwdExpr_)
	(condneg, condneg_)
	)

	struct statement {
	pred pred_;
	sumterm sumterm_;
	};
	BOOST_FUSION_ADAPT_STRUCT(
	statement,
	(pred, pred_)
	(sumterm, sumterm_)
	)

	// now the grammar, with synthesized attributes to the above, where possible

	namespace qi = boost::spirit::qi;
	namespace ascii = boost::spirit::ascii;

	template <typename Iterator>
	struct predicate_logic : qi::grammar<Iterator, statement(), ascii::space_type>
	{
	predicate_logic() : predicate_logic::base_type(statement_)
	{
	using boost::spirit::eps;
	using boost::spirit::lexeme;
	using boost::spirit::lit;
	using boost::spirit::_1;
	using boost::spirit::_val;
	using boost::phoenix::at_c;
	using ascii::char_;

	// Note: I decided to rewrite the grammar here to reflect my understanding of the desired result

	identifier_ = lexeme[+(char_("a-zA-Z_0-9"))];
	identifierList_ = identifier_ >> *(',' >> identifier_) ;
	kwdLabel_ = lexeme['#' >> +char_("a-zA-Z_0-9")];
	kwdExpr_ = kwdLabel_ >> '{' >> identifierList_ >> '}' >> ':' ;
	pred_ = identifier_ >> '{' >> identifierList_ >> '}' ;
	term_ = pred_ \| ( '(' >> sumterm_ >> ')' ) ;
	condneg_ = -lit('!')[at_c<0>(_val) = true] >> term_[at_c<1>(_val) = _1];
	sumterm_ = prodterm_ >> *('\|' >> prodterm_) ;
	prodterm_ = fact_ >> *('&' >> fact_) ;
	// keyword predicate (i.e., #exists{A}:)
	// must decide its operator prededence by choosing what rule the following term obeys
	// IMO intuitively it should bind only to first following term: (possibly negated) predicate or parenthesized expression
	fact_ = -kwdExpr_ >> condneg_ ;

	statement_ = pred_ >> ":=" >> sumterm_ ;
	query_ = ":?" >> sumterm_ ; // unused for now

	// debugging
	BOOST_SPIRIT_DEBUG_NODE(identifier_);
	BOOST_SPIRIT_DEBUG_NODE(identifierList_);
	BOOST_SPIRIT_DEBUG_NODE(kwdLabel_);
	BOOST_SPIRIT_DEBUG_NODE(kwdExpr_);
	BOOST_SPIRIT_DEBUG_NODE(pred_);
	BOOST_SPIRIT_DEBUG_NODE(term_);
	BOOST_SPIRIT_DEBUG_NODE(condneg_);
	BOOST_SPIRIT_DEBUG_NODE(sumterm_);
	BOOST_SPIRIT_DEBUG_NODE(prodterm_);
	BOOST_SPIRIT_DEBUG_NODE(fact_);
	BOOST_SPIRIT_DEBUG_NODE(statement_);
	BOOST_SPIRIT_DEBUG_NODE(query_);

	}

	qi::rule<Iterator, std::string(), ascii::space_type> identifier_, kwdLabel_;
	qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> identifierList_;
	qi::rule<Iterator, pred(), ascii::space_type> kwdExpr_, pred_;
	qi::rule<Iterator, term(), ascii::space_type> term_;
	qi::rule<Iterator, condneg(), ascii::space_type> condneg_;
	qi::rule<Iterator, prodterm(), ascii::space_type> prodterm_;
	qi::rule<Iterator, sumterm(), ascii::space_type> sumterm_;
	qi::rule<Iterator, fact(), ascii::space_type> fact_;
	qi::rule<Iterator, statement(), ascii::space_type> statement_;
	qi::rule<Iterator, sumterm(), ascii::space_type> query_;

	};

	bool testparse(const std::string& data) {
	typedef std::string::const_iterator iterator_type;
	predicate_logic<iterator_type> grammar;
	statement result;
	iterator_type beg = data.begin(), end = data.end();
	if (!phrase_parse(beg, end, grammar, ascii::space_type(), result)) {
	std::cerr << "parse failed!\n";
	return false;
	}
	if (beg != end) {
	std::cerr << "not all input was consumed; remaining: ";
	std::copy(beg, end, std::ostream_iterator<char>(std::cerr));
	return false;
	}
	std::cout << "parse successful\n";
	return true;
	}

	int main() {
	// example data given does not match grammar description - updated as it seemed best to me
	// original data:
	// std::string data("alive{X}:=(plant(X)\|animal(X))\|#exists(Y):(Y(X)&alive(Y))");
	std::string data("alive{X}:=(plant{X}\|animal{X})\|#exists{Y}:(Y{X}&alive{Y})");
	if (!testparse(data))
	return 1;

	data = std::string("dead{X,Y}:=mineral{X}\|extraterrestrial{Y}\|darkmatter{Z}");
	if (!testparse(data))
	return 1;

	// this should be equivalent to "alive{X}:=(#exists{X}:Y{X})&!somepred{B}
	data = std::string("alive{X}:=#exists{X}:Y{X}&!somepred{B}");
	if (!testparse(data))
	return 1;

	// grouping with paren
	data = std::string("alive{X}:=#exists{X}:(Y{X}&!somepred{B})");
	if (!testparse(data))
	return 1;

	// deliberately separate, should be (#exists{X}:Y{X})\|!somepred{B}
	data = std::string("alive{X}:=#exists{X}:Y{X}\|!somepred{B}");
	if (!testparse(data))
	return 1;

	// a couple of failing parses, in the name of due diligence
	data = std::string("alive{X}:=B\|somepred{A}"); // "B" lacks predicate
	if (testparse(data))
	return 1;

	data = std::string("alive{X}:=somepred{A}&"); // missing product term
	if (testparse(data))
	return 1;

	return 0;
	}