Skip to content

Instantly share code, notes, and snippets.

@pstuifzand
Last active December 23, 2015 14:39
Show Gist options
  • Save pstuifzand/6650365 to your computer and use it in GitHub Desktop.
Save pstuifzand/6650365 to your computer and use it in GitHub Desktop.
Example of Marpa in C++.
const char* marpa_errors[92] = {
"ERR_NONE",
"ERR_AHFA_IX_NEGATIVE",
"ERR_AHFA_IX_OOB",
"ERR_ANDID_NEGATIVE",
"ERR_ANDID_NOT_IN_OR",
"ERR_ANDIX_NEGATIVE",
"ERR_BAD_SEPARATOR",
"ERR_BOCAGE_ITERATION_EXHAUSTED",
"ERR_COUNTED_NULLABLE",
"ERR_DEVELOPMENT",
"ERR_DUPLICATE_AND_NODE",
"ERR_DUPLICATE_RULE",
"ERR_DUPLICATE_TOKEN",
"ERR_EIM_COUNT",
"ERR_EIM_ID_INVALID",
"ERR_EVENT_IX_NEGATIVE",
"ERR_EVENT_IX_OOB",
"ERR_GRAMMAR_HAS_CYCLE",
"ERR_INACCESSIBLE_TOKEN",
"ERR_INTERNAL",
"ERR_INVALID_AHFA_ID",
"ERR_INVALID_AIMID",
"ERR_INVALID_BOOLEAN",
"ERR_INVALID_IRLID",
"ERR_INVALID_ISYID",
"ERR_INVALID_LOCATION",
"ERR_INVALID_RULE_ID",
"ERR_INVALID_START_SYMBOL",
"ERR_INVALID_SYMBOL_ID",
"ERR_I_AM_NOT_OK",
"ERR_MAJOR_VERSION_MISMATCH",
"ERR_MICRO_VERSION_MISMATCH",
"ERR_MINOR_VERSION_MISMATCH",
"ERR_NOOKID_NEGATIVE",
"ERR_NOT_PRECOMPUTED",
"ERR_NOT_TRACING_COMPLETION_LINKS",
"ERR_NOT_TRACING_LEO_LINKS",
"ERR_NOT_TRACING_TOKEN_LINKS",
"ERR_NO_AND_NODES",
"ERR_NO_EARLEY_SET_AT_LOCATION",
"ERR_NO_OR_NODES",
"ERR_NO_PARSE",
"ERR_NO_RULES",
"ERR_NO_START_SYMBOL",
"ERR_NO_TOKEN_EXPECTED_HERE",
"ERR_NO_TRACE_EIM",
"ERR_NO_TRACE_ES",
"ERR_NO_TRACE_PIM",
"ERR_NO_TRACE_SRCL",
"ERR_NULLING_TERMINAL",
"ERR_ORDER_FROZEN",
"ERR_ORID_NEGATIVE",
"ERR_OR_ALREADY_ORDERED",
"ERR_PARSE_EXHAUSTED",
"ERR_PARSE_TOO_LONG",
"ERR_PIM_IS_NOT_LIM",
"ERR_POINTER_ARG_NULL",
"ERR_PRECOMPUTED",
"ERR_PROGRESS_REPORT_EXHAUSTED",
"ERR_PROGRESS_REPORT_NOT_STARTED",
"ERR_RECCE_NOT_ACCEPTING_INPUT",
"ERR_RECCE_NOT_STARTED",
"ERR_RECCE_STARTED",
"ERR_RHS_IX_NEGATIVE",
"ERR_RHS_IX_OOB",
"ERR_RHS_TOO_LONG",
"ERR_SEQUENCE_LHS_NOT_UNIQUE",
"ERR_SOURCE_TYPE_IS_AMBIGUOUS",
"ERR_SOURCE_TYPE_IS_COMPLETION",
"ERR_SOURCE_TYPE_IS_LEO",
"ERR_SOURCE_TYPE_IS_NONE",
"ERR_SOURCE_TYPE_IS_TOKEN",
"ERR_SOURCE_TYPE_IS_UNKNOWN",
"ERR_START_NOT_LHS",
"ERR_SYMBOL_VALUED_CONFLICT",
"ERR_TERMINAL_IS_LOCKED",
"ERR_TOKEN_IS_NOT_TERMINAL",
"ERR_TOKEN_LENGTH_LE_ZERO",
"ERR_TOKEN_TOO_LONG",
"ERR_TREE_EXHAUSTED",
"ERR_TREE_PAUSED",
"ERR_UNEXPECTED_TOKEN_ID",
"ERR_UNPRODUCTIVE_START",
"ERR_VALUATOR_INACTIVE",
"ERR_VALUED_IS_LOCKED",
"ERR_RANK_TOO_LOW",
"ERR_RANK_TOO_HIGH",
"ERR_SYMBOL_IS_NULLING",
"ERR_SYMBOL_IS_UNUSED",
"ERR_NO_SUCH_RULE_ID",
"ERR_NO_SUCH_SYMBOL_ID",
"ERR_BEFORE_FIRST_TREE",
};
#include <vector>
#include <algorithm>
#include <iostream>
#include "util.h"
#include "marpa.h"
extern const char* marpa_errors[92];
using namespace marpa;
int main()
{
grammar g;
/* DEFINE GRAMMAR */
grammar::symbol_id R_START = g.new_symbol();
grammar::symbol_id R_EXPR = g.new_symbol();
grammar::symbol_id R_FACTOR = g.new_symbol();
grammar::symbol_id R_TERM = g.new_symbol();
grammar::symbol_id T_ADD = g.new_symbol();
grammar::symbol_id T_MUL = g.new_symbol();
grammar::symbol_id T_NUM = g.new_symbol();
grammar::symbol_id T_LB = g.new_symbol();
grammar::symbol_id T_RB = g.new_symbol();
g.start_symbol(R_START);
using rule = grammar::rule_id;
rule rule_id_start = g.add_rule(R_START, { R_EXPR });
rule rule_id_expr_0 = g.add_rule(R_EXPR, { R_TERM });
rule rule_id_term_0 = g.add_rule(R_TERM, { R_TERM, T_ADD, R_TERM });
rule rule_id_term_1 = g.add_rule(R_TERM, { R_FACTOR });
rule rule_id_factor_0 = g.add_rule(R_FACTOR, { R_FACTOR, T_MUL, R_FACTOR });
rule rule_id_factor_1 = g.add_rule(R_FACTOR, { T_LB, R_EXPR, T_RB });
rule rule_id_factor_2 = g.add_rule(R_FACTOR, { T_NUM });
/* END OF GRAMMAR */
if (g.precompute() < 0) {
std::cout << "precompute() failed\n";
std::cout << marpa_errors[g.error()] << "\n";
std::cout << "\n";
exit(1);
}
recognizer r{g};
/* READ TOKENS */
std::string input = "10 + 10 * 39";
std::string::iterator it = input.begin();
while (it != input.end()) {
if (isspace(*it)) {
it++;
}
else if (isdigit(*it)) {
auto n = parse_digit(it, input.end(), 10, '0');
r.read(T_NUM, n.second, 1);
it = n.first;
}
else if (*it == '+') {
r.read(T_ADD, 0, 1);
it++;
}
else if (*it == '*') {
r.read(T_MUL, 0, 1);
it++;
}
else if (*it == '(' || *it == ')') {
r.read(*it == '(' ? T_LB : T_RB, 0, 1);
it++;
}
}
bocage b{r, r.latest_earley_set()};
order o{b};
tree t{o};
/* Evaluate trees */
while (t.next() >= 0) {
value v{t};
v.rule_is_valued(rule_id_start, 1);
v.rule_is_valued(rule_id_expr_0, 1);
v.rule_is_valued(rule_id_term_0, 1);
v.rule_is_valued(rule_id_term_1, 1);
v.rule_is_valued(rule_id_factor_0, 1);
v.rule_is_valued(rule_id_factor_1, 1);
v.rule_is_valued(rule_id_factor_2, 1);
std::vector<int> stack;
for (;;) {
value::step_type type = v.step();
switch (type) {
case MARPA_STEP_INITIAL:
stack.resize(1);
break;
case MARPA_STEP_TOKEN: {
stack.resize(std::max((std::vector<int>::size_type)v.result()+1, stack.size()));
stack[v.result()] = v.token_value();
break;
}
case MARPA_STEP_RULE: {
grammar::rule_id rule = v.rule();
int res = v.result();
/* BEGIN OF RULE SEMANTICS */
if (rule == rule_id_start) {
stack[res] = stack[v.arg_0()];
}
else if (rule == rule_id_expr_0) {
stack[res] = stack[v.arg_0()];
}
else if (rule == rule_id_term_0) {
stack[res] = stack[v.arg_0()] + stack[v.arg_0() + 2];
}
else if (rule == rule_id_term_1) {
stack[res] = stack[v.arg_0()];
}
else if (rule == rule_id_factor_0) {
stack[res] = stack[v.arg_0()] * stack[v.arg_0() + 2];
}
else if (rule == rule_id_factor_1) {
stack[res] = stack[v.arg_0() + 1];
}
else if (rule == rule_id_factor_2) {
stack[res] = stack[v.arg_0()];
}
/* END OF RULE SEMANTICS */
break;
}
case MARPA_STEP_NULLING_SYMBOL: {
int res = v.result();
stack[res] = v.token_value();
break;
}
case MARPA_STEP_INACTIVE:
goto END;
}
}
END:
std::cout << stack[0] << "\n";
}
return 0;
}
all: main
clean:
rm -f main.o
rm -f main
main.cpp: marpa.h
main: main.cpp errors.cpp
gcc -o $@ $^ libmarpa.a -lstdc++ -std=c++11 -Wall -g
#ifndef MARPA_H
#define MARPA_H
extern "C" {
#include <marpa.h>
#include <marpa_api.h>
}
namespace marpa {
class grammar {
public:
typedef int earleme;
typedef int error_code;
typedef int symbol_id;
typedef int rule_id;
typedef int event_type;
typedef int rank;
private:
typedef Marpa_Grammar handle_type;
public:
grammar() : handle(marpa_g_new(0)) {}
grammar(const grammar& g) {
marpa_g_ref(g.handle);
this->handle = g.handle;
}
grammar& operator=(const grammar& g) {
marpa_g_ref(g.handle);
this->handle = g.handle;
return *this;
}
~grammar() {
marpa_g_unref(handle);
}
symbol_id start_symbol() const {
return marpa_g_start_symbol(handle);
}
symbol_id start_symbol(symbol_id s) {
return marpa_g_start_symbol_set(handle, s);
}
symbol_id new_symbol() {
return marpa_g_symbol_new(handle);
}
rule_id new_rule(symbol_id lhs_id, symbol_id* rhs_ids, int length) {
return marpa_g_rule_new(handle, lhs_id, rhs_ids, length);
}
rule_id add_rule(symbol_id lhs_id, std::initializer_list<symbol_id> rhs) {
grammar::rule_id rhs_ids[rhs.size()];
std::copy(rhs.begin(), rhs.end(), rhs_ids);
return new_rule(lhs_id, rhs_ids, rhs.size());
}
rule_id new_sequence(symbol_id lhs_id, symbol_id rhs_id, symbol_id separator_id, int min, int flags) {
return marpa_g_sequence_new(handle, lhs_id, rhs_id, separator_id, min, flags);
}
int precompute() {
return marpa_g_precompute(handle);
}
int error() {
return marpa_g_error(handle, 0);
}
public:
handle_type internal_handle() { return handle; }
private:
handle_type handle;
};
class recognizer {
private:
typedef Marpa_Recognizer handle_type;
handle_type handle;
public:
typedef int earley_set_id;
handle_type internal_handle() {
return handle;
}
public:
recognizer(grammar& g)
: handle(marpa_r_new(g.internal_handle())) {
start_input();
}
~recognizer() {
marpa_r_unref(handle);
}
void start_input() {
marpa_r_start_input(handle);
}
void alternative(grammar::symbol_id sym_id, int value, int length) {
marpa_r_alternative(handle, sym_id, value, length);
}
void earleme_complete() {
marpa_r_earleme_complete(handle);
}
earley_set_id latest_earley_set() {
return marpa_r_latest_earley_set(handle);
}
void read(grammar::symbol_id sym_id, int value, int length) {
alternative(sym_id, value, length);
earleme_complete();
}
private:
recognizer& operator=(const recognizer&);
recognizer(const recognizer&);
};
class bocage {
private:
typedef Marpa_Bocage handle_type;
handle_type handle;
public:
handle_type internal_handle() { return handle; }
public:
bocage(recognizer& r, recognizer::earley_set_id set_id) : handle(marpa_b_new(r.internal_handle(), set_id)) {}
~bocage() { marpa_b_unref(handle); }
private:
bocage& operator=(const bocage&);
bocage(const bocage&);
};
class order {
private:
typedef Marpa_Order handle_type;
handle_type handle;
public:
handle_type internal_handle() { return handle; }
public:
order(bocage& b) : handle(marpa_o_new(b.internal_handle())) {}
private:
order& operator=(const order&);
order(const order&);
};
class tree {
private:
typedef Marpa_Tree handle_type;
handle_type handle;
public:
handle_type internal_handle() { return handle; }
public:
tree(order& o) : handle(marpa_t_new(o.internal_handle())) {}
int next() { return marpa_t_next(handle); }
private:
tree& operator=(const tree&);
tree(const tree&);
};
class value {
public:
typedef int step_type;
private:
typedef Marpa_Value handle_type;
handle_type handle;
public:
handle_type internal_handle() { return handle; }
public:
value(tree& t) : handle(marpa_v_new(t.internal_handle())) {}
step_type step() { return marpa_v_step(handle); }
void rule_is_valued(grammar::rule_id rule, int value) { marpa_v_rule_is_valued_set(handle, rule, value); }
int result() { return marpa_v_result(handle); }
int arg_0() { return marpa_v_arg_0(handle); }
int arg_n() { return marpa_v_arg_n(handle); }
int token_value() { return marpa_v_token_value(handle); }
int rule() { return marpa_v_rule(handle); }
private:
value& operator=(const value&);
value(const value&);
};
class event {
};
}
#endif
template <class I, class N, class C>
std::pair<I, N> parse_digit(I first, I last, N base, C zero)
{
if (first == last) return make_pair(last, 0);
N number = N(0);
while (first != last && isdigit(*first)) {
number *= base;
if (*first >= zero && *first < zero + base) {
number += *first - zero;
first++;
}
}
return make_pair(first, number);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment