Last active
December 23, 2015 14:39
-
-
Save pstuifzand/6650365 to your computer and use it in GitHub Desktop.
Example of Marpa in C++.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const char* marpa_errors[92] = { | |
"ERR_NONE", | |
"ERR_AHFA_IX_NEGATIVE", | |
"ERR_AHFA_IX_OOB", | |
"ERR_ANDID_NEGATIVE", | |
"ERR_ANDID_NOT_IN_OR", | |
"ERR_ANDIX_NEGATIVE", | |
"ERR_BAD_SEPARATOR", | |
"ERR_BOCAGE_ITERATION_EXHAUSTED", | |
"ERR_COUNTED_NULLABLE", | |
"ERR_DEVELOPMENT", | |
"ERR_DUPLICATE_AND_NODE", | |
"ERR_DUPLICATE_RULE", | |
"ERR_DUPLICATE_TOKEN", | |
"ERR_EIM_COUNT", | |
"ERR_EIM_ID_INVALID", | |
"ERR_EVENT_IX_NEGATIVE", | |
"ERR_EVENT_IX_OOB", | |
"ERR_GRAMMAR_HAS_CYCLE", | |
"ERR_INACCESSIBLE_TOKEN", | |
"ERR_INTERNAL", | |
"ERR_INVALID_AHFA_ID", | |
"ERR_INVALID_AIMID", | |
"ERR_INVALID_BOOLEAN", | |
"ERR_INVALID_IRLID", | |
"ERR_INVALID_ISYID", | |
"ERR_INVALID_LOCATION", | |
"ERR_INVALID_RULE_ID", | |
"ERR_INVALID_START_SYMBOL", | |
"ERR_INVALID_SYMBOL_ID", | |
"ERR_I_AM_NOT_OK", | |
"ERR_MAJOR_VERSION_MISMATCH", | |
"ERR_MICRO_VERSION_MISMATCH", | |
"ERR_MINOR_VERSION_MISMATCH", | |
"ERR_NOOKID_NEGATIVE", | |
"ERR_NOT_PRECOMPUTED", | |
"ERR_NOT_TRACING_COMPLETION_LINKS", | |
"ERR_NOT_TRACING_LEO_LINKS", | |
"ERR_NOT_TRACING_TOKEN_LINKS", | |
"ERR_NO_AND_NODES", | |
"ERR_NO_EARLEY_SET_AT_LOCATION", | |
"ERR_NO_OR_NODES", | |
"ERR_NO_PARSE", | |
"ERR_NO_RULES", | |
"ERR_NO_START_SYMBOL", | |
"ERR_NO_TOKEN_EXPECTED_HERE", | |
"ERR_NO_TRACE_EIM", | |
"ERR_NO_TRACE_ES", | |
"ERR_NO_TRACE_PIM", | |
"ERR_NO_TRACE_SRCL", | |
"ERR_NULLING_TERMINAL", | |
"ERR_ORDER_FROZEN", | |
"ERR_ORID_NEGATIVE", | |
"ERR_OR_ALREADY_ORDERED", | |
"ERR_PARSE_EXHAUSTED", | |
"ERR_PARSE_TOO_LONG", | |
"ERR_PIM_IS_NOT_LIM", | |
"ERR_POINTER_ARG_NULL", | |
"ERR_PRECOMPUTED", | |
"ERR_PROGRESS_REPORT_EXHAUSTED", | |
"ERR_PROGRESS_REPORT_NOT_STARTED", | |
"ERR_RECCE_NOT_ACCEPTING_INPUT", | |
"ERR_RECCE_NOT_STARTED", | |
"ERR_RECCE_STARTED", | |
"ERR_RHS_IX_NEGATIVE", | |
"ERR_RHS_IX_OOB", | |
"ERR_RHS_TOO_LONG", | |
"ERR_SEQUENCE_LHS_NOT_UNIQUE", | |
"ERR_SOURCE_TYPE_IS_AMBIGUOUS", | |
"ERR_SOURCE_TYPE_IS_COMPLETION", | |
"ERR_SOURCE_TYPE_IS_LEO", | |
"ERR_SOURCE_TYPE_IS_NONE", | |
"ERR_SOURCE_TYPE_IS_TOKEN", | |
"ERR_SOURCE_TYPE_IS_UNKNOWN", | |
"ERR_START_NOT_LHS", | |
"ERR_SYMBOL_VALUED_CONFLICT", | |
"ERR_TERMINAL_IS_LOCKED", | |
"ERR_TOKEN_IS_NOT_TERMINAL", | |
"ERR_TOKEN_LENGTH_LE_ZERO", | |
"ERR_TOKEN_TOO_LONG", | |
"ERR_TREE_EXHAUSTED", | |
"ERR_TREE_PAUSED", | |
"ERR_UNEXPECTED_TOKEN_ID", | |
"ERR_UNPRODUCTIVE_START", | |
"ERR_VALUATOR_INACTIVE", | |
"ERR_VALUED_IS_LOCKED", | |
"ERR_RANK_TOO_LOW", | |
"ERR_RANK_TOO_HIGH", | |
"ERR_SYMBOL_IS_NULLING", | |
"ERR_SYMBOL_IS_UNUSED", | |
"ERR_NO_SUCH_RULE_ID", | |
"ERR_NO_SUCH_SYMBOL_ID", | |
"ERR_BEFORE_FIRST_TREE", | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <vector> | |
#include <algorithm> | |
#include <iostream> | |
#include "util.h" | |
#include "marpa.h" | |
extern const char* marpa_errors[92]; | |
using namespace marpa; | |
int main() | |
{ | |
grammar g; | |
/* DEFINE GRAMMAR */ | |
grammar::symbol_id R_START = g.new_symbol(); | |
grammar::symbol_id R_EXPR = g.new_symbol(); | |
grammar::symbol_id R_FACTOR = g.new_symbol(); | |
grammar::symbol_id R_TERM = g.new_symbol(); | |
grammar::symbol_id T_ADD = g.new_symbol(); | |
grammar::symbol_id T_MUL = g.new_symbol(); | |
grammar::symbol_id T_NUM = g.new_symbol(); | |
grammar::symbol_id T_LB = g.new_symbol(); | |
grammar::symbol_id T_RB = g.new_symbol(); | |
g.start_symbol(R_START); | |
using rule = grammar::rule_id; | |
rule rule_id_start = g.add_rule(R_START, { R_EXPR }); | |
rule rule_id_expr_0 = g.add_rule(R_EXPR, { R_TERM }); | |
rule rule_id_term_0 = g.add_rule(R_TERM, { R_TERM, T_ADD, R_TERM }); | |
rule rule_id_term_1 = g.add_rule(R_TERM, { R_FACTOR }); | |
rule rule_id_factor_0 = g.add_rule(R_FACTOR, { R_FACTOR, T_MUL, R_FACTOR }); | |
rule rule_id_factor_1 = g.add_rule(R_FACTOR, { T_LB, R_EXPR, T_RB }); | |
rule rule_id_factor_2 = g.add_rule(R_FACTOR, { T_NUM }); | |
/* END OF GRAMMAR */ | |
if (g.precompute() < 0) { | |
std::cout << "precompute() failed\n"; | |
std::cout << marpa_errors[g.error()] << "\n"; | |
std::cout << "\n"; | |
exit(1); | |
} | |
recognizer r{g}; | |
/* READ TOKENS */ | |
std::string input = "10 + 10 * 39"; | |
std::string::iterator it = input.begin(); | |
while (it != input.end()) { | |
if (isspace(*it)) { | |
it++; | |
} | |
else if (isdigit(*it)) { | |
auto n = parse_digit(it, input.end(), 10, '0'); | |
r.read(T_NUM, n.second, 1); | |
it = n.first; | |
} | |
else if (*it == '+') { | |
r.read(T_ADD, 0, 1); | |
it++; | |
} | |
else if (*it == '*') { | |
r.read(T_MUL, 0, 1); | |
it++; | |
} | |
else if (*it == '(' || *it == ')') { | |
r.read(*it == '(' ? T_LB : T_RB, 0, 1); | |
it++; | |
} | |
} | |
bocage b{r, r.latest_earley_set()}; | |
order o{b}; | |
tree t{o}; | |
/* Evaluate trees */ | |
while (t.next() >= 0) { | |
value v{t}; | |
v.rule_is_valued(rule_id_start, 1); | |
v.rule_is_valued(rule_id_expr_0, 1); | |
v.rule_is_valued(rule_id_term_0, 1); | |
v.rule_is_valued(rule_id_term_1, 1); | |
v.rule_is_valued(rule_id_factor_0, 1); | |
v.rule_is_valued(rule_id_factor_1, 1); | |
v.rule_is_valued(rule_id_factor_2, 1); | |
std::vector<int> stack; | |
for (;;) { | |
value::step_type type = v.step(); | |
switch (type) { | |
case MARPA_STEP_INITIAL: | |
stack.resize(1); | |
break; | |
case MARPA_STEP_TOKEN: { | |
stack.resize(std::max((std::vector<int>::size_type)v.result()+1, stack.size())); | |
stack[v.result()] = v.token_value(); | |
break; | |
} | |
case MARPA_STEP_RULE: { | |
grammar::rule_id rule = v.rule(); | |
int res = v.result(); | |
/* BEGIN OF RULE SEMANTICS */ | |
if (rule == rule_id_start) { | |
stack[res] = stack[v.arg_0()]; | |
} | |
else if (rule == rule_id_expr_0) { | |
stack[res] = stack[v.arg_0()]; | |
} | |
else if (rule == rule_id_term_0) { | |
stack[res] = stack[v.arg_0()] + stack[v.arg_0() + 2]; | |
} | |
else if (rule == rule_id_term_1) { | |
stack[res] = stack[v.arg_0()]; | |
} | |
else if (rule == rule_id_factor_0) { | |
stack[res] = stack[v.arg_0()] * stack[v.arg_0() + 2]; | |
} | |
else if (rule == rule_id_factor_1) { | |
stack[res] = stack[v.arg_0() + 1]; | |
} | |
else if (rule == rule_id_factor_2) { | |
stack[res] = stack[v.arg_0()]; | |
} | |
/* END OF RULE SEMANTICS */ | |
break; | |
} | |
case MARPA_STEP_NULLING_SYMBOL: { | |
int res = v.result(); | |
stack[res] = v.token_value(); | |
break; | |
} | |
case MARPA_STEP_INACTIVE: | |
goto END; | |
} | |
} | |
END: | |
std::cout << stack[0] << "\n"; | |
} | |
return 0; | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
all: main | |
clean: | |
rm -f main.o | |
rm -f main | |
main.cpp: marpa.h | |
main: main.cpp errors.cpp | |
gcc -o $@ $^ libmarpa.a -lstdc++ -std=c++11 -Wall -g | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef MARPA_H | |
#define MARPA_H | |
extern "C" { | |
#include <marpa.h> | |
#include <marpa_api.h> | |
} | |
namespace marpa { | |
class grammar { | |
public: | |
typedef int earleme; | |
typedef int error_code; | |
typedef int symbol_id; | |
typedef int rule_id; | |
typedef int event_type; | |
typedef int rank; | |
private: | |
typedef Marpa_Grammar handle_type; | |
public: | |
grammar() : handle(marpa_g_new(0)) {} | |
grammar(const grammar& g) { | |
marpa_g_ref(g.handle); | |
this->handle = g.handle; | |
} | |
grammar& operator=(const grammar& g) { | |
marpa_g_ref(g.handle); | |
this->handle = g.handle; | |
return *this; | |
} | |
~grammar() { | |
marpa_g_unref(handle); | |
} | |
symbol_id start_symbol() const { | |
return marpa_g_start_symbol(handle); | |
} | |
symbol_id start_symbol(symbol_id s) { | |
return marpa_g_start_symbol_set(handle, s); | |
} | |
symbol_id new_symbol() { | |
return marpa_g_symbol_new(handle); | |
} | |
rule_id new_rule(symbol_id lhs_id, symbol_id* rhs_ids, int length) { | |
return marpa_g_rule_new(handle, lhs_id, rhs_ids, length); | |
} | |
rule_id add_rule(symbol_id lhs_id, std::initializer_list<symbol_id> rhs) { | |
grammar::rule_id rhs_ids[rhs.size()]; | |
std::copy(rhs.begin(), rhs.end(), rhs_ids); | |
return new_rule(lhs_id, rhs_ids, rhs.size()); | |
} | |
rule_id new_sequence(symbol_id lhs_id, symbol_id rhs_id, symbol_id separator_id, int min, int flags) { | |
return marpa_g_sequence_new(handle, lhs_id, rhs_id, separator_id, min, flags); | |
} | |
int precompute() { | |
return marpa_g_precompute(handle); | |
} | |
int error() { | |
return marpa_g_error(handle, 0); | |
} | |
public: | |
handle_type internal_handle() { return handle; } | |
private: | |
handle_type handle; | |
}; | |
class recognizer { | |
private: | |
typedef Marpa_Recognizer handle_type; | |
handle_type handle; | |
public: | |
typedef int earley_set_id; | |
handle_type internal_handle() { | |
return handle; | |
} | |
public: | |
recognizer(grammar& g) | |
: handle(marpa_r_new(g.internal_handle())) { | |
start_input(); | |
} | |
~recognizer() { | |
marpa_r_unref(handle); | |
} | |
void start_input() { | |
marpa_r_start_input(handle); | |
} | |
void alternative(grammar::symbol_id sym_id, int value, int length) { | |
marpa_r_alternative(handle, sym_id, value, length); | |
} | |
void earleme_complete() { | |
marpa_r_earleme_complete(handle); | |
} | |
earley_set_id latest_earley_set() { | |
return marpa_r_latest_earley_set(handle); | |
} | |
void read(grammar::symbol_id sym_id, int value, int length) { | |
alternative(sym_id, value, length); | |
earleme_complete(); | |
} | |
private: | |
recognizer& operator=(const recognizer&); | |
recognizer(const recognizer&); | |
}; | |
class bocage { | |
private: | |
typedef Marpa_Bocage handle_type; | |
handle_type handle; | |
public: | |
handle_type internal_handle() { return handle; } | |
public: | |
bocage(recognizer& r, recognizer::earley_set_id set_id) : handle(marpa_b_new(r.internal_handle(), set_id)) {} | |
~bocage() { marpa_b_unref(handle); } | |
private: | |
bocage& operator=(const bocage&); | |
bocage(const bocage&); | |
}; | |
class order { | |
private: | |
typedef Marpa_Order handle_type; | |
handle_type handle; | |
public: | |
handle_type internal_handle() { return handle; } | |
public: | |
order(bocage& b) : handle(marpa_o_new(b.internal_handle())) {} | |
private: | |
order& operator=(const order&); | |
order(const order&); | |
}; | |
class tree { | |
private: | |
typedef Marpa_Tree handle_type; | |
handle_type handle; | |
public: | |
handle_type internal_handle() { return handle; } | |
public: | |
tree(order& o) : handle(marpa_t_new(o.internal_handle())) {} | |
int next() { return marpa_t_next(handle); } | |
private: | |
tree& operator=(const tree&); | |
tree(const tree&); | |
}; | |
class value { | |
public: | |
typedef int step_type; | |
private: | |
typedef Marpa_Value handle_type; | |
handle_type handle; | |
public: | |
handle_type internal_handle() { return handle; } | |
public: | |
value(tree& t) : handle(marpa_v_new(t.internal_handle())) {} | |
step_type step() { return marpa_v_step(handle); } | |
void rule_is_valued(grammar::rule_id rule, int value) { marpa_v_rule_is_valued_set(handle, rule, value); } | |
int result() { return marpa_v_result(handle); } | |
int arg_0() { return marpa_v_arg_0(handle); } | |
int arg_n() { return marpa_v_arg_n(handle); } | |
int token_value() { return marpa_v_token_value(handle); } | |
int rule() { return marpa_v_rule(handle); } | |
private: | |
value& operator=(const value&); | |
value(const value&); | |
}; | |
class event { | |
}; | |
} | |
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template <class I, class N, class C> | |
std::pair<I, N> parse_digit(I first, I last, N base, C zero) | |
{ | |
if (first == last) return make_pair(last, 0); | |
N number = N(0); | |
while (first != last && isdigit(*first)) { | |
number *= base; | |
if (*first >= zero && *first < zero + base) { | |
number += *first - zero; | |
first++; | |
} | |
} | |
return make_pair(first, number); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment