Created
October 21, 2015 11:30
-
-
Save marionette-of-u/197612d76022858327aa to your computer and use it in GitHub Desktop.
lexerに転用するためのautomatonです。正規表現からNFA、NFAからDFA、DFAから最小DFAへの変換が終わりました。 (An automaton intended for reuse in a lexer. Conversion from regular expression to NFA, NFA to DFA, and DFA to minimized DFA is complete.)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%token charactor<node*> vertical_bar dot asterisk plus question l_bracket r_bracket l_paren r_paren concat hyphen; | |
%namespace regex_parser; | |
S<node*> | |
: [make_identity] LevelC(0) | |
; | |
LevelC<node*> | |
: [make_or] LevelB(0) vertical_bar LevelC(1) | |
| [make_concat] LevelB(0) LevelC(1) | |
| [make_identity] LevelB(0) | |
; | |
LevelB<node*> | |
: [make_kleene] LevelA(0) asterisk | |
| [make_kleene_plus] LevelA(0) plus | |
| [make_one_or_zero] LevelA(0) question | |
| [make_identity] LevelA(0) | |
; | |
LevelA<node*> | |
: [make_charactor] charactor(0) | |
| [make_dot] dot | |
| [make_class] l_bracket CharactorSequence(0) r_bracket | |
| [make_identity] l_paren LevelC(0) r_paren | |
; | |
CharactorSequence<node*> | |
: [make_seq] charactor(0) | |
| [make_seq] CharactorSequence(0) charactor(1) | |
| [make_class_range] CharactorRange(0) | |
| [make_class_range] CharactorSequence(0) CharactorRange(1) | |
; | |
CharactorRange<node*> | |
: [make_range] charactor(0) hyphen charactor(1) | |
; | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <exception>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <stdexcept>
#include <string>
#include <typeinfo>
#include <utility>
#include <vector>
#include "regex_parser.hpp"
namespace automaton{ | |
// A single NFA/DFA state.
// - edge: outgoing transitions as (input character, target state index)
//   pairs; '\0' is used as the epsilon label by the NFA construction.
// - token_name: non-null iff this state is accepting; holds the token name.
class node{
public:
    node() = default;
    // Deep-copy; the original dereferenced a null token_name (UB) when
    // copying a non-accepting state.
    node(const node &other)
        : edge(other.edge),
          token_name(other.token_name ? new std::string(*other.token_name) : nullptr){}
    node(node &&other) noexcept
        : edge(std::move(other.edge)), token_name(std::move(other.token_name)){}
    // Copy/move assignment completed for the Rule of Five (copy assignment
    // was implicitly deleted by the unique_ptr member).
    node &operator=(const node &other){
        if(this != &other){
            edge = other.edge;
            token_name.reset(other.token_name ? new std::string(*other.token_name) : nullptr);
        }
        return *this;
    }
    node &operator=(node &&other) noexcept = default;
    ~node() = default;
    std::vector<std::pair<char, std::size_t>> edge;
    std::unique_ptr<std::string> token_name;
};
using node_pool = std::vector<node>; | |
std::set<std::size_t> edge(const node_pool &pool, std::size_t s, char c){ | |
std::set<std::size_t> ret; | |
for(auto &i : pool[s].edge){ | |
if(i.first == c){ | |
ret.insert(i.second); | |
} | |
} | |
return ret; | |
} | |
std::set<std::size_t> closure(const node_pool &pool, std::set<std::size_t> T){ | |
std::set<std::size_t> U; | |
do{ | |
U = T; | |
std::set<std::size_t> V; | |
for(auto &s : U){ | |
std::set<std::size_t> W = edge(pool, s, '\0'); | |
V.insert(W.begin(), W.end()); | |
} | |
for(auto &i : U){ | |
T.insert(i); | |
} | |
for(auto &i : V){ | |
T.insert(i); | |
} | |
}while(T != U); | |
return T; | |
} | |
std::set<std::size_t> DFA_edge(const node_pool &pool, const std::set<std::size_t> &d, char c){ | |
std::set<std::size_t> e; | |
for(auto &s : d){ | |
std::set<std::size_t> f = edge(pool, s, c); | |
e.insert(f.begin(), f.end()); | |
} | |
return closure(pool, e); | |
} | |
// Exception for ambiguous token names; declared here but not thrown in the
// code visible in this file.  The inherited constructor accepts the same
// std::string argument the hand-written one did.
struct error_ambiguous_nonterminal_token : public std::runtime_error{
    using std::runtime_error::runtime_error;
};
std::set<char> collect_char(const node_pool &pool){ | |
std::set<char> s; | |
for(auto &i : pool){ | |
for(auto &k : i.edge){ | |
s.insert(k.first); | |
} | |
} | |
return s; | |
} | |
// Subset construction: convert the epsilon-NFA in `pool` (start state 0)
// into a DFA, returned as a new node_pool.
//
// Internal numbering: states[0] is a deliberately empty "dead" subset and
// states[1] is the epsilon-closure of NFA state 0, so in the returned pool
// index 1 is the DFA start state; transitions into the dead subset are
// simply not recorded (the `i == 0` case below is skipped).
node_pool NFA_to_DFA(const node_pool &pool){
    node_pool trans;
    {
        // Alphabet = every character on an NFA edge; the epsilon label '\0'
        // is filtered out inside the loop.
        std::set<char> sigma = collect_char(pool);
        std::vector<std::set<std::size_t>> states = { {}, { closure(pool, { 0 }) } };
        // p: index of the last discovered DFA state; j: worklist cursor.
        std::size_t p = 1, j = 0;
        while(j <= p){
            for(char c : sigma){
                if(c == '\0'){ continue; }
                // NFA-state subset reached from DFA state j on input c.
                std::set<std::size_t> e = DFA_edge(pool, states[j], c);
                // Linear search for an already-known DFA state equal to e
                // (index 0, the empty subset, matches when e is empty).
                bool find = false;
                std::size_t i;
                for(i = 0; i <= p; ++i){
                    if(e == states[i]){
                        find = true;
                        break;
                    }
                }
                // Record the transition j --c--> i and, as a side effect,
                // mark DFA state j accepting if any NFA state in it is.
                // NOTE(review): the lambda parameter shadows the outer `i`,
                // and the accepting token is taken from the first accepting
                // NFA state in std::set iteration order (numeric), not rule
                // order -- confirm that is the intended tie-break when two
                // rules accept in the same DFA state.
                auto check_ender_tokens = [&](std::size_t i){
                    trans[j].edge.push_back(std::make_pair(c, i));
                    for(std::size_t n : states[j]){
                        if(pool[n].token_name){
                            trans[j].token_name.reset(new std::string(*pool[n].token_name));
                            break;
                        }
                    }
                };
                if(find){
                    if(i != 0){
                        // Known, non-dead target: just add the edge.
                        if(trans.size() <= j){
                            trans.resize(j + 1);
                        }
                        check_ender_tokens(i);
                    }
                }else{
                    // Previously unseen subset: append it as a new DFA state
                    // and it will be processed when j reaches p.
                    ++p;
                    if(states.size() <= p){
                        states.resize(p + 1);
                    }
                    states[p] = e;
                    if(trans.size() <= j){
                        trans.resize(j + 1);
                    }
                    check_ender_tokens(p);
                }
            }
            ++j;
        }
    }
    return trans;
}
} // namespace automaton | |
namespace regex_parser{ | |
// One lexed regex token: the raw character plus its token type
// (an int holding one of the token_* enumerators from the generated
// regex_parser header).
struct token{
    char c;
    int type;
};
// Regex syntax-tree node.  `op_type` selects the operator, `node_vec` holds
// the operand subtrees (owned; deleted recursively in the destructor), and
// `c` is the literal character for op_charactor and range endpoints.
struct node{
    enum op_type_enum{
        op_or,
        op_dot,
        op_concat,
        op_kleene,
        op_kleene_plus,
        op_one_or_zero,
        op_seq,
        op_range,
        op_class,
        op_charactor
    };
    char c;
    op_type_enum op_type;
    std::vector<node*> node_vec;
    // Append this subtree's NFA fragment to `pool`, entering at state
    // `start`; returns the fragment's exit state index.  Epsilon edges are
    // labelled '\0'.
    // NOTE(review): the switch has no cases for op_dot, op_seq and op_range
    // (op_seq/op_range are only consumed inside op_class), so reaching one
    // returns an uninitialized `r` -- and make_dot() currently produces a
    // null node, so an unescaped '.' in a pattern dereferences null before
    // even getting here.  Confirm and fix upstream.
    std::size_t to_nfa(std::size_t start, automaton::node_pool &pool) const{
        std::size_t r;
        switch(op_type){
        case op_charactor:
            {
                // Literal: start --c--> fresh exit state.
                pool.push_back(automaton::node());
                pool[start].edge.push_back(std::make_pair(c, pool.size() - 1));
                r = pool.size() - 1;
            }
            break;
        case op_class:
            {
                // Character class: one shared exit state r; each literal or
                // expanded range member adds a direct edge start --ch--> r.
                pool.push_back(automaton::node());
                r = pool.size() - 1;
                for(std::size_t i = 0; i < node_vec.size(); ++i){
                    for(std::size_t j = 0; j < node_vec[i]->node_vec.size(); ++j){
                        if(node_vec[i]->node_vec[j]->op_type == op_charactor){
                            pool[start].edge.push_back(std::make_pair(node_vec[i]->node_vec[j]->c, r));
                        }else if(node_vec[i]->node_vec[j]->op_type == op_range){
                            // Normalize reversed ranges like [z-a] by
                            // swapping the endpoints in place (mutates the
                            // tree through the stored pointers even though
                            // this method is const).
                            if(node_vec[i]->node_vec[j]->node_vec[0]->c > node_vec[i]->node_vec[j]->node_vec[1]->c){
                                std::swap(node_vec[i]->node_vec[j]->node_vec[0]->c, node_vec[i]->node_vec[j]->node_vec[1]->c);
                            }
                            // NOTE(review): if the upper endpoint equals the
                            // maximum char value, `++c` wraps and this loop
                            // never terminates -- confirm inputs exclude it.
                            for(char c = node_vec[i]->node_vec[j]->node_vec[0]->c; c <= node_vec[i]->node_vec[j]->node_vec[1]->c; ++c){
                                pool[start].edge.push_back({});
                                pool[start].edge.back().first = c;
                                pool[start].edge.back().second = r;
                            }
                        }
                    }
                }
            }
            break;
        case op_one_or_zero:
            {
                // X? : fresh exit r; both X's exit and `start` epsilon-link
                // to r so X may be skipped.
                pool.push_back(automaton::node());
                r = pool.size() - 1;
                std::size_t n = node_vec[0]->to_nfa(start, pool);
                pool[n].edge.push_back(std::make_pair('\0', r));
                pool[start].edge.push_back(std::make_pair('\0', r));
            }
            break;
        case op_kleene_plus:
            {
                // X+ : two copies of X in sequence with epsilon edges both
                // ways between the junction q and the exit r (X then X*).
                std::size_t q = node_vec[0]->to_nfa(start, pool);
                r = node_vec[0]->to_nfa(q, pool);
                pool[q].edge.push_back(std::make_pair('\0', r));
                pool[r].edge.push_back(std::make_pair('\0', q));
            }
            break;
        case op_kleene:
            {
                // X* : epsilon edges both ways between `start` and X's exit,
                // allowing zero or more traversals.
                r = node_vec[0]->to_nfa(start, pool);
                pool[start].edge.push_back(std::make_pair('\0', r));
                pool[r].edge.push_back(std::make_pair('\0', start));
            }
            break;
        case op_concat:
            {
                // XY : thread Y's fragment from X's exit.
                r = node_vec[0]->to_nfa(start, pool);
                r = node_vec[1]->to_nfa(r, pool);
            }
            break;
        case op_or:
            {
                // X|Y : both fragments share `start`; their exits
                // epsilon-join into a fresh state r.
                std::size_t e1 = node_vec[0]->to_nfa(start, pool);
                std::size_t e2 = node_vec[1]->to_nfa(start, pool);
                pool.push_back(automaton::node());
                r = pool.size() - 1;
                pool[e1].edge.push_back(std::make_pair('\0', r));
                pool[e2].edge.push_back(std::make_pair('\0', r));
            }
            break;
        }
        return r;
    }
    // Owns its children: deletes the whole subtree recursively.
    ~node(){
        for(auto &i : node_vec){
            delete i;
        }
    }
};
struct semantic_action{ | |
static node *make_identity(node *ptr){ | |
return ptr; | |
} | |
static node *make_or(node *p, node *q){ | |
node *r = new node; | |
r->op_type = node::op_or; | |
r->node_vec.push_back(p); | |
r->node_vec.push_back(q); | |
return r; | |
} | |
static node *make_concat(node *p, node *q){ | |
node *r = new node; | |
r->op_type = node::op_concat; | |
r->node_vec.push_back(p); | |
r->node_vec.push_back(q); | |
return r; | |
} | |
static node *make_kleene(node *p){ | |
node *r = new node; | |
r->op_type = node::op_kleene; | |
r->node_vec.push_back(p); | |
return r; | |
} | |
static node *make_kleene_plus(node *p){ | |
node *r = new node; | |
r->op_type = node::op_kleene_plus; | |
r->node_vec.push_back(p); | |
return r; | |
} | |
static node *make_one_or_zero(node *p){ | |
node *r = new node; | |
r->op_type = node::op_one_or_zero; | |
r->node_vec.push_back(p); | |
return r; | |
} | |
static node *make_seq(node *p){ | |
node *r = new node; | |
r->op_type = node::op_seq; | |
r->node_vec.push_back(p); | |
return r; | |
} | |
static node *make_seq(node *p, node *q){ | |
p->node_vec.push_back(q); | |
return p; | |
} | |
static node *make_class_range(node *p){ | |
return make_seq(p); | |
} | |
static node *make_class_range(node *p, node *q){ | |
return make_seq(p, q); | |
} | |
static node *make_range(node *p, node *q){ | |
node *r = new node; | |
r->op_type = node::op_range; | |
r->node_vec.push_back(p); | |
r->node_vec.push_back(q); | |
return r; | |
} | |
static node *make_charactor(node *p){ | |
return p; | |
} | |
static node *make_dot(){ | |
return nullptr; | |
} | |
static node *make_class(node *seq){ | |
node *r = new node; | |
r->op_type = node::op_class; | |
r->node_vec.push_back(seq); | |
return r; | |
} | |
void downcast(node *&x, node *y){ | |
x = y; | |
} | |
void upcast(node *&x, node *y){ | |
x = y; | |
} | |
void syntax_error(){ | |
throw; | |
} | |
void stack_overflow(){ | |
throw; | |
} | |
}; | |
template<class Iter> | |
std::vector<token> tokenize(Iter first, Iter last){ | |
std::vector<token> seq; | |
bool escape = false; | |
for(; first != last; ++first){ | |
token t; | |
char c = *first; | |
if(escape){ | |
t.c = *first; | |
t.type = regex_parser::token_charactor; | |
escape = false; | |
}else{ | |
if(c == '\\'){ | |
escape = true; | |
continue; | |
} | |
t.c = c; | |
switch(c){ | |
case '|': | |
t.type = regex_parser::token_vertical_bar; | |
break; | |
case '*': | |
t.type = regex_parser::token_asterisk; | |
break; | |
case '+': | |
t.type = regex_parser::token_plus; | |
break; | |
case '?': | |
t.type = regex_parser::token_question; | |
break; | |
case '[': | |
t.type = regex_parser::token_l_bracket; | |
break; | |
case ']': | |
t.type = regex_parser::token_r_bracket; | |
break; | |
case '(': | |
t.type = regex_parser::token_l_paren; | |
break; | |
case ')': | |
t.type = regex_parser::token_r_paren; | |
break; | |
case '-': | |
t.type = regex_parser::token_hyphen; | |
break; | |
default: | |
t.type = regex_parser::token_charactor; | |
break; | |
} | |
seq.push_back(t); | |
} | |
} | |
return seq; | |
} | |
} // namespace regex_parser | |
class lexer{ | |
public: | |
struct parsing_error : public std::runtime_error{ | |
parsing_error(const std::string &what) : std::runtime_error(what){} | |
~parsing_error() = default; | |
}; | |
lexer() = default; | |
virtual ~lexer() = default; | |
void add_rule(const std::string &str, const std::string &token_name){ | |
std::vector<regex_parser::token> seq = regex_parser::tokenize(str.begin(), str.end()); | |
regex_parser::semantic_action sa; | |
regex_parser::Parser<regex_parser::node*, regex_parser::semantic_action> parser(sa); | |
for(auto &i : seq){ | |
regex_parser::node *p = nullptr; | |
if(i.type == regex_parser::token_charactor){ | |
p = new regex_parser::node; | |
p->c = i.c; | |
p->op_type = regex_parser::node::op_charactor; | |
} | |
parser.post(static_cast<regex_parser::Token>(i.type), p); | |
} | |
parser.post(regex_parser::token_eof, nullptr); | |
std::unique_ptr<regex_parser::node> root; | |
{ | |
regex_parser::node *root_; | |
if(!parser.accept(root_)){ | |
throw parsing_error("parsing error (regular expression) : " + str); | |
} | |
root.reset(root_); | |
} | |
if(node_pool.empty()){ | |
node_pool.push_back({}); | |
std::size_t end = root->to_nfa(0, node_pool); | |
node_pool[end].token_name.reset(new std::string(token_name)); | |
}else{ | |
std::size_t start = node_pool.size(); | |
node_pool[0].edge.push_back(std::make_pair('\0', start)); | |
node_pool.resize(start + 1); | |
std::size_t end = root->to_nfa(start, node_pool); | |
node_pool[end].token_name.reset(new std::string(token_name)); | |
} | |
} | |
void build(){ | |
node_pool = automaton::NFA_to_DFA(node_pool); | |
optimization(); | |
} | |
private: | |
void optimization(){ | |
using edge = std::set<std::pair<char, std::size_t>>; | |
using node = std::set<edge>; | |
std::set<std::pair<std::size_t, std::size_t>> inequality_pair_set; | |
bool mod; | |
do{ | |
mod = false; | |
for(std::size_t i = 0; i < node_pool.size(); ++i){ | |
node node_i; | |
edge edge_i; | |
for(auto &a : node_pool[i].edge){ | |
edge_i.insert(a); | |
} | |
node_i.insert(std::move(edge_i)); | |
for(std::size_t j = i + 1; j < node_pool.size(); ++j){ | |
if( | |
node_pool[i].token_name && !node_pool[j].token_name || | |
!node_pool[i].token_name && node_pool[j].token_name || | |
node_pool[i].token_name != node_pool[j].token_name | |
){ | |
mod = mod || inequality_pair_set.insert(std::make_pair(i, j)).second; | |
}else{ | |
node node_j; | |
edge edge_j; | |
for(auto &a : node_pool[j].edge){ | |
edge_j.insert(a); | |
} | |
node_j.insert(std::move(edge_j)); | |
if(node_i != node_j){ | |
mod = mod || inequality_pair_set.insert(std::make_pair(i, j)).second; | |
} | |
} | |
} | |
} | |
}while(mod); | |
std::map<std::size_t, std::size_t> equality_pair_map; | |
for(std::size_t i = 0; i < node_pool.size(); ++i){ | |
for(std::size_t j = i + 1; j < node_pool.size(); ++j){ | |
if(inequality_pair_set.find(std::make_pair(i, j)) != inequality_pair_set.end()){ | |
continue; | |
}else{ | |
equality_pair_map.insert(std::make_pair(j, i)); | |
} | |
} | |
} | |
for(auto &a : node_pool){ | |
for(auto &e : a.edge){ | |
auto iter = equality_pair_map.find(e.second); | |
if(iter != equality_pair_map.end()){ | |
e.second = equality_pair_map[e.second]; | |
} | |
} | |
} | |
} | |
automaton::node_pool node_pool; | |
}; | |
// Demo: register a keyword, identifier, number and catch-all rule, then
// compile them into a single minimized DFA.
int main(){
    lexer lex;
    const std::pair<const char*, const char*> rules[] = {
        { "if",        "if"         },
        { "[a-zA-Z]+", "identifier" },
        { "[0-9]+",    "number"     },
        { ".*",        "other"      },
    };
    for(const auto &rule : rules){
        lex.add_rule(rule.first, rule.second);
    }
    lex.build();
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef REGEX_PARSER_HPP_ | |
#define REGEX_PARSER_HPP_ | |
// This file was automatically generated by Caper. | |
// (http://jonigata.github.io/caper/caper.html) | |
#include <cstdlib> | |
#include <cassert> | |
#include <vector> | |
namespace regex_parser { | |
// Terminal symbols the lexer feeds to the generated regex parser.
enum Token {
    token_eof,
    token_asterisk,
    token_charactor,
    token_concat,
    token_dot,
    token_hyphen,
    token_l_bracket,
    token_l_paren,
    token_plus,
    token_question,
    token_r_bracket,
    token_r_paren,
    token_vertical_bar,
};
// Human-readable name of a token, for diagnostics.
// Returns "unknown token" instead of indexing out of bounds when `t` does
// not name an enumerator (the original had undefined behavior there).
inline const char* token_label(Token t) {
    static const char* const labels[] = {
        "token_eof",
        "token_asterisk",
        "token_charactor",
        "token_concat",
        "token_dot",
        "token_hyphen",
        "token_l_bracket",
        "token_l_paren",
        "token_plus",
        "token_question",
        "token_r_bracket",
        "token_r_paren",
        "token_vertical_bar",
    };
    const std::size_t count = sizeof(labels) / sizeof(labels[0]);
    const std::size_t index = static_cast<std::size_t>(t);
    return index < count ? labels[index] : "unknown token";
}
// Parser stack with transactional semantics, used by the generated parser
// so one post() can be rolled back atomically.  Committed frames live in
// stack_, of which only the first gap_ are logically alive (pops below the
// commit point just lower gap_ without erasing).  Uncommitted pushes live
// in tmp_.  The logical stack is stack_[0 .. gap_) followed by tmp_.
template <class T, unsigned int StackSize>
class Stack {
public:
    Stack() { gap_ = 0; }
    // Abort the current transaction: drop uncommitted pushes and revive any
    // committed frames popped since the last commit.
    void rollback_tmp() {
        gap_ = stack_.size();
        tmp_.clear();
    }
    // Make the current logical stack permanent: physically erase committed
    // frames popped during this transaction, then append the tmp frames.
    void commit_tmp() {
        // may throw
        stack_.reserve(gap_ + tmp_.size());
        // expect not to throw
        stack_.erase(stack_.begin()+ gap_, stack_.end());
        stack_.insert(stack_.end(), tmp_.begin(), tmp_.end());
        tmp_.clear();
    }
    // Push one frame into the transaction; returns false when a nonzero
    // StackSize limit would be exceeded.
    // NOTE(review): the limit counts all of stack_.size(), including dead
    // frames above gap_ -- presumably conservative; confirm intent.
    bool push(const T& f) {
        if (StackSize != 0 &&
            int(StackSize) <= int(stack_.size() + tmp_.size())) {
            return false;
        }
        tmp_.push_back(f);
        return true;
    }
    // Pop n frames from the logical top: consume tmp_ first, then lower the
    // committed watermark gap_ (no physical erase of committed frames).
    void pop(size_t n) {
        if (tmp_.size() < n) {
            n -= tmp_.size();
            tmp_.clear();
            gap_ -= n;
        } else {
            tmp_.erase(tmp_.end() - n, tmp_.end());
        }
    }
    // Top of the logical stack.
    T& top() {
        assert(0 < depth());
        if (!tmp_.empty()) {
            return tmp_.back();
        } else {
            return stack_[gap_ - 1];
        }
    }
    // Frame at offset `index` (0-based, deepest first) within the top
    // `base` logical frames; used to fetch the RHS values of a production
    // being reduced, which may straddle the committed/tmp boundary.
    const T& get_arg(size_t base, size_t index) {
        size_t n = tmp_.size();
        if (base - index <= n) {
            return tmp_[n - (base - index)];
        } else {
            return stack_[gap_ - (base - n) + index];
        }
    }
    // Reset to a completely empty stack (both areas).
    void clear() {
        stack_.clear();
        tmp_.clear();
        gap_ = 0;
    }
    bool empty() const {
        if (!tmp_.empty()) {
            return false;
        } else {
            return gap_ == 0;
        }
    }
    // Logical depth: live committed frames plus uncommitted pushes.
    size_t depth() const {
        return gap_ + tmp_.size();
    }
    // Random access by logical index (0 = bottom of the stack).
    T& nth(size_t index) {
        if (gap_ <= index) {
            return tmp_[index - gap_];
        } else {
            return stack_[index];
        }
    }
    void swap_top_and_second() {
        int d = depth();
        assert(2 <= d);
        T x = nth(d - 1);
        nth(d - 1) = nth(d - 2);
        nth(d - 2) = x;
    }
private:
    std::vector<T> stack_;  // committed frames; first gap_ are live
    std::vector<T> tmp_;    // uncommitted pushes of the open transaction
    size_t gap_;            // live prefix length of stack_
};
template <class Value, class SemanticAction, | |
unsigned int StackSize = 0> | |
class Parser { | |
public: | |
typedef Token token_type; | |
typedef Value value_type; | |
enum Nonterminal { | |
Nonterminal_CharactorRange, | |
Nonterminal_CharactorSequence, | |
Nonterminal_LevelA, | |
Nonterminal_LevelB, | |
Nonterminal_LevelC, | |
Nonterminal_S, | |
}; | |
public: | |
Parser(SemanticAction& sa) : sa_(sa) { reset(); } | |
void reset() { | |
error_ = false; | |
accepted_ = false; | |
clear_stack(); | |
rollback_tmp_stack(); | |
if (push_stack(0, value_type())) { | |
commit_tmp_stack(); | |
} else { | |
sa_.stack_overflow(); | |
error_ = true; | |
} | |
} | |
bool post(token_type token, const value_type& value) { | |
rollback_tmp_stack(); | |
error_ = false; | |
while ((this->*(stack_top()->entry->state))(token, value)) | |
; // may throw | |
if (!error_) { | |
commit_tmp_stack(); | |
} else { | |
recover(token, value); | |
} | |
return accepted_ || error_; | |
} | |
bool accept(value_type& v) { | |
assert(accepted_); | |
if (error_) { return false; } | |
v = accepted_value_; | |
return true; | |
} | |
bool error() { return error_; } | |
private: | |
typedef Parser<Value, SemanticAction, StackSize> self_type; | |
typedef bool (self_type::*state_type)(token_type, const value_type&); | |
typedef int (self_type::*gotof_type)(Nonterminal); | |
bool accepted_; | |
bool error_; | |
value_type accepted_value_; | |
SemanticAction& sa_; | |
struct table_entry { | |
state_type state; | |
gotof_type gotof; | |
bool handle_error; | |
}; | |
struct stack_frame { | |
const table_entry* entry; | |
value_type value; | |
int sequence_length; | |
stack_frame(const table_entry* e, const value_type& v, int sl) | |
: entry(e), value(v), sequence_length(sl) {} | |
}; | |
Stack<stack_frame, StackSize> stack_; | |
bool push_stack(int state_index, const value_type& v, int sl = 0) { | |
bool f = stack_.push(stack_frame(entry(state_index), v, sl)); | |
assert(!error_); | |
if (!f) { | |
error_ = true; | |
sa_.stack_overflow(); | |
} | |
return f; | |
} | |
void pop_stack(size_t n) { | |
stack_.pop(n); | |
} | |
stack_frame* stack_top() { | |
return &stack_.top(); | |
} | |
const value_type& get_arg(size_t base, size_t index) { | |
return stack_.get_arg(base, index).value; | |
} | |
void clear_stack() { | |
stack_.clear(); | |
} | |
void rollback_tmp_stack() { | |
stack_.rollback_tmp(); | |
} | |
void commit_tmp_stack() { | |
stack_.commit_tmp(); | |
} | |
void recover(Token, const value_type&) { | |
} | |
bool call_nothing(Nonterminal nonterminal, int base) { | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, value_type()); | |
} | |
bool call_0_make_identity(Nonterminal nonterminal, int base, int arg_index0) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* r = sa_.make_identity(arg0); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_0_make_charactor(Nonterminal nonterminal, int base, int arg_index0) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* r = sa_.make_charactor(arg0); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_0_make_dot(Nonterminal nonterminal, int base) { | |
node* r = sa_.make_dot(); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_0_make_class(Nonterminal nonterminal, int base, int arg_index0) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* r = sa_.make_class(arg0); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_0_make_kleene(Nonterminal nonterminal, int base, int arg_index0) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* r = sa_.make_kleene(arg0); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_0_make_kleene_plus(Nonterminal nonterminal, int base, int arg_index0) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* r = sa_.make_kleene_plus(arg0); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_0_make_one_or_zero(Nonterminal nonterminal, int base, int arg_index0) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* r = sa_.make_one_or_zero(arg0); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_0_make_seq(Nonterminal nonterminal, int base, int arg_index0) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* r = sa_.make_seq(arg0); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_1_make_seq(Nonterminal nonterminal, int base, int arg_index0, int arg_index1) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* arg1; sa_.downcast(arg1, get_arg(base, arg_index1)); | |
node* r = sa_.make_seq(arg0, arg1); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_0_make_class_range(Nonterminal nonterminal, int base, int arg_index0, int arg_index1) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* arg1; sa_.downcast(arg1, get_arg(base, arg_index1)); | |
node* r = sa_.make_class_range(arg0, arg1); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_1_make_class_range(Nonterminal nonterminal, int base, int arg_index0) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* r = sa_.make_class_range(arg0); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_0_make_or(Nonterminal nonterminal, int base, int arg_index0, int arg_index1) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* arg1; sa_.downcast(arg1, get_arg(base, arg_index1)); | |
node* r = sa_.make_or(arg0, arg1); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_0_make_concat(Nonterminal nonterminal, int base, int arg_index0, int arg_index1) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* arg1; sa_.downcast(arg1, get_arg(base, arg_index1)); | |
node* r = sa_.make_concat(arg0, arg1); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool call_0_make_range(Nonterminal nonterminal, int base, int arg_index0, int arg_index1) { | |
node* arg0; sa_.downcast(arg0, get_arg(base, arg_index0)); | |
node* arg1; sa_.downcast(arg1, get_arg(base, arg_index1)); | |
node* r = sa_.make_range(arg0, arg1); | |
value_type v; sa_.upcast(v, r); | |
pop_stack(base); | |
int dest_index = (this->*(stack_top()->entry->gotof))(nonterminal); | |
return push_stack(dest_index, v); | |
} | |
bool state_0(token_type token, const value_type& value) { | |
switch(token) { | |
case token_charactor: | |
// shift | |
push_stack(/*state*/ 12, value); | |
return false; | |
case token_dot: | |
// shift | |
push_stack(/*state*/ 13, value); | |
return false; | |
case token_l_bracket: | |
// shift | |
push_stack(/*state*/ 14, value); | |
return false; | |
case token_l_paren: | |
// shift | |
push_stack(/*state*/ 5, value); | |
return false; | |
default: | |
sa_.syntax_error(); | |
error_ = true; | |
return false; | |
} | |
} | |
int gotof_0(Nonterminal nonterminal) { | |
switch(nonterminal) { | |
case Nonterminal_S: return 1; | |
case Nonterminal_LevelA: return 8; | |
case Nonterminal_LevelB: return 3; | |
case Nonterminal_LevelC: return 2; | |
default: assert(0); return false; | |
} | |
} | |
bool state_1(token_type token, const value_type& value) { | |
switch(token) { | |
case token_eof: | |
// accept | |
accepted_ = true; | |
accepted_value_ = get_arg(1, 0); | |
return false; | |
default: | |
sa_.syntax_error(); | |
error_ = true; | |
return false; | |
} | |
} | |
int gotof_1(Nonterminal nonterminal) { | |
assert(0); | |
return true; | |
} | |
bool state_2(token_type token, const value_type& value) { | |
switch(token) { | |
case token_eof: | |
// reduce | |
return call_0_make_identity(Nonterminal_S, /*pop*/ 1, 0); | |
default: | |
sa_.syntax_error(); | |
error_ = true; | |
return false; | |
} | |
} | |
int gotof_2(Nonterminal nonterminal) { | |
assert(0); | |
return true; | |
} | |
bool state_3(token_type token, const value_type& value) { | |
switch(token) { | |
case token_charactor: | |
// shift | |
push_stack(/*state*/ 12, value); | |
return false; | |
case token_dot: | |
// shift | |
push_stack(/*state*/ 13, value); | |
return false; | |
case token_l_bracket: | |
// shift | |
push_stack(/*state*/ 14, value); | |
return false; | |
case token_l_paren: | |
// shift | |
push_stack(/*state*/ 5, value); | |
return false; | |
case token_vertical_bar: | |
// shift | |
push_stack(/*state*/ 4, value); | |
return false; | |
case token_eof: | |
case token_r_paren: | |
// reduce | |
return call_0_make_identity(Nonterminal_LevelC, /*pop*/ 1, 0); | |
default: | |
sa_.syntax_error(); | |
error_ = true; | |
return false; | |
} | |
} | |
int gotof_3(Nonterminal nonterminal) { | |
switch(nonterminal) { | |
case Nonterminal_LevelA: return 8; | |
case Nonterminal_LevelB: return 3; | |
case Nonterminal_LevelC: return 7; | |
default: assert(0); return false; | |
} | |
} | |
bool state_4(token_type token, const value_type& value) { | |
switch(token) { | |
case token_charactor: | |
// shift | |
push_stack(/*state*/ 12, value); | |
return false; | |
case token_dot: | |
// shift | |
push_stack(/*state*/ 13, value); | |
return false; | |
case token_l_bracket: | |
// shift | |
push_stack(/*state*/ 14, value); | |
return false; | |
case token_l_paren: | |
// shift | |
push_stack(/*state*/ 5, value); | |
return false; | |
default: | |
sa_.syntax_error(); | |
error_ = true; | |
return false; | |
} | |
} | |
int gotof_4(Nonterminal nonterminal) { | |
switch(nonterminal) { | |
case Nonterminal_LevelA: return 8; | |
case Nonterminal_LevelB: return 3; | |
case Nonterminal_LevelC: return 6; | |
default: assert(0); return false; | |
} | |
} | |
bool state_5(token_type token, const value_type& value) { | |
switch(token) { | |
case token_charactor: | |
// shift | |
push_stack(/*state*/ 12, value); | |
return false; | |
case token_dot: | |
// shift | |
push_stack(/*state*/ 13, value); | |
return false; | |
case token_l_bracket: | |
// shift | |
push_stack(/*state*/ 14, value); | |
return false; | |
case token_l_paren: | |
// shift | |
push_stack(/*state*/ 5, value); | |
return false; | |
default: | |
sa_.syntax_error(); | |
error_ = true; | |
return false; | |
} | |
} | |
int gotof_5(Nonterminal nonterminal) { | |
switch(nonterminal) { | |
case Nonterminal_LevelA: return 8; | |
case Nonterminal_LevelB: return 3; | |
case Nonterminal_LevelC: return 17; | |
default: assert(0); return false; | |
} | |
} | |
bool state_6(token_type token, const value_type& value) { | |
switch(token) { | |
case token_eof: | |
case token_r_paren: | |
// reduce | |
return call_0_make_or(Nonterminal_LevelC, /*pop*/ 3, 0, 2); | |
default: | |
sa_.syntax_error(); | |
error_ = true; | |
return false; | |
} | |
} | |
int gotof_6(Nonterminal nonterminal) { | |
assert(0); | |
return true; | |
} | |
bool state_7(token_type token, const value_type& value) { | |
switch(token) { | |
case token_eof: | |
case token_r_paren: | |
// reduce | |
return call_0_make_concat(Nonterminal_LevelC, /*pop*/ 2, 0, 1); | |
default: | |
sa_.syntax_error(); | |
error_ = true; | |
return false; | |
} | |
} | |
int gotof_7(Nonterminal nonterminal) { | |
assert(0); | |
return true; | |
} | |
bool state_8(token_type token, const value_type& value) { | |
switch(token) { | |
case token_asterisk: | |
// shift | |
push_stack(/*state*/ 9, value); | |
return false; | |
case token_plus: | |
// shift | |
push_stack(/*state*/ 10, value); | |
return false; | |
case token_question: | |
// shift | |
push_stack(/*state*/ 11, value); | |
return false; | |
case token_eof: | |
case token_charactor: | |
case token_dot: | |
case token_l_bracket: | |
case token_l_paren: | |
case token_r_paren: | |
case token_vertical_bar: | |
// reduce | |
return call_0_make_identity(Nonterminal_LevelB, /*pop*/ 1, 0); | |
default: | |
sa_.syntax_error(); | |
error_ = true; | |
return false; | |
} | |
} | |
int gotof_8(Nonterminal nonterminal) { | |
assert(0); | |
return true; | |
} | |
bool state_9(token_type token, const value_type& value) { | |
switch(token) { | |
case token_eof: | |
case token_charactor: | |
case token_dot: | |
case token_l_bracket: | |
case token_l_paren: | |
case token_r_paren: | |
case token_vertical_bar: | |
// reduce | |
return call_0_make_kleene(Nonterminal_LevelB, /*pop*/ 2, 0); | |
default: | |
sa_.syntax_error(); | |
error_ = true; | |
return false; | |
} | |
} | |
int gotof_9(Nonterminal nonterminal) { | |
assert(0); | |
return true; | |
} | |
bool state_10(token_type token, const value_type& value) { | |
switch(token) { | |
case token_eof: | |
case token_charactor: | |
case token_dot: | |
case token_l_bracket: | |
case token_l_paren: | |
case token_r_paren: | |
case token_vertical_bar: | |
// reduce | |
return call_0_make_kleene_plus(Nonterminal_LevelB, /*pop*/ 2, 0); | |
default: | |
sa_.syntax_error(); | |
error_ = true; | |
return false; | |
} | |
} | |
int gotof_10(Nonterminal nonterminal) { | |
assert(0); | |
return true; | |
} | |
bool state_11(token_type token, const value_type& value) {
    // After "LevelA '?'": every token in follow(LevelB) triggers the
    // reduce LevelB : LevelA question (make_one_or_zero); otherwise error.
    const bool in_follow =
        token == token_eof || token == token_charactor ||
        token == token_dot || token == token_l_bracket ||
        token == token_l_paren || token == token_r_paren ||
        token == token_vertical_bar;
    if(in_follow) {
        // reduce
        return call_0_make_one_or_zero(Nonterminal_LevelB, /*pop*/ 2, 0);
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_11(Nonterminal nonterminal) {
    // State 11 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
bool state_12(token_type token, const value_type& value) {
    // After shifting a charactor terminal: reduce LevelA : charactor
    // (make_charactor) on any token in follow(LevelA); otherwise error.
    switch(token) {
    case token_eof:      case token_asterisk: case token_charactor:
    case token_dot:      case token_l_bracket: case token_l_paren:
    case token_plus:     case token_question:  case token_r_paren:
    case token_vertical_bar:
        // reduce
        return call_0_make_charactor(Nonterminal_LevelA, /*pop*/ 1, 0);
    default:
        break;
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_12(Nonterminal nonterminal) {
    // State 12 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
bool state_13(token_type token, const value_type& value) {
    // After shifting '.': reduce LevelA : dot (make_dot) on any token
    // in follow(LevelA); otherwise error.
    switch(token) {
    case token_eof:      case token_asterisk: case token_charactor:
    case token_dot:      case token_l_bracket: case token_l_paren:
    case token_plus:     case token_question:  case token_r_paren:
    case token_vertical_bar:
        // reduce
        return call_0_make_dot(Nonterminal_LevelA, /*pop*/ 1);
    default:
        break;
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_13(Nonterminal nonterminal) {
    // State 13 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
bool state_14(token_type token, const value_type& value) {
    // After shifting '[': only a charactor may open the class body.
    if(token == token_charactor) {
        push_stack(/*state*/ 19, value); // shift
        return false;
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_14(Nonterminal nonterminal) {
    // Goto table for state 14 (just after '['): a reduced
    // CharactorSequence moves to state 15, a CharactorRange to 21.
    if(nonterminal == Nonterminal_CharactorSequence) return 15;
    if(nonterminal == Nonterminal_CharactorRange) return 21;
    assert(0);
    return false;
}
bool state_15(token_type token, const value_type& value) {
    // Inside a class with a CharactorSequence on the stack: another
    // charactor extends the sequence, ']' closes the class.
    if(token == token_charactor) {
        push_stack(/*state*/ 20, value); // shift
        return false;
    }
    if(token == token_r_bracket) {
        push_stack(/*state*/ 16, value); // shift
        return false;
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_15(Nonterminal nonterminal) {
    // Goto table for state 15: only a reduced CharactorRange is
    // expected here; it moves to state 22.
    if(nonterminal == Nonterminal_CharactorRange) return 22;
    assert(0);
    return false;
}
bool state_16(token_type token, const value_type& value) {
    // After "'[' CharactorSequence ']'": reduce LevelA via make_class
    // (value index 1 = the sequence) on any follower; otherwise error.
    switch(token) {
    case token_eof:      case token_asterisk: case token_charactor:
    case token_dot:      case token_l_bracket: case token_l_paren:
    case token_plus:     case token_question:  case token_r_paren:
    case token_vertical_bar:
        // reduce
        return call_0_make_class(Nonterminal_LevelA, /*pop*/ 3, 1);
    default:
        break;
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_16(Nonterminal nonterminal) {
    // State 16 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
bool state_17(token_type token, const value_type& value) {
    // After "'(' LevelC": only the closing ')' may follow.
    if(token == token_r_paren) {
        push_stack(/*state*/ 18, value); // shift
        return false;
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_17(Nonterminal nonterminal) {
    // State 17 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
bool state_18(token_type token, const value_type& value) {
    // After "'(' LevelC ')'": reduce LevelA via make_identity (value
    // index 1 = the parenthesised expression) on any follower.
    switch(token) {
    case token_eof:      case token_asterisk: case token_charactor:
    case token_dot:      case token_l_bracket: case token_l_paren:
    case token_plus:     case token_question:  case token_r_paren:
    case token_vertical_bar:
        // reduce
        return call_0_make_identity(Nonterminal_LevelA, /*pop*/ 3, 1);
    default:
        break;
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_18(Nonterminal nonterminal) {
    // State 18 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
bool state_19(token_type token, const value_type& value) {
    // A lone charactor inside a class: '-' starts a range (shift);
    // another charactor or ']' reduces CharactorSequence : charactor
    // via make_seq.
    if(token == token_hyphen) {
        push_stack(/*state*/ 23, value); // shift
        return false;
    }
    if(token == token_charactor || token == token_r_bracket) {
        // reduce
        return call_0_make_seq(Nonterminal_CharactorSequence, /*pop*/ 1, 0);
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_19(Nonterminal nonterminal) {
    // State 19 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
bool state_20(token_type token, const value_type& value) {
    // A charactor following a CharactorSequence: '-' starts a range
    // (shift); a charactor or ']' reduces
    // CharactorSequence : CharactorSequence charactor via make_seq.
    if(token == token_hyphen) {
        push_stack(/*state*/ 23, value); // shift
        return false;
    }
    if(token == token_charactor || token == token_r_bracket) {
        // reduce
        return call_1_make_seq(Nonterminal_CharactorSequence, /*pop*/ 2, 0, 1);
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_20(Nonterminal nonterminal) {
    // State 20 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
bool state_21(token_type token, const value_type& value) {
    // A completed CharactorRange at the start of a class body:
    // reduce CharactorSequence : CharactorRange via make_class_range.
    if(token == token_charactor || token == token_r_bracket) {
        // reduce
        return call_1_make_class_range(Nonterminal_CharactorSequence, /*pop*/ 1, 0);
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_21(Nonterminal nonterminal) {
    // State 21 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
bool state_22(token_type token, const value_type& value) {
    // A CharactorRange following a CharactorSequence: reduce
    // CharactorSequence : CharactorSequence CharactorRange.
    if(token == token_charactor || token == token_r_bracket) {
        // reduce
        return call_0_make_class_range(Nonterminal_CharactorSequence, /*pop*/ 2, 0, 1);
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_22(Nonterminal nonterminal) {
    // State 22 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
bool state_23(token_type token, const value_type& value) {
    // After "charactor '-'": only the range's upper-bound charactor
    // may follow.
    if(token == token_charactor) {
        push_stack(/*state*/ 24, value); // shift
        return false;
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_23(Nonterminal nonterminal) {
    // State 23 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
bool state_24(token_type token, const value_type& value) {
    // After "charactor '-' charactor": reduce
    // CharactorRange : charactor hyphen charactor via make_range
    // (value indices 0 and 2 = the two bound charactors).
    if(token == token_charactor || token == token_r_bracket) {
        // reduce
        return call_0_make_range(Nonterminal_CharactorRange, /*pop*/ 3, 0, 2);
    }
    sa_.syntax_error();
    error_ = true;
    return false;
}
int gotof_24(Nonterminal nonterminal) {
    // State 24 has no nonterminal (goto) transitions; unreachable in a
    // correct table.
    assert(0);
    return true;
}
// Dispatch table: maps a state index n to its {action, goto, error?}
// handler triple. NOTE(review): n is not bounds-checked -- callers must
// pass 0..24 or behavior is undefined; presumably guaranteed by the
// generated push_stack/goto logic, verify against the driver loop.
const table_entry* entry(int n) const {
    static const table_entry entries[] = {
        { &Parser::state_0, &Parser::gotof_0, false },
        { &Parser::state_1, &Parser::gotof_1, false },
        { &Parser::state_2, &Parser::gotof_2, false },
        { &Parser::state_3, &Parser::gotof_3, false },
        { &Parser::state_4, &Parser::gotof_4, false },
        { &Parser::state_5, &Parser::gotof_5, false },
        { &Parser::state_6, &Parser::gotof_6, false },
        { &Parser::state_7, &Parser::gotof_7, false },
        { &Parser::state_8, &Parser::gotof_8, false },
        { &Parser::state_9, &Parser::gotof_9, false },
        { &Parser::state_10, &Parser::gotof_10, false },
        { &Parser::state_11, &Parser::gotof_11, false },
        { &Parser::state_12, &Parser::gotof_12, false },
        { &Parser::state_13, &Parser::gotof_13, false },
        { &Parser::state_14, &Parser::gotof_14, false },
        { &Parser::state_15, &Parser::gotof_15, false },
        { &Parser::state_16, &Parser::gotof_16, false },
        { &Parser::state_17, &Parser::gotof_17, false },
        { &Parser::state_18, &Parser::gotof_18, false },
        { &Parser::state_19, &Parser::gotof_19, false },
        { &Parser::state_20, &Parser::gotof_20, false },
        { &Parser::state_21, &Parser::gotof_21, false },
        { &Parser::state_22, &Parser::gotof_22, false },
        { &Parser::state_23, &Parser::gotof_23, false },
        { &Parser::state_24, &Parser::gotof_24, false },
    };
    return &entries[n];
}
}; | |
} // namespace regex_parser | |
#endif // #ifndef REGEX_PARSER_HPP_ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment