Created
March 9, 2014 18:52
-
-
Save odashi/9452545 to your computer and use it in GitHub Desktop.
Boost.SpiritによるPascalのパーサ
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// parser_pascal.cpp | |
#include <iostream> | |
#include <string> | |
#define BOOST_SPIRIT_USE_OLD_NAMESPACE | |
//#define BOOST_SPIRIT_DEBUG | |
#include <boost/spirit/include/classic.hpp> | |
#include <boost/spirit/include/classic_ast.hpp> | |
#include "parser.h" | |
using namespace boost::spirit; | |
//////////////////////////////////////////////////////////////////////////////// | |
// | |
// struct PascalSkipParser | |
// | |
// @brief: Pascalの文法定義 | |
// @update: 2011/10/08 by Y.ODA | |
// | |
//////////////////////////////////////////////////////////////////////////////// | |
struct PascalSkipParser : public grammar<PascalSkipParser> | |
{ | |
template<typename Scannar> | |
struct definition | |
{ | |
rule<Scannar> skip_p; | |
// 構文定義 | |
definition(const PascalSkipParser &self) | |
{ | |
skip_p = +space_p | comment_p('{', '}'); | |
// デバッグ出力(出力しない) | |
BOOST_SPIRIT_DEBUG_TRACE_NODE(self, false); | |
BOOST_SPIRIT_DEBUG_TRACE_NODE(skip_p, false); | |
} | |
// 開始記号 | |
const rule<Scannar> &start() const | |
{ | |
return skip_p; | |
} | |
}; | |
}; | |
//////////////////////////////////////////////////////////////////////////////// | |
// | |
// struct PascalParser | |
// | |
// @brief: Pascalの文法定義 | |
// @update: 2011/10/08 by Y.ODA | |
// | |
//////////////////////////////////////////////////////////////////////////////// | |
struct PascalParser : public grammar<PascalParser> | |
{ | |
enum Id | |
{ | |
ID_BASETYPE = 1, | |
ID_IDENTIFIER, | |
ID_LITERAL, | |
ID_PROGRAM, | |
ID_DEF_BLOCK, | |
ID_CODE_BLOCK, | |
ID_CONST_DEF, | |
ID_TYPE_DEF, | |
ID_VARIABLE_DEF, | |
ID_PROCEDUE_DEF, | |
ID_FUNCTION_DEF, | |
ID_ARG_DEF, | |
ID_TYPE_DESC, | |
ID_ARRAY_DESC, | |
ID_RECORD_DESC, | |
ID_STATEMENT, | |
ID_IF_BLOCK, | |
ID_CASE_BLOCK, | |
ID_FOR_BLOCK, | |
ID_WHILE_BLOCK, | |
ID_REPEAT_BLOCK, | |
ID_PROCEDURE_CALL, | |
ID_SUBSTITUTION, | |
ID_VARIABLE, | |
ID_EXPRESSION, | |
ID_SIMPLE_EXPRESSION, | |
ID_TERM, | |
ID_FACTOR | |
}; | |
template<typename Scannar> | |
struct definition | |
{ | |
#define RULE(id) rule<Scannar, parser_context<>, parser_tag<id>> | |
rule<Scannar> keyword; | |
RULE(ID_BASETYPE) basetype; | |
RULE(ID_IDENTIFIER) identifier; | |
RULE(ID_LITERAL) literal_p; | |
rule<Scannar> string_p; | |
RULE(ID_PROGRAM) program; | |
RULE(ID_DEF_BLOCK) def_block; | |
RULE(ID_CODE_BLOCK) code_block; | |
RULE(ID_CONST_DEF) const_def; | |
RULE(ID_TYPE_DEF) type_def; | |
RULE(ID_VARIABLE_DEF) variable_def; | |
RULE(ID_PROCEDUE_DEF) procedure_def; | |
RULE(ID_FUNCTION_DEF) function_def; | |
RULE(ID_ARG_DEF) arg_def; | |
RULE(ID_TYPE_DESC) type_desc; | |
RULE(ID_ARRAY_DESC) array_desc; | |
RULE(ID_RECORD_DESC) record_desc; | |
RULE(ID_STATEMENT) statement; | |
RULE(ID_IF_BLOCK) if_block; | |
RULE(ID_CASE_BLOCK) case_block; | |
RULE(ID_FOR_BLOCK) for_block; | |
RULE(ID_WHILE_BLOCK) while_block; | |
RULE(ID_REPEAT_BLOCK) repeat_block; | |
RULE(ID_PROCEDURE_CALL) procedure_call; | |
RULE(ID_SUBSTITUTION) substitution; | |
RULE(ID_VARIABLE) variable; | |
RULE(ID_EXPRESSION) expression; | |
RULE(ID_SIMPLE_EXPRESSION) simple_expression; | |
RULE(ID_TERM) term; | |
RULE(ID_FACTOR) factor; | |
PascalSkipParser skip_p; | |
// 構文定義 | |
definition(const PascalParser &self) | |
{ | |
keyword | |
= str_p("begin") | "end" | |
| "if" | "case" | "then" | "else" | "for" | "to" | "while" | "do" | "repeat" | "until" | |
| "and" | "or" | "not" | "div" | "mod" | |
| "array" | "of" | "record" | |
| "const" | "type" | "var" | "procedure" | "function" | "program"; | |
basetype | |
= str_p("int") | "real"; | |
identifier | |
//= lexeme_d[(alpha_p | '_') >> *(alnum_p | '_')] - (keyword | basetype); | |
= no_node_d[*skip_p] | |
>> leaf_node_d[ | |
lexeme_d[(alpha_p | '_') >> *(alnum_p | '_')] - (keyword | basetype) | |
]; | |
literal_p | |
// 整数と実数は最長一致とする | |
= longest_d[int_p | real_p] | string_p; | |
string_p | |
//= lexeme_d[confix_p('\"', *c_escape_ch_p, '\"')]; | |
= no_node_d[*skip_p] | |
>> leaf_node_d[ | |
lexeme_d[confix_p('\"', *c_escape_ch_p, '\"')] | |
]; | |
program | |
= no_node_d[str_p("program")] | |
>> identifier | |
>> no_node_d[ch_p('(')] | |
>> !(identifier % no_node_d[ch_p(',')]) | |
>> no_node_d[ch_p(')')] | |
>> no_node_d[ch_p(';')] | |
>> def_block | |
>> code_block | |
>> no_node_d[ch_p('.')]; | |
def_block | |
= *( const_def | |
| type_def | |
| variable_def | |
| function_def | |
| procedure_def); | |
code_block | |
= no_node_d[str_p("begin")] | |
>> !(statement % no_node_d[ch_p(';')]) | |
>> no_node_d[str_p("end")]; | |
const_def | |
= no_node_d[str_p("const")] | |
>> +( identifier | |
>> no_node_d[ch_p('=')] | |
>> literal_p | |
>> no_node_d[ch_p(';')]); | |
type_def | |
= no_node_d[str_p("type")] | |
>> +( identifier | |
>> no_node_d[ch_p('=')] | |
>> type_desc | |
>> no_node_d[ch_p(';')]); | |
variable_def | |
= no_node_d[str_p("var")] | |
>> +( identifier | |
>> *(no_node_d[ch_p(',')] >> identifier) | |
>> no_node_d[ch_p(':')] | |
>> type_desc | |
>> no_node_d[ch_p(';')]); | |
procedure_def | |
= no_node_d[str_p("procedure")] | |
>> identifier | |
>> no_node_d[str_p('(')] | |
>> !(arg_def % no_node_d[str_p(',')]) | |
>> no_node_d[str_p(')')] | |
>> no_node_d[str_p(';')] | |
>> def_block | |
>> code_block | |
>> no_node_d[str_p(';')]; | |
function_def | |
= no_node_d[str_p("function")] | |
>> identifier | |
>> no_node_d[str_p('(')] | |
>> !(arg_def % no_node_d[str_p(',')]) | |
>> no_node_d[str_p(')')] | |
>> no_node_d[str_p(':')] | |
>> type_desc | |
>> no_node_d[str_p(';')] | |
>> def_block | |
>> code_block | |
>> no_node_d[str_p(';')]; | |
arg_def | |
= !str_p("var") | |
>> identifier | |
>> no_node_d[ch_p(':')] | |
>> type_desc; | |
type_desc | |
= identifier | |
| basetype | |
| array_desc | |
| record_desc; | |
array_desc | |
= no_node_d[str_p("array")] | |
>> no_node_d[ch_p('[')] | |
>> int_p | |
>> no_node_d[str_p("..")] | |
>> int_p | |
>> no_node_d[ch_p(']')] | |
>> no_node_d[str_p("of")] | |
>> type_desc; | |
record_desc | |
= no_node_d[str_p("record")] | |
>> +( identifier | |
>> no_node_d[ch_p(':')] | |
>> type_desc | |
>> no_node_d[ch_p(';')]) | |
>> no_node_d[str_p("end")]; | |
statement | |
= if_block | |
| case_block | |
| for_block | |
| while_block | |
| repeat_block | |
| code_block | |
| procedure_call | |
| substitution | |
| identifier; // 括弧省略の手続き/関数呼び出しをここで判定 | |
if_block | |
= no_node_d[str_p("if")] | |
>> expression | |
>> no_node_d[str_p("then")] | |
>> statement | |
>> !( no_node_d[str_p("else")] | |
>> statement); | |
case_block | |
= no_node_d[str_p("case")] | |
>> expression | |
>> no_node_d[str_p("of")] | |
>> *( literal_p | |
>> no_node_d[ch_p(':')] | |
>> statement | |
>> no_node_d[ch_p(';')]) | |
>> !( no_node_d[str_p("else")] | |
>> statement) | |
>> no_node_d[str_p("end")]; | |
for_block | |
= no_node_d[str_p("for")] | |
>> identifier | |
>> no_node_d[str_p(":=")] | |
>> expression | |
>> no_node_d[str_p("to")] | |
>> expression | |
>> no_node_d[str_p("do")] | |
>> statement; | |
while_block | |
= no_node_d[str_p("while")] | |
>> expression | |
>> no_node_d[str_p("do")] | |
>> statement; | |
repeat_block | |
= no_node_d[str_p("repeat")] | |
>> (statement % no_node_d[ch_p(';')]) | |
>> no_node_d[str_p("until")] | |
>> expression; | |
procedure_call | |
// 引数なしなら括弧は省略できるが、解析が面倒なので別で判定する | |
= identifier | |
>> no_node_d[ch_p('(')] | |
>> !(expression % no_node_d[ch_p(',')]) | |
>> no_node_d[ch_p(')')]; | |
substitution | |
= variable | |
>> no_node_d[str_p(":=")] | |
>> expression; | |
variable | |
= identifier | |
>> !( no_node_d[ch_p('[')] | |
>> (expression % no_node_d[ch_p(',')]) | |
>> no_node_d[ch_p(']')]) | |
>> !( no_node_d[ch_p('.')] | |
>> variable); | |
expression | |
= simple_expression | |
>> !( (ch_p('=')|"<>"|'<'|'>'|"<="|">=") | |
>> simple_expression); | |
simple_expression | |
= !(ch_p('+')|'-') | |
>> term | |
>> *( (ch_p('+')|'-'|"or") | |
>> term); | |
term | |
= factor | |
>> *( (ch_p('*')|'/'|"div"|"mod"|"and") | |
>> factor); | |
factor | |
// 括弧省略の手続き/関数を変数と見分ける方法がないので意味解析に任せる | |
= literal_p | |
| procedure_call | |
| variable | |
| (no_node_d[ch_p('(')] >> expression >> no_node_d[ch_p(')')]) | |
| ("not" >> expression); | |
// デバッグ出力 | |
BOOST_SPIRIT_DEBUG_RULE(keyword); | |
BOOST_SPIRIT_DEBUG_RULE(basetype); | |
BOOST_SPIRIT_DEBUG_RULE(identifier); | |
BOOST_SPIRIT_DEBUG_RULE(literal_p); | |
BOOST_SPIRIT_DEBUG_RULE(string_p); | |
BOOST_SPIRIT_DEBUG_RULE(program); | |
BOOST_SPIRIT_DEBUG_RULE(def_block); | |
BOOST_SPIRIT_DEBUG_RULE(code_block); | |
BOOST_SPIRIT_DEBUG_RULE(const_def); | |
BOOST_SPIRIT_DEBUG_RULE(type_def); | |
BOOST_SPIRIT_DEBUG_RULE(variable_def); | |
BOOST_SPIRIT_DEBUG_RULE(procedure_def); | |
BOOST_SPIRIT_DEBUG_RULE(function_def); | |
BOOST_SPIRIT_DEBUG_RULE(arg_def); | |
BOOST_SPIRIT_DEBUG_RULE(type_desc); | |
BOOST_SPIRIT_DEBUG_RULE(array_desc); | |
BOOST_SPIRIT_DEBUG_RULE(record_desc); | |
BOOST_SPIRIT_DEBUG_RULE(statement); | |
BOOST_SPIRIT_DEBUG_RULE(if_block); | |
BOOST_SPIRIT_DEBUG_RULE(case_block); | |
BOOST_SPIRIT_DEBUG_RULE(for_block); | |
BOOST_SPIRIT_DEBUG_RULE(while_block); | |
BOOST_SPIRIT_DEBUG_RULE(repeat_block); | |
BOOST_SPIRIT_DEBUG_RULE(procedure_call); | |
BOOST_SPIRIT_DEBUG_RULE(substitution); | |
BOOST_SPIRIT_DEBUG_RULE(variable); | |
BOOST_SPIRIT_DEBUG_RULE(expression); | |
BOOST_SPIRIT_DEBUG_RULE(simple_expression); | |
BOOST_SPIRIT_DEBUG_RULE(term); | |
BOOST_SPIRIT_DEBUG_RULE(factor); | |
} | |
// 開始記号 | |
const RULE(ID_PROGRAM) &start() const | |
{ | |
return program; | |
} | |
#undef RULE | |
}; | |
}; | |
//------------------------------------------------------------------------------ | |
// | |
// void DumpTree() | |
// | |
// @brief: 構文木をダンプする | |
// @update: 2011/10/08 by Y.ODA | |
// @args: | |
// tree_match<const char *>::tree_iterator &iter: 対象の木のイテレータ | |
// int nest: 現在の階層 | |
// @ret: | |
// (void) | |
// | |
//------------------------------------------------------------------------------ | |
void DumpTree(tree_match<const char *>::tree_iterator &iter, int nest) | |
{ | |
// インデント | |
for (int i = 0; i < nest; ++i) | |
std::cout << "| "; | |
std::string id | |
= iter->value.id() == PascalParser::ID_BASETYPE ? "basetype" | |
: iter->value.id() == PascalParser::ID_IDENTIFIER ? "identifier" | |
: iter->value.id() == PascalParser::ID_LITERAL ? "literal" | |
: iter->value.id() == PascalParser::ID_PROGRAM ? "program" | |
: iter->value.id() == PascalParser::ID_DEF_BLOCK ? "def_block" | |
: iter->value.id() == PascalParser::ID_CODE_BLOCK ? "code_block" | |
: iter->value.id() == PascalParser::ID_CONST_DEF ? "const_def" | |
: iter->value.id() == PascalParser::ID_TYPE_DEF ? "type_def" | |
: iter->value.id() == PascalParser::ID_VARIABLE_DEF ? "variable_def" | |
: iter->value.id() == PascalParser::ID_PROCEDUE_DEF ? "procedure_def" | |
: iter->value.id() == PascalParser::ID_FUNCTION_DEF ? "function_def" | |
: iter->value.id() == PascalParser::ID_ARG_DEF ? "arg_def" | |
: iter->value.id() == PascalParser::ID_TYPE_DESC ? "type_desc" | |
: iter->value.id() == PascalParser::ID_ARRAY_DESC ? "array_desc" | |
: iter->value.id() == PascalParser::ID_RECORD_DESC ? "record_desc" | |
: iter->value.id() == PascalParser::ID_STATEMENT ? "statement" | |
: iter->value.id() == PascalParser::ID_IF_BLOCK ? "if_block" | |
: iter->value.id() == PascalParser::ID_CASE_BLOCK ? "case_block" | |
: iter->value.id() == PascalParser::ID_FOR_BLOCK ? "for_block" | |
: iter->value.id() == PascalParser::ID_WHILE_BLOCK ? "while_block" | |
: iter->value.id() == PascalParser::ID_REPEAT_BLOCK ? "repeat_block" | |
: iter->value.id() == PascalParser::ID_PROCEDURE_CALL ? "procedure_call" | |
: iter->value.id() == PascalParser::ID_SUBSTITUTION ? "substitution" | |
: iter->value.id() == PascalParser::ID_VARIABLE ? "variable" | |
: iter->value.id() == PascalParser::ID_EXPRESSION ? "expression" | |
: iter->value.id() == PascalParser::ID_SIMPLE_EXPRESSION ? "simple_expression" | |
: iter->value.id() == PascalParser::ID_TERM ? "term" | |
: iter->value.id() == PascalParser::ID_FACTOR ? "factor" | |
: "unknown"; | |
std::string data(iter->value.begin(), iter->value.end()); | |
// 要素を表示 | |
if (data.size() > 0) | |
std::cout << id << " \"" << data << "\"" << std::endl; | |
else | |
std::cout << id << std::endl; | |
// 子ノードを表示 | |
tree_match<const char *>::tree_iterator child; | |
for (child = iter->children.begin(); child != iter->children.end(); ++child) | |
DumpTree(child, nest+1); | |
} | |
//------------------------------------------------------------------------------ | |
// | |
// bool parse_pascal() | |
// | |
// @brief: Pascalの構文解析 | |
// @update: 2011/10/08 by Y.ODA | |
// @args: | |
// const char *data: Pascalソースコードを格納した文字列(終端'\0') | |
// @ret: | |
// (bool) 成功したかどうか | |
// | |
//------------------------------------------------------------------------------ | |
bool parse_pascal(const char *data) | |
{ | |
PascalParser g; | |
PascalSkipParser skip_p; | |
tree_parse_info<> r = ast_parse(data, g, skip_p); | |
//parse_info<> r = parse(data, g, skip_p); | |
if (r.match) | |
//if (r.hit) | |
{ | |
std::cout << "Succeeded." << std::endl; | |
DumpTree(r.trees.begin(), 0); | |
} | |
else | |
std::cout << "Failed." << std::endl; | |
return r.match; | |
//return r.hit; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment