Skip to content

Instantly share code, notes, and snippets.

@odashi
Created March 9, 2014 18:52
Show Gist options
  • Save odashi/9452545 to your computer and use it in GitHub Desktop.
Save odashi/9452545 to your computer and use it in GitHub Desktop.
Boost.SpiritによるPascalのパーサ
// parser_pascal.cpp
#include <iostream>
#include <string>
#define BOOST_SPIRIT_USE_OLD_NAMESPACE
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/classic.hpp>
#include <boost/spirit/include/classic_ast.hpp>
#include "parser.h"
using namespace boost::spirit;
////////////////////////////////////////////////////////////////////////////////
//
// struct PascalSkipParser
//
// @brief: Pascalの文法定義
// @update: 2011/10/08 by Y.ODA
//
////////////////////////////////////////////////////////////////////////////////
struct PascalSkipParser : public grammar<PascalSkipParser>
{
template<typename Scannar>
struct definition
{
rule<Scannar> skip_p;
// 構文定義
definition(const PascalSkipParser &self)
{
skip_p = +space_p | comment_p('{', '}');
// デバッグ出力(出力しない)
BOOST_SPIRIT_DEBUG_TRACE_NODE(self, false);
BOOST_SPIRIT_DEBUG_TRACE_NODE(skip_p, false);
}
// 開始記号
const rule<Scannar> &start() const
{
return skip_p;
}
};
};
////////////////////////////////////////////////////////////////////////////////
//
// struct PascalParser
//
// @brief: Pascalの文法定義
// @update: 2011/10/08 by Y.ODA
//
////////////////////////////////////////////////////////////////////////////////
struct PascalParser : public grammar<PascalParser>
{
enum Id
{
ID_BASETYPE = 1,
ID_IDENTIFIER,
ID_LITERAL,
ID_PROGRAM,
ID_DEF_BLOCK,
ID_CODE_BLOCK,
ID_CONST_DEF,
ID_TYPE_DEF,
ID_VARIABLE_DEF,
ID_PROCEDUE_DEF,
ID_FUNCTION_DEF,
ID_ARG_DEF,
ID_TYPE_DESC,
ID_ARRAY_DESC,
ID_RECORD_DESC,
ID_STATEMENT,
ID_IF_BLOCK,
ID_CASE_BLOCK,
ID_FOR_BLOCK,
ID_WHILE_BLOCK,
ID_REPEAT_BLOCK,
ID_PROCEDURE_CALL,
ID_SUBSTITUTION,
ID_VARIABLE,
ID_EXPRESSION,
ID_SIMPLE_EXPRESSION,
ID_TERM,
ID_FACTOR
};
template<typename Scannar>
struct definition
{
#define RULE(id) rule<Scannar, parser_context<>, parser_tag<id>>
rule<Scannar> keyword;
RULE(ID_BASETYPE) basetype;
RULE(ID_IDENTIFIER) identifier;
RULE(ID_LITERAL) literal_p;
rule<Scannar> string_p;
RULE(ID_PROGRAM) program;
RULE(ID_DEF_BLOCK) def_block;
RULE(ID_CODE_BLOCK) code_block;
RULE(ID_CONST_DEF) const_def;
RULE(ID_TYPE_DEF) type_def;
RULE(ID_VARIABLE_DEF) variable_def;
RULE(ID_PROCEDUE_DEF) procedure_def;
RULE(ID_FUNCTION_DEF) function_def;
RULE(ID_ARG_DEF) arg_def;
RULE(ID_TYPE_DESC) type_desc;
RULE(ID_ARRAY_DESC) array_desc;
RULE(ID_RECORD_DESC) record_desc;
RULE(ID_STATEMENT) statement;
RULE(ID_IF_BLOCK) if_block;
RULE(ID_CASE_BLOCK) case_block;
RULE(ID_FOR_BLOCK) for_block;
RULE(ID_WHILE_BLOCK) while_block;
RULE(ID_REPEAT_BLOCK) repeat_block;
RULE(ID_PROCEDURE_CALL) procedure_call;
RULE(ID_SUBSTITUTION) substitution;
RULE(ID_VARIABLE) variable;
RULE(ID_EXPRESSION) expression;
RULE(ID_SIMPLE_EXPRESSION) simple_expression;
RULE(ID_TERM) term;
RULE(ID_FACTOR) factor;
PascalSkipParser skip_p;
// 構文定義
definition(const PascalParser &self)
{
keyword
= str_p("begin") | "end"
| "if" | "case" | "then" | "else" | "for" | "to" | "while" | "do" | "repeat" | "until"
| "and" | "or" | "not" | "div" | "mod"
| "array" | "of" | "record"
| "const" | "type" | "var" | "procedure" | "function" | "program";
basetype
= str_p("int") | "real";
identifier
//= lexeme_d[(alpha_p | '_') >> *(alnum_p | '_')] - (keyword | basetype);
= no_node_d[*skip_p]
>> leaf_node_d[
lexeme_d[(alpha_p | '_') >> *(alnum_p | '_')] - (keyword | basetype)
];
literal_p
// 整数と実数は最長一致とする
= longest_d[int_p | real_p] | string_p;
string_p
//= lexeme_d[confix_p('\"', *c_escape_ch_p, '\"')];
= no_node_d[*skip_p]
>> leaf_node_d[
lexeme_d[confix_p('\"', *c_escape_ch_p, '\"')]
];
program
= no_node_d[str_p("program")]
>> identifier
>> no_node_d[ch_p('(')]
>> !(identifier % no_node_d[ch_p(',')])
>> no_node_d[ch_p(')')]
>> no_node_d[ch_p(';')]
>> def_block
>> code_block
>> no_node_d[ch_p('.')];
def_block
= *( const_def
| type_def
| variable_def
| function_def
| procedure_def);
code_block
= no_node_d[str_p("begin")]
>> !(statement % no_node_d[ch_p(';')])
>> no_node_d[str_p("end")];
const_def
= no_node_d[str_p("const")]
>> +( identifier
>> no_node_d[ch_p('=')]
>> literal_p
>> no_node_d[ch_p(';')]);
type_def
= no_node_d[str_p("type")]
>> +( identifier
>> no_node_d[ch_p('=')]
>> type_desc
>> no_node_d[ch_p(';')]);
variable_def
= no_node_d[str_p("var")]
>> +( identifier
>> *(no_node_d[ch_p(',')] >> identifier)
>> no_node_d[ch_p(':')]
>> type_desc
>> no_node_d[ch_p(';')]);
procedure_def
= no_node_d[str_p("procedure")]
>> identifier
>> no_node_d[str_p('(')]
>> !(arg_def % no_node_d[str_p(',')])
>> no_node_d[str_p(')')]
>> no_node_d[str_p(';')]
>> def_block
>> code_block
>> no_node_d[str_p(';')];
function_def
= no_node_d[str_p("function")]
>> identifier
>> no_node_d[str_p('(')]
>> !(arg_def % no_node_d[str_p(',')])
>> no_node_d[str_p(')')]
>> no_node_d[str_p(':')]
>> type_desc
>> no_node_d[str_p(';')]
>> def_block
>> code_block
>> no_node_d[str_p(';')];
arg_def
= !str_p("var")
>> identifier
>> no_node_d[ch_p(':')]
>> type_desc;
type_desc
= identifier
| basetype
| array_desc
| record_desc;
array_desc
= no_node_d[str_p("array")]
>> no_node_d[ch_p('[')]
>> int_p
>> no_node_d[str_p("..")]
>> int_p
>> no_node_d[ch_p(']')]
>> no_node_d[str_p("of")]
>> type_desc;
record_desc
= no_node_d[str_p("record")]
>> +( identifier
>> no_node_d[ch_p(':')]
>> type_desc
>> no_node_d[ch_p(';')])
>> no_node_d[str_p("end")];
statement
= if_block
| case_block
| for_block
| while_block
| repeat_block
| code_block
| procedure_call
| substitution
| identifier; // 括弧省略の手続き/関数呼び出しをここで判定
if_block
= no_node_d[str_p("if")]
>> expression
>> no_node_d[str_p("then")]
>> statement
>> !( no_node_d[str_p("else")]
>> statement);
case_block
= no_node_d[str_p("case")]
>> expression
>> no_node_d[str_p("of")]
>> *( literal_p
>> no_node_d[ch_p(':')]
>> statement
>> no_node_d[ch_p(';')])
>> !( no_node_d[str_p("else")]
>> statement)
>> no_node_d[str_p("end")];
for_block
= no_node_d[str_p("for")]
>> identifier
>> no_node_d[str_p(":=")]
>> expression
>> no_node_d[str_p("to")]
>> expression
>> no_node_d[str_p("do")]
>> statement;
while_block
= no_node_d[str_p("while")]
>> expression
>> no_node_d[str_p("do")]
>> statement;
repeat_block
= no_node_d[str_p("repeat")]
>> (statement % no_node_d[ch_p(';')])
>> no_node_d[str_p("until")]
>> expression;
procedure_call
// 引数なしなら括弧は省略できるが、解析が面倒なので別で判定する
= identifier
>> no_node_d[ch_p('(')]
>> !(expression % no_node_d[ch_p(',')])
>> no_node_d[ch_p(')')];
substitution
= variable
>> no_node_d[str_p(":=")]
>> expression;
variable
= identifier
>> !( no_node_d[ch_p('[')]
>> (expression % no_node_d[ch_p(',')])
>> no_node_d[ch_p(']')])
>> !( no_node_d[ch_p('.')]
>> variable);
expression
= simple_expression
>> !( (ch_p('=')|"<>"|'<'|'>'|"<="|">=")
>> simple_expression);
simple_expression
= !(ch_p('+')|'-')
>> term
>> *( (ch_p('+')|'-'|"or")
>> term);
term
= factor
>> *( (ch_p('*')|'/'|"div"|"mod"|"and")
>> factor);
factor
// 括弧省略の手続き/関数を変数と見分ける方法がないので意味解析に任せる
= literal_p
| procedure_call
| variable
| (no_node_d[ch_p('(')] >> expression >> no_node_d[ch_p(')')])
| ("not" >> expression);
// デバッグ出力
BOOST_SPIRIT_DEBUG_RULE(keyword);
BOOST_SPIRIT_DEBUG_RULE(basetype);
BOOST_SPIRIT_DEBUG_RULE(identifier);
BOOST_SPIRIT_DEBUG_RULE(literal_p);
BOOST_SPIRIT_DEBUG_RULE(string_p);
BOOST_SPIRIT_DEBUG_RULE(program);
BOOST_SPIRIT_DEBUG_RULE(def_block);
BOOST_SPIRIT_DEBUG_RULE(code_block);
BOOST_SPIRIT_DEBUG_RULE(const_def);
BOOST_SPIRIT_DEBUG_RULE(type_def);
BOOST_SPIRIT_DEBUG_RULE(variable_def);
BOOST_SPIRIT_DEBUG_RULE(procedure_def);
BOOST_SPIRIT_DEBUG_RULE(function_def);
BOOST_SPIRIT_DEBUG_RULE(arg_def);
BOOST_SPIRIT_DEBUG_RULE(type_desc);
BOOST_SPIRIT_DEBUG_RULE(array_desc);
BOOST_SPIRIT_DEBUG_RULE(record_desc);
BOOST_SPIRIT_DEBUG_RULE(statement);
BOOST_SPIRIT_DEBUG_RULE(if_block);
BOOST_SPIRIT_DEBUG_RULE(case_block);
BOOST_SPIRIT_DEBUG_RULE(for_block);
BOOST_SPIRIT_DEBUG_RULE(while_block);
BOOST_SPIRIT_DEBUG_RULE(repeat_block);
BOOST_SPIRIT_DEBUG_RULE(procedure_call);
BOOST_SPIRIT_DEBUG_RULE(substitution);
BOOST_SPIRIT_DEBUG_RULE(variable);
BOOST_SPIRIT_DEBUG_RULE(expression);
BOOST_SPIRIT_DEBUG_RULE(simple_expression);
BOOST_SPIRIT_DEBUG_RULE(term);
BOOST_SPIRIT_DEBUG_RULE(factor);
}
// 開始記号
const RULE(ID_PROGRAM) &start() const
{
return program;
}
#undef RULE
};
};
//------------------------------------------------------------------------------
//
// void DumpTree()
//
// @brief: 構文木をダンプする
// @update: 2011/10/08 by Y.ODA
// @args:
// tree_match<const char *>::tree_iterator &iter: 対象の木のイテレータ
// int nest: 現在の階層
// @ret:
// (void)
//
//------------------------------------------------------------------------------
void DumpTree(tree_match<const char *>::tree_iterator &iter, int nest)
{
// インデント
for (int i = 0; i < nest; ++i)
std::cout << "| ";
std::string id
= iter->value.id() == PascalParser::ID_BASETYPE ? "basetype"
: iter->value.id() == PascalParser::ID_IDENTIFIER ? "identifier"
: iter->value.id() == PascalParser::ID_LITERAL ? "literal"
: iter->value.id() == PascalParser::ID_PROGRAM ? "program"
: iter->value.id() == PascalParser::ID_DEF_BLOCK ? "def_block"
: iter->value.id() == PascalParser::ID_CODE_BLOCK ? "code_block"
: iter->value.id() == PascalParser::ID_CONST_DEF ? "const_def"
: iter->value.id() == PascalParser::ID_TYPE_DEF ? "type_def"
: iter->value.id() == PascalParser::ID_VARIABLE_DEF ? "variable_def"
: iter->value.id() == PascalParser::ID_PROCEDUE_DEF ? "procedure_def"
: iter->value.id() == PascalParser::ID_FUNCTION_DEF ? "function_def"
: iter->value.id() == PascalParser::ID_ARG_DEF ? "arg_def"
: iter->value.id() == PascalParser::ID_TYPE_DESC ? "type_desc"
: iter->value.id() == PascalParser::ID_ARRAY_DESC ? "array_desc"
: iter->value.id() == PascalParser::ID_RECORD_DESC ? "record_desc"
: iter->value.id() == PascalParser::ID_STATEMENT ? "statement"
: iter->value.id() == PascalParser::ID_IF_BLOCK ? "if_block"
: iter->value.id() == PascalParser::ID_CASE_BLOCK ? "case_block"
: iter->value.id() == PascalParser::ID_FOR_BLOCK ? "for_block"
: iter->value.id() == PascalParser::ID_WHILE_BLOCK ? "while_block"
: iter->value.id() == PascalParser::ID_REPEAT_BLOCK ? "repeat_block"
: iter->value.id() == PascalParser::ID_PROCEDURE_CALL ? "procedure_call"
: iter->value.id() == PascalParser::ID_SUBSTITUTION ? "substitution"
: iter->value.id() == PascalParser::ID_VARIABLE ? "variable"
: iter->value.id() == PascalParser::ID_EXPRESSION ? "expression"
: iter->value.id() == PascalParser::ID_SIMPLE_EXPRESSION ? "simple_expression"
: iter->value.id() == PascalParser::ID_TERM ? "term"
: iter->value.id() == PascalParser::ID_FACTOR ? "factor"
: "unknown";
std::string data(iter->value.begin(), iter->value.end());
// 要素を表示
if (data.size() > 0)
std::cout << id << " \"" << data << "\"" << std::endl;
else
std::cout << id << std::endl;
// 子ノードを表示
tree_match<const char *>::tree_iterator child;
for (child = iter->children.begin(); child != iter->children.end(); ++child)
DumpTree(child, nest+1);
}
//------------------------------------------------------------------------------
//
// bool parse_pascal()
//
// @brief: Pascalの構文解析
// @update: 2011/10/08 by Y.ODA
// @args:
// const char *data: Pascalソースコードを格納した文字列(終端'\0')
// @ret:
// (bool) 成功したかどうか
//
//------------------------------------------------------------------------------
bool parse_pascal(const char *data)
{
PascalParser g;
PascalSkipParser skip_p;
tree_parse_info<> r = ast_parse(data, g, skip_p);
//parse_info<> r = parse(data, g, skip_p);
if (r.match)
//if (r.hit)
{
std::cout << "Succeeded." << std::endl;
DumpTree(r.trees.begin(), 0);
}
else
std::cout << "Failed." << std::endl;
return r.match;
//return r.hit;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment