Skip to content

Instantly share code, notes, and snippets.

@ioperations
Last active December 5, 2022 15:30
Show Gist options
  • Save ioperations/4f17864db199dfd19f21a7a19e41aa2c to your computer and use it in GitHub Desktop.
Save ioperations/4f17864db199dfd19f21a7a19e41aa2c to your computer and use it in GitHub Desktop.
parser
#include <cstdlib>
#include <exception>
#include <memory>
#include <set>
#include <string>
#include <vector>
enum node_type {
NNUMBER,
NOPERATOR,
NEOF,
};
struct Object {
node_type type;
virtual void Print() {}
virtual ~Object() {}
};
struct Number : public Object {
long value;
Number(std::string s) { value = atol(s.c_str()); }
};
struct BinaryNode : public Object {
char op;
std::unique_ptr<Object> lhs;
std::unique_ptr<Object> rhs;
BinaryNode() : op(), lhs(nullptr), rhs(nullptr) {}
BinaryNode(char op, std::unique_ptr<Object> lhs,
std::unique_ptr<Object> rhs)
: op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {}
};
/*! \class tokenize
* \brief Brief class description
*
* Detailed description
*/
class Tokenize {
std::set<char> op = {'+', '-', '*', '/'};
std::string m_string;
int m_size;
int now;
std::string literal;
public:
Tokenize(const std::string& s) {
m_string = s;
m_size = s.size();
now = 0;
};
std::string Value() const noexcept { return literal; }
node_type LookAHead(std::string& tmp_literal) {
tmp_literal = "";
int my_now = now;
if (my_now >= m_size) return node_type::NEOF;
if (op.count(m_string[my_now])) {
tmp_literal = m_string[my_now];
my_now++;
return node_type::NOPERATOR;
}
while (my_now < m_size && '0' <= m_string[my_now] &&
m_string[my_now] <= '9') {
tmp_literal += m_string[my_now];
my_now++;
}
return node_type::NNUMBER;
}
node_type Next() {
literal = "";
if (now >= m_size) return node_type::NEOF;
if (op.count(m_string[now])) {
literal = m_string[now];
now++;
return node_type::NOPERATOR;
}
while (now < m_size && '0' <= m_string[now] && m_string[now] <= '9') {
literal += m_string[now];
now++;
}
return node_type::NNUMBER;
}
virtual ~Tokenize(){};
};
class Error : public std::exception {
public:
Error(const std::string& s) { m_s = s; }
virtual ~Error() {}
const char* what() const noexcept override { return m_s.c_str(); }
private:
/* data */
std::string m_s;
};
class Parse {
Tokenize& m_tokenizer;
public:
Parse(Tokenize& t) : m_tokenizer(t){};
std::unique_ptr<Object> StartParse() {
// pass
return nullptr;
}
std::unique_ptr<Object> ParseExpression() {
auto lhs = ParsePrimary();
if (!lhs) return nullptr;
return ParseBinary(0, std::move(lhs));
}
std::unique_ptr<Object> ParsePrimary() {
// pass
auto z = m_tokenizer.Next();
if (z == node_type::NEOF) return nullptr;
if (z == node_type::NNUMBER)
return std::make_unique<Number>(m_tokenizer.Value());
throw Error("should not b a operator");
return nullptr;
}
int GetTokenPrecendence(char c) {
if (c == '/' || c == '*') return 20;
if (c == '+' || c == '-') return 10;
return 0;
}
std::unique_ptr<Object> ParseBinary(int precendence,
std::unique_ptr<Object> lhs) {
while (true) {
std::string z;
auto typ = m_tokenizer.LookAHead(z);
if (typ == node_type::NEOF) return lhs;
if (typ != node_type::NOPERATOR)
throw Error("should be a Operator");
char binaryop = z[0];
int p = GetTokenPrecendence(binaryop);
if (p < precendence) return lhs;
m_tokenizer.Next();
auto rhs = ParsePrimary();
if (!rhs) {
m_tokenizer.Next();
return nullptr;
}
std::string literal;
auto eof = m_tokenizer.LookAHead(literal);
if (eof == node_type::NEOF) {
m_tokenizer.Next();
return std::make_unique<BinaryNode>(binaryop, std::move(lhs),
std::move(rhs));
}
auto next_precedence = GetTokenPrecendence(literal[0]);
if (p < next_precedence) {
rhs = ParseBinary(p + 1, std::move(rhs));
if (!rhs) return nullptr;
}
lhs = std::make_unique<BinaryNode>(binaryop, std::move(lhs),
std::move(rhs));
}
}
virtual ~Parse(){};
private:
};
#include <gtest/gtest.h>
#include <iostream>
TEST(token, t1) {
Tokenize t("1+2");
auto z = t.Next();
EXPECT_EQ(z, node_type::NNUMBER);
EXPECT_EQ(t.Value(), "1");
z = t.Next();
EXPECT_EQ(z, node_type::NOPERATOR);
EXPECT_EQ(t.Value(), "+");
z = t.Next();
EXPECT_EQ(z, node_type::NNUMBER);
EXPECT_EQ(t.Value(), "2");
z = t.Next();
EXPECT_EQ(z, node_type::NEOF);
EXPECT_EQ(t.Value(), "");
}
TEST(token, t2) {
Tokenize t("1+2*3");
std::string literal;
auto z = t.LookAHead(literal);
EXPECT_EQ(z, node_type::NNUMBER);
EXPECT_EQ(literal, "1");
z = t.Next();
EXPECT_EQ(z, node_type::NNUMBER);
EXPECT_EQ(t.Value(), "1");
z = t.LookAHead(literal);
EXPECT_EQ(z, node_type::NOPERATOR);
EXPECT_EQ(literal, "+");
z = t.Next();
EXPECT_EQ(z, node_type::NOPERATOR);
EXPECT_EQ(t.Value(), "+");
z = t.LookAHead(literal);
EXPECT_EQ(z, node_type::NNUMBER);
EXPECT_EQ(literal, "2");
z = t.Next();
EXPECT_EQ(z, node_type::NNUMBER);
EXPECT_EQ(t.Value(), "2");
z = t.LookAHead(literal);
EXPECT_EQ(z, node_type::NOPERATOR);
EXPECT_EQ(literal, "*");
z = t.Next();
EXPECT_EQ(z, node_type::NOPERATOR);
EXPECT_EQ(t.Value(), "*");
z = t.LookAHead(literal);
EXPECT_EQ(z, node_type::NNUMBER);
EXPECT_EQ(literal, "3");
z = t.Next();
EXPECT_EQ(z, node_type::NNUMBER);
EXPECT_EQ(t.Value(), "3");
z = t.LookAHead(literal);
EXPECT_EQ(z, node_type::NEOF);
EXPECT_EQ(literal, "");
z = t.Next();
EXPECT_EQ(z, node_type::NEOF);
EXPECT_EQ(t.Value(), "");
}
namespace {
/**
* @brief 返回结果逆波兰表达式
* @param @root ast树的根节点
* @param @str 返回结果字符串
* @return void
*/
void GenerateExpr(std::unique_ptr<Object> root, std::string& str) {
// 逆波兰表达式非常适合基于堆栈的虚拟机来执行表达式
if (root == nullptr) return;
if (auto* t = dynamic_cast<Number*>(root.get())) {
str += std::to_string(t->value);
return;
}
if (auto* t = dynamic_cast<BinaryNode*>(root.get())) {
GenerateExpr(std::move(t->lhs), str);
str += " ";
GenerateExpr(std::move(t->rhs), str);
str += " ";
str += std::string(1, t->op);
return;
}
}
/**
* @brief 生成lisp代码
* @param @root ast树的根节点
* @param @str 返回结果字符串
* @return void
*/
void GenerateExprLisp(std::unique_ptr<Object> root, std::string& str) {
// 生成lisp语言的表达式
if (root == nullptr) return;
if (auto* t = dynamic_cast<Number*>(root.get())) {
str += std::to_string(t->value);
return;
}
if (auto* t = dynamic_cast<BinaryNode*>(root.get())) {
str += "(" + std::string(1, t->op) + " ";
GenerateExprLisp(std::move(t->lhs), str);
str += " ";
GenerateExprLisp(std::move(t->rhs), str);
str += ")";
return;
}
}
} // namespace
TEST(parse, t0) {
Tokenize t("1+2*3+4");
Parse parser(t);
auto it = parser.ParseExpression();
std::vector<std::string> vec;
std::string str;
GenerateExpr(std::move(it), str);
/*
* +
* / \
* 1 *
* / \
* 2 3
*
*/
EXPECT_EQ(str, "1 2 3 * + 4 +");
}
TEST(parse, t2) {
Tokenize t("1+2*3+4+5+6+7+8");
Parse parser(t);
auto it = parser.ParseExpression();
std::string str;
GenerateExprLisp(std::move(it), str);
/*
* +
* / \
* 1 *
* / \
* 2 3
*
*/
std::string expected = "(+ (+ (+ (+ (+ (+ 1 (* 2 3)) 4) 5) 6) 7) 8)";
EXPECT_EQ(str, expected);
}
TEST(parse, t3) {
Tokenize t("1+2*3+4+5*6+7+8");
Parse parser(t);
auto it = parser.ParseExpression();
std::string str;
GenerateExprLisp(std::move(it), str);
/*
* +
* / \
* 1 *
* / \
* 2 3
*
*/
std::string expected = "(+ (+ (+ (+ (+ 1 (* 2 3)) 4) (* 5 6)) 7) 8)";
EXPECT_EQ(str, expected);
}
int main(int argc, char* argv[]) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
@ioperations
Copy link
Author

ioperations commented Aug 26, 2022

clang++ binary_expr.cc -lgtest -fsanitize=address -o binary_expr
./binary_expr

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment