Last active
October 4, 2018 15:41
-
-
Save matovitch/7daa664c0954f0d8d215a2f2d8b8bfa8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <functional> | |
#include <iostream> | |
#include <limits> | |
#include <vector> | |
#include <stack> | |
enum Token | |
{ | |
NUM, | |
ADD, // from there the token are stripped from the tree | |
SUB, | |
MUL, | |
DIV, | |
LPR, | |
RPR, | |
LAST_TOKEN | |
}; | |
static constexpr int FIRST_STRIPPED_TOKEN = ADD; | |
static constexpr auto isTokenSaved = [](const int token) { return token < FIRST_STRIPPED_TOKEN; }; | |
enum NonTerm | |
{ | |
MINUS = LAST_TOKEN, | |
DIVISION, | |
SUM, | |
PRODUCT, | |
LAST_NON_TERM | |
}; | |
enum Error | |
{ | |
TOKEN_EOF = LAST_NON_TERM, | |
TOKEN_MISMATCH, | |
UNEXPECTED_EOF, | |
ALT_ERROR, | |
TAG_ERROR, | |
TAG_VALUE, | |
TAG_TOKEN | |
}; | |
static const char* LABELS[] = | |
{ | |
"NUM", | |
"ADD", | |
"SUB", | |
"MUL", | |
"DIV", | |
"LPR", | |
"RPR", | |
"MINUS", | |
"DIVISION", | |
"SUM", | |
"PRODUCT", | |
"TOKEN_EOF", | |
"TOKEN_MISMATCH", | |
"UNEXPECTED_EOF", | |
"ALT_ERROR", | |
"TAG_ERROR", | |
"TAG_VALUE", | |
"TAG_TOKEN" | |
}; | |
class Reader | |
{ | |
public: | |
Reader(const std::vector<int>& tokens) : | |
_head{tokens.data()}, | |
_tail{tokens.data() + tokens.size()} | |
{} | |
void advance() | |
{ | |
_head++; | |
} | |
int look() const | |
{ | |
return *_head; | |
} | |
bool hasReachedEOF() const | |
{ | |
return _head >= _tail; | |
} | |
const int* capture() const | |
{ | |
return _head; | |
} | |
void restore(const int* head) | |
{ | |
_head = head; | |
} | |
private: | |
const int* _head; | |
const int* const _tail; | |
}; | |
struct ParseTag | |
{ | |
ParseTag() = default; | |
ParseTag(int iTag, int iData) : tag{iTag}, data{iData} {} | |
int tag; | |
int data; | |
}; | |
struct ParseState | |
{ | |
ParseState(const std::vector<int>& tokens) : _reader{tokens} {} | |
std::size_t size() const | |
{ | |
return _parseTags.size(); | |
} | |
bool hasReachedEOF() const | |
{ | |
return _reader.hasReachedEOF(); | |
} | |
void advance() | |
{ | |
_reader.advance(); | |
} | |
int look() const | |
{ | |
return _reader.look(); | |
} | |
void save(int tag, int data) | |
{ | |
_parseTags.emplace_back(tag, data); | |
} | |
void saveError(int tag, int data) | |
{ | |
_errorTags.emplace_back(tag, data); | |
} | |
bool isOnError() const | |
{ | |
return !(_errorTags.empty()); | |
} | |
std::size_t sizeError() const | |
{ | |
return _errorTags.size(); | |
} | |
void clearError(std::size_t size) | |
{ | |
_errorTags.resize(size); | |
} | |
std::size_t minParse() const | |
{ | |
return (_parseTags.empty()) ? 1 | |
: _parseTags.back().data + 1; | |
} | |
void display(std::ostream& os) | |
{ | |
if (isOnError()) | |
{ | |
os << "Error !\n"; | |
for (auto it = _errorTags.rbegin(); | |
it != _errorTags.rend(); | |
it++) | |
{ | |
os << "(" << LABELS[it->tag] << ", " << it->data << ")"; | |
} | |
os << '\n'; | |
} | |
for (auto it = _parseTags.rbegin(); | |
it != _parseTags.rend(); | |
it++) | |
{ | |
os << "(" << LABELS[it->tag] << ", " << it->data << ")"; | |
} | |
} | |
const int* capture() | |
{ | |
return _reader.capture(); | |
} | |
void restore(const int* head) | |
{ | |
_reader.restore(head); | |
} | |
private: | |
Reader _reader; | |
std::vector<ParseTag> _parseTags; | |
std::vector<ParseTag> _errorTags; | |
}; | |
auto tok(const int token) | |
{ | |
return [token](ParseState& parseState) | |
{ | |
if (parseState.hasReachedEOF()) | |
{ | |
parseState.saveError(token , 0); | |
parseState.saveError(TOKEN_EOF , 1); | |
return false; | |
} | |
const int look = parseState.look(); | |
if (token != look) | |
{ | |
parseState.saveError(look , 0); | |
parseState.saveError(token , 0); | |
parseState.saveError(TOKEN_MISMATCH , 2); | |
return false; | |
} | |
parseState.advance(); | |
if (isTokenSaved(token)) | |
{ | |
parseState.save(token, 0); | |
} | |
return true; | |
}; | |
} | |
auto seq() | |
{ | |
return [](ParseState&) | |
{ | |
return true; | |
}; | |
} | |
template <class Parser, class... Parsers> | |
auto seq(Parser parser, Parsers&&... parsers) | |
{ | |
return [parser, parsers...](ParseState& parseState) | |
{ | |
if (!parser(parseState)) | |
{ | |
return false; | |
} | |
return seq(parsers...)(parseState); | |
}; | |
} | |
template <class Parser> | |
auto rep(Parser&& parser) | |
{ | |
return [parser](ParseState& parseState) | |
{ | |
const std::size_t sizeError = parseState.sizeError(); | |
const int* snapshot; | |
do | |
{ | |
snapshot = parseState.capture(); | |
} | |
while (parser(parseState)); | |
parseState.restore(snapshot); | |
parseState.clearError(sizeError); | |
return true; | |
}; | |
} | |
enum TagUnaryPolicy | |
{ | |
DROP, | |
KEEP | |
}; | |
template <int TAG, TagUnaryPolicy TAG_UNARY_POLICY = DROP, class Parser> | |
auto tag(Parser&& parser) | |
{ | |
return [parser](ParseState& parseState) | |
{ | |
const int firstToken = parseState.look(); | |
const std::size_t size = parseState.size(); | |
const std::size_t sizeError = parseState.sizeError(); | |
if (!parser(parseState)) | |
{ | |
const int lastToken = parseState.look(); | |
parseState.saveError(lastToken , 0); | |
parseState.saveError(TAG_TOKEN , 1); | |
parseState.saveError(firstToken , 0); | |
parseState.saveError(TAG_TOKEN , 1); | |
parseState.saveError(TAG , 0); | |
parseState.saveError(TAG_VALUE , 1); | |
parseState.saveError(TAG_ERROR , parseState.sizeError() - sizeError); | |
return false; | |
} | |
const std::size_t sizeDiff = parseState.size() - size; | |
if (TAG_UNARY_POLICY == KEEP || sizeDiff > parseState.minParse()) | |
{ | |
parseState.save(TAG, sizeDiff); | |
} | |
return true; | |
}; | |
} | |
auto alt() | |
{ | |
return [](ParseState& parseState, std::size_t initialSizeError) | |
{ | |
parseState.saveError(ALT_ERROR, parseState.sizeError() - initialSizeError); | |
return false; | |
}; | |
} | |
template <class Parser, class... Parsers> | |
auto alt(Parser&& parser, Parsers&&... parsers) | |
{ | |
return [parser, parsers...](ParseState& parseState, std::size_t initialSizeError = std::numeric_limits<std::size_t>::max()) | |
{ | |
initialSizeError = (initialSizeError == std::numeric_limits<std::size_t>::max()) ? parseState.sizeError() | |
: initialSizeError; | |
const int* snapshot = parseState.capture(); | |
if (parser(parseState)) | |
{ | |
parseState.clearError(initialSizeError); | |
return true; | |
} | |
parseState.restore(snapshot); | |
return alt(parsers...)(parseState, initialSizeError); | |
}; | |
} | |
enum LoopIndex | |
{ | |
ATOM_L1, | |
ATOM_L2 | |
}; | |
using Parser = std::function<bool(ParseState&)>; | |
template <int INDEX> | |
class Loop | |
{ | |
public: | |
bool operator()(ParseState& parseState) const | |
{ | |
return PARSER(parseState); | |
} | |
private: | |
static const Parser PARSER; | |
}; | |
template <int INDEX> | |
auto makeLoop = []() { return Loop<INDEX>{}; }; | |
#define DEFINE_LOOP(INDEX, parser) \ | |
template <> \ | |
const Parser Loop<INDEX>::PARSER = parser | |
static const auto atom = alt(tok(NUM), seq(tok(LPR), makeLoop<ATOM_L1>(), tok(RPR)), tag<MINUS, KEEP>(seq(tok(SUB), makeLoop<ATOM_L2>())), | |
seq(tok(ADD), makeLoop<ATOM_L2>())); | |
static const auto divitive = tag<DIVISION, KEEP>(seq(tok(DIV), atom)); | |
static const auto mulitive = seq(tok(MUL), atom) ; | |
static const auto product = tag<PRODUCT>(seq(atom, rep(alt(mulitive, | |
divitive)))); | |
static const auto negative = tag<MINUS, KEEP>(seq(tok(SUB), product)) ; | |
static const auto positive = seq(tok(ADD), product) ; | |
static const auto sum = tag<SUM>(seq(alt(product, negative), rep(alt(negative, | |
positive)))); | |
static const auto& expression = sum; | |
DEFINE_LOOP(ATOM_L1, sum ); | |
DEFINE_LOOP(ATOM_L2, atom ); | |
int main() | |
{ | |
// (PRODUCT, 10)(PRODUCT, 4)(DIVISION, 2)(MINUS, 1)(NUM, 0)(NUM, 0)(NUM, 0)(MINUS, 3)(SUM, 2)(NUM, 0)(NUM, 0) | |
// -(1 + 1) * 1 * (1 / -1) | |
const std::vector<int> tokens = {SUB, LPR, NUM, ADD, NUM, RPR, MUL, NUM, MUL, LPR, NUM, DIV, SUB, NUM, RPR}; | |
ParseState parseState{tokens}; | |
expression(parseState); | |
parseState.display(std::cout); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment