Skip to content

Instantly share code, notes, and snippets.

@matovitch
Last active October 4, 2018 15:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matovitch/7daa664c0954f0d8d215a2f2d8b8bfa8 to your computer and use it in GitHub Desktop.
Save matovitch/7daa664c0954f0d8d215a2f2d8b8bfa8 to your computer and use it in GitHub Desktop.
#include <functional>
#include <iostream>
#include <limits>
#include <vector>
#include <stack>
enum Token
{
NUM,
ADD, // from there the token are stripped from the tree
SUB,
MUL,
DIV,
LPR,
RPR,
LAST_TOKEN
};
static constexpr int FIRST_STRIPPED_TOKEN = ADD;
static constexpr auto isTokenSaved = [](const int token) { return token < FIRST_STRIPPED_TOKEN; };
enum NonTerm
{
MINUS = LAST_TOKEN,
DIVISION,
SUM,
PRODUCT,
LAST_NON_TERM
};
enum Error
{
TOKEN_EOF = LAST_NON_TERM,
TOKEN_MISMATCH,
UNEXPECTED_EOF,
ALT_ERROR,
TAG_ERROR,
TAG_VALUE,
TAG_TOKEN
};
static const char* LABELS[] =
{
"NUM",
"ADD",
"SUB",
"MUL",
"DIV",
"LPR",
"RPR",
"MINUS",
"DIVISION",
"SUM",
"PRODUCT",
"TOKEN_EOF",
"TOKEN_MISMATCH",
"UNEXPECTED_EOF",
"ALT_ERROR",
"TAG_ERROR",
"TAG_VALUE",
"TAG_TOKEN"
};
class Reader
{
public:
Reader(const std::vector<int>& tokens) :
_head{tokens.data()},
_tail{tokens.data() + tokens.size()}
{}
void advance()
{
_head++;
}
int look() const
{
return *_head;
}
bool hasReachedEOF() const
{
return _head >= _tail;
}
const int* capture() const
{
return _head;
}
void restore(const int* head)
{
_head = head;
}
private:
const int* _head;
const int* const _tail;
};
struct ParseTag
{
ParseTag() = default;
ParseTag(int iTag, int iData) : tag{iTag}, data{iData} {}
int tag;
int data;
};
struct ParseState
{
ParseState(const std::vector<int>& tokens) : _reader{tokens} {}
std::size_t size() const
{
return _parseTags.size();
}
bool hasReachedEOF() const
{
return _reader.hasReachedEOF();
}
void advance()
{
_reader.advance();
}
int look() const
{
return _reader.look();
}
void save(int tag, int data)
{
_parseTags.emplace_back(tag, data);
}
void saveError(int tag, int data)
{
_errorTags.emplace_back(tag, data);
}
bool isOnError() const
{
return !(_errorTags.empty());
}
std::size_t sizeError() const
{
return _errorTags.size();
}
void clearError(std::size_t size)
{
_errorTags.resize(size);
}
std::size_t minParse() const
{
return (_parseTags.empty()) ? 1
: _parseTags.back().data + 1;
}
void display(std::ostream& os)
{
if (isOnError())
{
os << "Error !\n";
for (auto it = _errorTags.rbegin();
it != _errorTags.rend();
it++)
{
os << "(" << LABELS[it->tag] << ", " << it->data << ")";
}
os << '\n';
}
for (auto it = _parseTags.rbegin();
it != _parseTags.rend();
it++)
{
os << "(" << LABELS[it->tag] << ", " << it->data << ")";
}
}
const int* capture()
{
return _reader.capture();
}
void restore(const int* head)
{
_reader.restore(head);
}
private:
Reader _reader;
std::vector<ParseTag> _parseTags;
std::vector<ParseTag> _errorTags;
};
auto tok(const int token)
{
return [token](ParseState& parseState)
{
if (parseState.hasReachedEOF())
{
parseState.saveError(token , 0);
parseState.saveError(TOKEN_EOF , 1);
return false;
}
const int look = parseState.look();
if (token != look)
{
parseState.saveError(look , 0);
parseState.saveError(token , 0);
parseState.saveError(TOKEN_MISMATCH , 2);
return false;
}
parseState.advance();
if (isTokenSaved(token))
{
parseState.save(token, 0);
}
return true;
};
}
auto seq()
{
return [](ParseState&)
{
return true;
};
}
template <class Parser, class... Parsers>
auto seq(Parser parser, Parsers&&... parsers)
{
return [parser, parsers...](ParseState& parseState)
{
if (!parser(parseState))
{
return false;
}
return seq(parsers...)(parseState);
};
}
template <class Parser>
auto rep(Parser&& parser)
{
return [parser](ParseState& parseState)
{
const std::size_t sizeError = parseState.sizeError();
const int* snapshot;
do
{
snapshot = parseState.capture();
}
while (parser(parseState));
parseState.restore(snapshot);
parseState.clearError(sizeError);
return true;
};
}
enum TagUnaryPolicy
{
DROP,
KEEP
};
template <int TAG, TagUnaryPolicy TAG_UNARY_POLICY = DROP, class Parser>
auto tag(Parser&& parser)
{
return [parser](ParseState& parseState)
{
const int firstToken = parseState.look();
const std::size_t size = parseState.size();
const std::size_t sizeError = parseState.sizeError();
if (!parser(parseState))
{
const int lastToken = parseState.look();
parseState.saveError(lastToken , 0);
parseState.saveError(TAG_TOKEN , 1);
parseState.saveError(firstToken , 0);
parseState.saveError(TAG_TOKEN , 1);
parseState.saveError(TAG , 0);
parseState.saveError(TAG_VALUE , 1);
parseState.saveError(TAG_ERROR , parseState.sizeError() - sizeError);
return false;
}
const std::size_t sizeDiff = parseState.size() - size;
if (TAG_UNARY_POLICY == KEEP || sizeDiff > parseState.minParse())
{
parseState.save(TAG, sizeDiff);
}
return true;
};
}
auto alt()
{
return [](ParseState& parseState, std::size_t initialSizeError)
{
parseState.saveError(ALT_ERROR, parseState.sizeError() - initialSizeError);
return false;
};
}
template <class Parser, class... Parsers>
auto alt(Parser&& parser, Parsers&&... parsers)
{
return [parser, parsers...](ParseState& parseState, std::size_t initialSizeError = std::numeric_limits<std::size_t>::max())
{
initialSizeError = (initialSizeError == std::numeric_limits<std::size_t>::max()) ? parseState.sizeError()
: initialSizeError;
const int* snapshot = parseState.capture();
if (parser(parseState))
{
parseState.clearError(initialSizeError);
return true;
}
parseState.restore(snapshot);
return alt(parsers...)(parseState, initialSizeError);
};
}
enum LoopIndex
{
ATOM_L1,
ATOM_L2
};
using Parser = std::function<bool(ParseState&)>;
template <int INDEX>
class Loop
{
public:
bool operator()(ParseState& parseState) const
{
return PARSER(parseState);
}
private:
static const Parser PARSER;
};
template <int INDEX>
auto makeLoop = []() { return Loop<INDEX>{}; };
#define DEFINE_LOOP(INDEX, parser) \
template <> \
const Parser Loop<INDEX>::PARSER = parser
static const auto atom = alt(tok(NUM), seq(tok(LPR), makeLoop<ATOM_L1>(), tok(RPR)), tag<MINUS, KEEP>(seq(tok(SUB), makeLoop<ATOM_L2>())),
seq(tok(ADD), makeLoop<ATOM_L2>()));
static const auto divitive = tag<DIVISION, KEEP>(seq(tok(DIV), atom));
static const auto mulitive = seq(tok(MUL), atom) ;
static const auto product = tag<PRODUCT>(seq(atom, rep(alt(mulitive,
divitive))));
static const auto negative = tag<MINUS, KEEP>(seq(tok(SUB), product)) ;
static const auto positive = seq(tok(ADD), product) ;
static const auto sum = tag<SUM>(seq(alt(product, negative), rep(alt(negative,
positive))));
static const auto& expression = sum;
DEFINE_LOOP(ATOM_L1, sum );
DEFINE_LOOP(ATOM_L2, atom );
int main()
{
// (PRODUCT, 10)(PRODUCT, 4)(DIVISION, 2)(MINUS, 1)(NUM, 0)(NUM, 0)(NUM, 0)(MINUS, 3)(SUM, 2)(NUM, 0)(NUM, 0)
// -(1 + 1) * 1 * (1 / -1)
const std::vector<int> tokens = {SUB, LPR, NUM, ADD, NUM, RPR, MUL, NUM, MUL, LPR, NUM, DIV, SUB, NUM, RPR};
ParseState parseState{tokens};
expression(parseState);
parseState.display(std::cout);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment