Last active
January 19, 2021 13:46
-
-
Save ZhekehZ/e156bac3f1af36dd007fce329fb277aa to your computer and use it in GitHub Desktop.
C++ compile time parser (without using constexpr strings) https://godbolt.org/z/8qxY3Y
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <optional>
#include <tuple>
#include <type_traits>
#include <utility>
// C++ compile time parser (without using constexpr strings)
// PARSER DEFINITION
// Opens the template header shared by every parser. Extra template parameters
// passed as macro arguments come first, followed by the three common ones:
//   N   - size of the input character array
//   Str - the input characters
//   Pos - index at which this parser starts reading
// __VA_OPT__ emits the separating comma only when extra parameters are given.
#define PARSER(...) template <__VA_ARGS__ __VA_OPT__(,) unsigned N, const char Str[N], unsigned Pos>
// PARSER AS PARSER ARGUMENT
// Spells a template template parameter that accepts any parser, i.e. any
// template taking (unsigned, const char[], unsigned).
// Usage: PARSER(PARSER_ARG Inner) struct wrapper { ... };
#define PARSER_ARG template <unsigned N1, const char [N1], unsigned> typename
// COMBINE PARSERS USING PARSER COMBINATOR
// Defines `name` as a new parser: an alias template with the common
// N / Str / Pos parameters that forwards them, after the listed arguments,
// to `combiner`. The expansion already ends in ';'.
#define COMBINE(name, combiner, ...) PARSER() using name = combiner<__VA_ARGS__, N, Str, Pos>;
// EMPTY RESULT
// Tag type used as the `result` of parsers that produce no value.
struct empty {};

// Returns true when the (decayed) type of the argument is `empty`.
// Only the type is inspected, so the parameter is left unnamed.
template <typename T>
static constexpr bool is_empty(T) { return std::is_same_v<std::decay_t<T>, empty>; }
// Helper aggregate: a parsed value together with the position at which
// parsing should continue.
template <typename T>
struct parse_result {
    T result;          // value produced by the parser
    unsigned next_pos; // index of the next character to read
};
// DIGIT PARSER
// Matches one decimal digit at Pos.
//   is_ok    - Str[Pos] is in '0'..'9' and Pos + 1 is still inside the array
//   result   - numeric value of the digit, or 0 when nothing matched
//   next_pos - Pos + 1 on success, Pos otherwise
PARSER() struct digit {
    // The bounds test comes first so Str[Pos] is never read out of range.
    static constexpr bool is_ok = Pos + 1 < N && Str[Pos] <= '9' && Str[Pos] >= '0';
    // The conditional evaluates Str[Pos] only when is_ok holds.
    static constexpr unsigned result = is_ok ? static_cast<unsigned>(Str[Pos] - '0') : 0u;
    static constexpr unsigned next_pos = is_ok ? Pos + 1 : Pos;
};
// SPACE PARSER
// Matches a single ' ' at Pos and produces no value.
//   is_ok    - Pos is inside the array and Str[Pos] is a space
//   result   - always `empty`
//   next_pos - Pos + 1 on success, Pos otherwise
PARSER() struct blank {
    static constexpr bool is_ok = Pos < N && Str[Pos] == ' ';
    static constexpr empty result{};
    static constexpr unsigned next_pos = is_ok ? Pos + 1 : Pos;
};
// 'MANY' PARSER COMBINATOR
// Applies Parser repeatedly, starting at Pos, until it fails.
//   is_ok    - Parser matched at least once
//   result   - std::tuple of the non-empty results in input order, or `empty`
//              when nothing matched or every match produced `empty`
//   next_pos - position right after the last successful match (Pos if none)
PARSER(PARSER_ARG Parser) struct many {
private:
    using first = Parser<N, Str, Pos>;

    // Builds a parse_result<T> explicitly, so no class template argument
    // deduction for aggregates is required.
    template <typename T>
    static constexpr parse_result<T> make(T value, unsigned pos) { return { value, pos }; }

    static constexpr auto impl() {
        if constexpr (first::is_ok) {
            // A match that consumes nothing would repeat forever.
            static_assert(first::next_pos > Pos,
                          "many: the repeated parser must consume input when it succeeds");
            // Continue where the matched parser stopped, not at Pos + 1:
            // the repeated parser may consume more than one character.
            using rest = many<Parser, N, Str, first::next_pos>;
            constexpr auto head = first::result;
            if constexpr (is_empty(head)) {
                // Nothing to collect from this match; pass the tail through.
                return make(rest::result, rest::next_pos);
            } else {
                constexpr auto tail = rest::result;
                if constexpr (is_empty(tail))
                    return make(std::tuple{head}, rest::next_pos);
                else
                    return make(std::tuple_cat(std::tuple{head}, tail), rest::next_pos);
            }
        } else {
            return make(empty{}, Pos);
        }
    }
public:
    static constexpr bool is_ok = first::is_ok;
    static constexpr auto result = impl().result;
    static constexpr unsigned next_pos = impl().next_pos;
};
// digits: repetition of `digit`; blanks: repetition of `blank`.
COMBINE(digits, many, digit);
COMBINE(blanks, many, blank);
// NAT PARSER
// Parses a run of decimal digits into an unsigned number.
//   is_ok    - at least one digit was found at Pos
//   result   - the decimal value of the digits, or 0 when none matched
//   next_pos - position after the last digit (Pos if none)
PARSER() struct natural {
private:
    using dts = digits<N, Str, Pos>;

    static constexpr unsigned impl() {
        if constexpr (dts::is_ok) {
            // dts::result is a tuple of digit values, most significant first.
            return std::apply(
                [] (auto ... ds) {
                    unsigned value = 0;
                    ((value = value * 10 + ds), ...);
                    return value;
                },
                dts::result
            );
        } else {
            // With no digits dts::result is `empty`, which std::apply cannot
            // unpack; report 0 so a failed match is not a compile error.
            return 0;
        }
    }
public:
    static constexpr bool is_ok = dts::is_ok;
    static constexpr unsigned result = impl();
    static constexpr unsigned next_pos = dts::next_pos;
};
// 'SEQUENCE' PARSER COMBINATOR
// Runs Parser1 at Pos, then Parser2 at the position where Parser1 stopped.
//   is_ok    - both parsers succeeded
//   result   - std::pair of both results when neither is `empty`; the single
//              non-empty result when exactly one is; `empty` when both are.
//              It is assembled from whatever the two parsers report and is
//              not gated on is_ok.
//   next_pos - Parser2's end position on success, Pos otherwise
PARSER(PARSER_ARG Parser1, PARSER_ARG Parser2) struct seq {
private:
    using result1 = Parser1<N, Str, Pos>;
    using result2 = Parser2<N, Str, result1::next_pos>;
    static constexpr bool is_empty1 = is_empty(result1::result);
    static constexpr bool is_empty2 = is_empty(result2::result);

    // Drops `empty` components so value-less parsers (blanks, tokens) do not
    // show up in the combined result.
    static constexpr auto impl() {
        if constexpr (!is_empty1 && !is_empty2) return std::pair { result1::result, result2::result };
        else if constexpr (!is_empty1) return result1::result;
        else if constexpr (!is_empty2) return result2::result;
        else return empty{};
    }
public:
    static constexpr bool is_ok = result1::is_ok && result2::is_ok;
    static constexpr auto result = impl();
    static constexpr unsigned next_pos = is_ok ? result2::next_pos : Pos;
};
// TOKEN PARSER
// Matches the fixed character sequence Tok at Pos and produces no value.
// M is the size of the Tok array including its final character, so the
// first M - 1 characters are compared.
//   is_ok    - all M - 1 characters match and lie inside the input array
//   result   - always `empty`
//   next_pos - Pos + M - 1 on success, Pos otherwise
PARSER(unsigned M, const char Tok[M]) struct token {
    static_assert(M >= 1, "token: M must be the array size of Tok (use sizeof)");
private:
    static constexpr bool matches() {
        for (unsigned i = 0; i + 1 < M; ++i) {
            // The bounds test comes first so Str is never read past its end.
            if (Pos + i >= N || Tok[i] != Str[Pos + i]) return false;
        }
        return true;
    }
public:
    static constexpr bool is_ok = matches();
    static constexpr unsigned next_pos = is_ok ? Pos + M - 1 : Pos;
    static constexpr empty result{};
};
// Defines `name` as a token parser for the character array `str`.
// sizeof(str) supplies the token size, so `str` must be an array, not a pointer.
#define TOK(name, str) COMBINE(name, token, sizeof(str), str)
// 'ALTERNATIVE' PARSER COMBINATOR
// Tries Parser1 at Pos; if it fails, uses Parser2 at the same position.
// is_ok, result and next_pos are those of the selected parser.
PARSER(PARSER_ARG Parser1, PARSER_ARG Parser2) struct alt {
private:
    using first = Parser1<N, Str, Pos>;
    // Only the selected specialization has its members instantiated; the
    // other one is merely named.
    using chosen = std::conditional_t<first::is_ok, first, Parser2<N, Str, Pos>>;
public:
    static constexpr bool is_ok = chosen::is_ok;
    static constexpr auto result = chosen::result;
    static constexpr unsigned next_pos = chosen::next_pos;
};
// MAIN FUNCTION AND MACROS | |
template <unsigned N, const char Str[N], PARSER_ARG Parser> | |
static constexpr decltype(Parser<N, Str, 0>::result) parse = Parser<N, Str, 0>::result; | |
#define PARSE(str, parser) parse<sizeof(str), str, parser> | |
// TESTS
// 1. DIGIT | |
static constexpr char test_digit[] = "7"; | |
static_assert(PARSE(test_digit, digit) == 7); | |
// 2. MANY DIGIT | |
static constexpr char test_digits[] = "88005553535"; | |
static_assert(PARSE(test_digits, digits) == std::tuple(8, 8, 0, 0, 5, 5, 5, 3, 5, 3, 5)); | |
// 3. NATURAL | |
static constexpr char test_nat[] = "2033"; | |
static_assert(PARSE(test_nat, natural) == 2033); | |
// 4. BLANKS | |
static constexpr char test_blanks[] = " "; | |
static_assert(is_empty(PARSE(test_blanks, blanks))); | |
// 5. PAIR OF NATS : SEQ (SEQ (NAT, MANY BLANK), NAT)
static constexpr char nat_nat_test[] = "1488 2727";
COMBINE(nat_, seq, natural, blanks); // nat_ = natural . blanks
COMBINE(nat_nat, seq, nat_, natural); // nat_nat = natural . blanks . natural
static_assert(PARSE(nat_nat_test, nat_nat) == std::pair(1488u, 2727u));
// 6. TOKEN | |
static constexpr char token_test[] = "12 plus 588"; | |
static constexpr char plus_token_str[] = "plus"; | |
TOK(plus_tok, plus_token_str); | |
COMBINE(nat_plus, seq, nat_, plus_tok); | |
COMBINE(nat_plus_, seq, nat_plus, blanks); | |
COMBINE(nat_plus_nat, seq, nat_plus_, natural); | |
static_assert(PARSE(token_test, nat_plus_nat) == std::pair(12u, 588u)); | |
// 7. ALTERNATIVE | |
static constexpr char alternative_test1[] = "hello 123"; | |
static constexpr char alternative_test2[] = "world 123"; | |
static constexpr char hello_token_str[] = "hello"; | |
static constexpr char world_token_str[] = "world"; | |
TOK(hello_token, hello_token_str); | |
TOK(world_token, world_token_str); | |
COMBINE(hello_world, alt, hello_token, world_token); | |
COMBINE(hello_world_, seq, hello_world, blanks); | |
COMBINE(hello_world_nat, seq, hello_world_, natural); | |
static_assert(PARSE(alternative_test1, hello_world_nat) == 123); | |
static_assert(PARSE(alternative_test2, hello_world_nat) == 123); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment