Skip to content

Instantly share code, notes, and snippets.

@marty1885
Created March 31, 2022 13:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marty1885/e20cc8a7d141b48ab9a661d756f913e5 to your computer and use it in GitHub Desktop.
Save marty1885/e20cc8a7d141b48ab9a661d756f913e5 to your computer and use it in GitHub Desktop.
#include <cassert>
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <string_view>
#include <unordered_set>
#include <utility>
#include <vector>
#include <cppcoro/generator.hpp>
#include <cppcoro/task.hpp>
#include <cppcoro/sync_wait.hpp>
// One parse outcome: `first` is the text produced by the parser, `second` is a
// non-owning view of the input that is still left to parse.
using ParseItem = std::pair<std::string, std::string_view>;
// Lazily produced sequence of parse outcomes. The parsers in this file signal
// failure by yielding nothing.
using ResultGenerator = cppcoro::generator<ParseItem>;
// A parser maps an input view to the sequence of ways it can match that input.
using Parser = std::function<ResultGenerator(std::string_view)>;
// Parser that matches exactly one ASCII decimal digit at the start of the input.
// On success it yields the digit as a one-character string together with the
// rest of the input; on empty input or any other leading character it yields
// nothing.
auto digit() -> Parser
{
    return [](std::string_view input) -> ResultGenerator
    {
        const bool starts_with_digit =
            !input.empty() && '0' <= input.front() && input.front() <= '9';
        if (starts_with_digit)
            co_yield { std::string(1, input.front()), input.substr(1) };
    };
}
// Parser that matches one leading character out of: ASCII lowercase letter,
// ASCII uppercase letter, ASCII digit, or a single space.
// NOTE: despite the name, a space character is accepted as well.
// Yields the matched character and the rest of the input, or nothing when the
// input is empty or starts with any other character.
auto alphanumeric() -> Parser
{
    return [](std::string_view input) -> ResultGenerator
    {
        if (input.empty())
            co_return;

        const char head = input.front();
        const bool is_lower = 'a' <= head && head <= 'z';
        const bool is_upper = 'A' <= head && head <= 'Z';
        const bool is_digit = '0' <= head && head <= '9';
        const bool is_space = head == ' ';

        if (is_lower || is_upper || is_digit || is_space)
            co_yield { std::string(1, head), input.substr(1) };
    };
}
// Parser that matches the fixed string `literal_` at the start of the input.
// On success it yields the literal wrapped in square brackets (e.g. "[**]")
// and the input that follows it; otherwise it yields nothing.
// The expected text is stored in the closure, and the coroutine reads it from
// there, so the returned Parser must stay alive while its generator is in use.
auto literal(std::string literal_) -> Parser
{
    return [expected = std::move(literal_)](std::string_view input) -> ResultGenerator
    {
        if (!input.starts_with(expected))
            co_return;
        co_yield { "[" + expected + "]", input.substr(expected.size()) };
    };
}
// Parser that succeeds only when there is no input left.
// Yields an empty result with the (empty) input as remainder; yields nothing
// if any input remains. Useful for requiring that a parse consumed everything.
auto nullstring() -> Parser
{
    return [](std::string_view input) -> ResultGenerator
    {
        if (!input.empty())
            co_return;
        co_yield { "", input };
    };
}
// Alternation: yields every result of `lhs` on the input, followed by every
// result of `rhs` on the same input. Both alternatives are always explored,
// so the combined parser can yield results from both sides.
auto Or(Parser lhs, Parser rhs) -> Parser
{
    return [first = std::move(lhs), second = std::move(rhs)](std::string_view input) -> ResultGenerator
    {
        // Exhaust the first alternative before starting on the second one.
        for (auto match : first(input))
            co_yield match;

        for (auto match : second(input))
            co_yield match;
    };
}
// Sequencing: runs `lhs` on the input and, for each of its results, runs `rhs`
// on what `lhs` left over. Yields the concatenated text of both matches along
// with the remainder left by `rhs`. If either side yields nothing for a given
// path, that path contributes no result.
auto then(Parser lhs, Parser rhs) -> Parser
{
    return [first = std::move(lhs), second = std::move(rhs)](std::string_view input) -> ResultGenerator
    {
        for (const auto& head : first(input))
        {
            // Continue parsing from where this particular `first` match stopped.
            for (const auto& tail : second(head.second))
                co_yield { head.first + tail.first, tail.second };
        }
    };
}
// Repeatedly applies `parser`, starting at `input`, and yields the concatenated
// text of each chain of successive matches together with the final remainder.
//
// input  - text still to be parsed at this level.
// parser - parser applied at every level; held by reference, so it must
//          outlive the returned generator.
// depth  - recursion level. At depth 0 a result is only produced if the parser
//          matched at least once; at any deeper level, failing to match yields
//          an empty result with `input` unchanged, which ends the chain.
//
// Matches whose text is empty are ignored (presumably so that a parser which
// produces no text cannot make the recursion loop forever).
ResultGenerator recursiveParse(std::string_view input, const Parser& parser, size_t depth = 0)
{
    bool matched_any = false;
    for (const auto& head : parser(input))
    {
        if (head.first.empty())
            continue;
        matched_any = true;

        // Extend this match with every way the rest of the input can continue.
        for (const auto& tail : recursiveParse(head.second, parser, depth + 1))
            co_yield { head.first + tail.first, tail.second };
    }

    // Base case of the chain: nothing matched here, so stop without consuming.
    if (depth != 0 && !matched_any)
        co_yield { "", input };
}
// Apply the parser one or more times. The text of each successive match is
// appended to the text of the previous ones, and since every application can
// yield several alternatives, the combined parser can yield several results.
// Yields nothing when the parser does not match (with non-empty text) at least
// once.
// The wrapped parser lives in the closure; keep the returned Parser alive
// while its generator is being iterated.
auto some(Parser parser) -> Parser
{
    return [parser = std::move(parser)](std::string_view input) -> ResultGenerator
    {
        // Starting at the default depth 0 makes recursiveParse require at
        // least one match before it yields anything.
        for (auto item : recursiveParse(input, parser))
            co_yield item;
    };
}
// Apply the parser zero or more times, concatenating the text of successive
// matches. When the parser does not match at all, a single empty result is
// yielded with the input left untouched.
// The wrapped parser lives in the closure; keep the returned Parser alive
// while its generator is being iterated.
auto zero_or_more(Parser parser) -> Parser
{
    return [parser = std::move(parser)](std::string_view input) -> ResultGenerator
    {
        // Starting at depth 1 lets recursiveParse yield the empty match when
        // the parser never succeeds.
        for (auto item : recursiveParse(input, parser, 1))
            co_yield item;
    };
}
// Sequencing sugar: `a + b` is `then(a, b)`.
Parser operator+ (Parser lhs, Parser rhs)
{
    // Both operands were taken by value; move them on so the nested parser
    // closures are not copied a second time.
    return then(std::move(lhs), std::move(rhs));
}
// Alternation sugar: `a | b` is `Or(a, b)`.
Parser operator| (Parser lhs, Parser rhs)
{
    // Both operands were taken by value; move them on so the nested parser
    // closures are not copied a second time.
    return Or(std::move(lhs), std::move(rhs));
}
// Sequencing sugar with a string on the right: `a + "x"` is
// `then(a, literal("x"))`.
Parser operator+ (Parser lhs, std::string rhs)
{
    // Operands are owned copies already, so move them instead of copying again.
    return then(std::move(lhs), literal(std::move(rhs)));
}
// Sequencing sugar with a string on the left: `"x" + a` is
// `then(literal("x"), a)`.
Parser operator+ (std::string lhs, Parser rhs)
{
    // Operands are owned copies already, so move them instead of copying again.
    return then(literal(std::move(lhs)), std::move(rhs));
}
// Alternation sugar with a string on the right: `a | "x"` is
// `Or(a, literal("x"))`.
Parser operator| (Parser lhs, std::string rhs)
{
    // Operands are owned copies already, so move them instead of copying again.
    return Or(std::move(lhs), literal(std::move(rhs)));
}
// Alternation sugar with a string on the left: `"x" | a` is
// `Or(literal("x"), a)`.
Parser operator| (std::string lhs, Parser rhs)
{
    // Operands are owned copies already, so move them instead of copying again.
    return Or(literal(std::move(lhs)), std::move(rhs));
}
// Returns a parser for a run of plain markdown text.
// For every position that holds a marker character (*, _ or `) it yields the
// text before that position together with the remainder starting at the
// marker, so callers can try each possible split point in order. The first
// such result has empty text when the input itself starts with a marker.
// Finally, the whole input is yielded as text with an empty remainder, but
// only when the input is non-empty and does not end in a marker character.
auto markdown_text() -> Parser
{
    return [](std::string_view input) -> ResultGenerator
    {
        const auto is_marker = [](char c) { return c == '*' || c == '_' || c == '`'; };

        for (size_t i = 0; i < input.size(); ++i)
        {
            if (is_marker(input[i]))
                co_yield { std::string(input.substr(0, i)), input.substr(i) };
        }

        // The previous check here combined ==/!= tests with ||, which is true
        // for every character; the test is now "last character is not a marker".
        if (!input.empty() && !is_marker(input.back()))
            co_yield { std::string(input), "" };
    };
}
// Demo: builds a small grammar for inline markdown (code spans, emphasis,
// strong emphasis and plain text), parses a sample sentence with it and prints
// the first result that consumes the entire input.
cppcoro::task<> test()
{
    // Grammar, from the innermost construct outwards.
    auto normal_text = markdown_text();
    auto code = "`" + normal_text + "`";
    auto em_body = code | normal_text;
    auto em = ("*" + em_body + "*") | ("_" + em_body + "_");
    auto strong_body = em | normal_text | code;
    auto strong = ("**" + strong_body + "**") | ("__" + strong_body + "__");
    auto span = strong | em | code | normal_text;
    // One or more spans, and nothing may be left over afterwards.
    auto text_run = some(span) + nullstring();

    std::string sample = "This is some text *em* **strong** `code` *`code in em`* ***It works***";
    std::cout << "parsing:\t" << sample << std::endl;

    // Only the first complete parse is of interest; stop after printing it.
    for (const auto& parsed : text_run(sample)) {
        std::cout << "result:\t\t" << parsed.first << std::endl;
        break;
    }
    co_return;
}
// Entry point: runs the demo task and waits for it to finish.
int main()
{
    cppcoro::sync_wait(test());
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment