Last active
February 28, 2024 13:50
-
-
Save grisumbras/90d2e99b8eb8b6c82147188c8a6287f6 to your computer and use it in GitHub Desktop.
Parsing arithmetic expressions written in Russian
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <boost/parser/parser.hpp> | |
#include <boost/parser/transcode_view.hpp> | |
#include <boost/variant2/variant.hpp> | |
#include <cassert> | |
#include <iostream> | |
#include <string> | |
namespace bp = boost::parser; | |
// Grammatical gender carried by a numeral word. `any` is used for words that
// do not constrain the gender of what follows.
enum class gender
{
    masculine,
    feminine,
    any
};

// Plural category of a counted noun form.
enum class plurality
{
    one,
    few,
    many
};

// Binary arithmetic operators understood by the expression grammar.
enum class operation
{
    addition,
    subtraction,
    multiplication,
    division
};
namespace actions { | |
bool check_plurality(std::int64_t n, plurality expected) | |
{ | |
if (n < 0) { n = -n; } | |
plurality pl = plurality::many; | |
if (n == 0) { | |
// do nothing | |
} else if ((n % 10 == 1) && (n % 100 != 11)) { | |
pl = plurality::one; | |
} else if (n < 5 || ((n % 10 < 5) && (n % 100 > 20))) { | |
pl = plurality::few; | |
} | |
return pl == expected; | |
} | |
constexpr auto make_gendered = [](auto const& attrs) { | |
using std::get; | |
auto result = std::pair(0u, gender::any); | |
if (0 == attrs.index()) { | |
result = get<0>(attrs); | |
} else { | |
result.first = get<1>(attrs); | |
} | |
return result; | |
}; | |
constexpr auto tys = [](auto const& attrs) { | |
using namespace bp::literals; | |
auto result = std::pair(0u, gender::any); | |
if (auto&& if_v2 = attrs[1_c]) { | |
result = *if_v2; | |
result.first += attrs[0_c]; | |
} else { | |
result.first = attrs[0_c]; | |
} | |
return result; | |
}; | |
constexpr auto multiplier = [](auto const& attrs) { | |
using namespace bp::literals; | |
using std::get; | |
auto result = std::pair(0u, gender::any); | |
if (1 == attrs.index()) { | |
result = get<1>(attrs); | |
} else { | |
auto const& val = get<0>(attrs); | |
if (auto&& if_v2 = val[1_c]) { result = *if_v2; } | |
result.first += val[0_c]; | |
} | |
return result; | |
}; | |
constexpr auto push_thousand = [](auto const& ctx) -> void { | |
using namespace bp::literals; | |
using std::get; | |
auto& attrs = _attr(ctx); | |
auto& locals = _locals(ctx); | |
auto multiplier = std::pair(1u, gender::any); | |
if (auto&& if_v1 = attrs[0_c]) { multiplier = *if_v1; } | |
auto&& v2 = attrs[1_c]; | |
auto const base = get<0>(v2); | |
gender const g = multiplier.second; | |
if ( | |
(locals.second && locals.second <= base) | |
|| !check_plurality(multiplier.first, get<2>(v2)) | |
|| ((g != get<1>(v2)) && (g != gender::any))) { | |
_pass(ctx) = false; | |
return; | |
} | |
locals.first += multiplier.first * base; | |
locals.second = base; | |
}; | |
// Publishes the first member of the rule's locals (the accumulated total)
// as the rule's value.
constexpr auto return_thousands = [](auto const& ctx) -> void {
    auto const& accumulated = _locals(ctx);
    _val(ctx) = accumulated.first;
};
// Semantic action for a (number, gender) attribute: fails the match when the
// gender is feminine, and in every case stores the number as the rule value.
constexpr auto hundreds = [](auto const& ctx) -> void {
    auto const& parsed = _attr(ctx);
    bool const is_feminine = (parsed.second == gender::feminine);
    _pass(ctx) = !is_feminine;
    _val(ctx) = parsed.first;
};
constexpr auto numeral = [](auto const& attrs) { | |
using namespace bp::literals; | |
using std::get; | |
if (1 == attrs.index()) { return get<1>(attrs); } | |
auto const& v0 = get<0>(attrs); | |
auto const v1 = v0[0_c]; | |
if (auto const& if_v2 = v0[1_c]) { return v1 + *if_v2; } | |
return v1; | |
}; | |
// Seeds the rule's locals with the attribute that was just parsed.
constexpr auto init = [](auto& ctx) -> void {
    auto const& first_operand = _attr(ctx);
    _locals(ctx) = first_operand;
};
constexpr auto operation = [](auto& ctx) -> void { | |
using namespace bp::literals; | |
auto const& attrs = _attr(ctx); | |
auto const v = attrs[1_c]; | |
switch (attrs[0_c]) { | |
case operation::addition: _locals(ctx) += v; break; | |
case operation::subtraction: _locals(ctx) -= v; break; | |
case operation::multiplication: _locals(ctx) *= v; break; | |
case operation::division: _locals(ctx) /= v; break; | |
default: assert(false); | |
} | |
}; | |
// Publishes the rule's locals (the accumulator) as the rule's value.
constexpr auto return_locals = [](auto const& ctx) -> void {
    auto const& accumulated = _locals(ctx);
    _val(ctx) = accumulated;
};
// Returns the arithmetic negation of its argument.
constexpr auto negation = [](auto const& operand) {
    return -operand;
};
} // namespace actions | |
namespace rules { | |
// Words for 1 and 2, each in a masculine and a feminine form, mapped to
// (value, gender).
const bp::symbols<std::pair<unsigned, gender>> one_two = {
    {"один", {1, gender::masculine}},
    {"одна", {1, gender::feminine}},
    {"два", {2, gender::masculine}},
    {"две", {2, gender::feminine}},
};
// Words for the units 3 through 9; these carry no gender information.
const bp::symbols<unsigned> ones_symb = {
    {"три", 3},
    {"четыре", 4},
    {"пять", 5},
    {"шесть", 6},
    {"семь", 7},
    {"восемь", 8},
    {"девять", 9}
};
// Single-word numbers from 10 through 19.
const bp::symbols<unsigned> teens = {
    {"десять", 10},
    {"одиннадцать", 11},
    {"двенадцать", 12},
    {"тринадцать", 13},
    {"четырнадцать", 14},
    {"пятнадцать", 15},
    {"шестнадцать", 16},
    {"семнадцать", 17},
    {"восемнадцать", 18},
    {"девятнадцать", 19},
};
// Words for the round tens 20 through 90.
const bp::symbols<unsigned> tys_symb = {
    {"двадцать", 20},
    {"тридцать", 30},
    {"сорок", 40},
    {"пятьдесят", 50},
    {"шестьдесят", 60},
    {"семьдесят", 70},
    {"восемьдесят", 80},
    {"девяносто", 90},
};
// Words for the round hundreds 100 through 900.
const bp::symbols<unsigned> hundreds_symb = {
    {"сто", 100},
    {"двести", 200},
    {"триста", 300},
    {"четыреста", 400},
    {"пятьсот", 500},
    {"шестьсот", 600},
    {"семьсот", 700},
    {"восемьсот", 800},
    {"девятьсот", 900},
};
// Magnitude words. Every entry maps one inflected form to
// (base value, gender of the word, plural category of that form).

// "thousand": feminine.
const bp::symbols<std::tuple<unsigned, gender, plurality>> thousands_symb = {
    {"тысяча", {1000, gender::feminine, plurality::one}},
    {"тысячи", {1000, gender::feminine, plurality::few}},
    {"тысяч", {1000, gender::feminine, plurality::many}},
};

// "million": masculine.
const bp::symbols<std::tuple<unsigned, gender, plurality>> millions_symb = {
    {"миллион", {1'000'000, gender::masculine, plurality::one}},
    {"миллиона", {1'000'000, gender::masculine, plurality::few}},
    {"миллионов", {1'000'000, gender::masculine, plurality::many}},
};

// "billion" (10^9): masculine.
const bp::symbols<std::tuple<unsigned, gender, plurality>> billions_symb = {
    {"миллиард", {1'000'000'000, gender::masculine, plurality::one}},
    {"миллиарда", {1'000'000'000, gender::masculine, plurality::few}},
    {"миллиардов", {1'000'000'000, gender::masculine, plurality::many}},
};
// Additive operator words ("plus", "minus").
const bp::symbols<operation> addition_op = {
    {"плюс", operation::addition},
    {"минус", operation::subtraction},
};

// Multiplicative operator words ("multiply", "divide").
const bp::symbols<operation> multiplication_op = {
    {"умножить", operation::multiplication},
    {"разделить", operation::division},
};
// Rule declarations. The template arguments are a tag type, the attribute
// type the rule produces and, where present, the type of the rule's locals.
// The string each rule is initialised with is its name.

// Number parsing: rules carrying a gender produce (value, gender) pairs.
constexpr bp::rule<struct numeral, std::int64_t> numeral = "numeral";
constexpr bp::rule<struct ones, std::pair<std::int64_t, gender>> ones = "ones";
constexpr bp::rule<struct tens, std::pair<std::int64_t, gender>> tens = "tens";
constexpr bp::rule<struct tys, std::pair<std::int64_t, gender>> tys = "tys";
constexpr bp::rule<struct multiplier, std::pair<std::int64_t, gender>>
    multiplier = "multiplier";
constexpr bp::rule<struct hundreds, std::int64_t> hundreds = "hundreds";
// Locals: (running total, base of the last magnitude group accepted).
constexpr bp::
    rule<struct thousands, std::int64_t, std::pair<std::int64_t, unsigned>>
        thousands = "thousands";

// Arithmetic: `expression` and `term` keep their accumulator in the locals.
constexpr bp::rule<struct expression, std::int64_t, std::int64_t> expression
    = "expression";
constexpr bp::rule<struct term, std::int64_t, std::int64_t> term = "term";
constexpr bp::rule<struct factor, std::int64_t> factor = "factor";
constexpr bp::rule<struct negation, std::int64_t> negation = "negation";
constexpr bp::rule<struct group, std::int64_t> group = "group";
// clang-format off | |
auto const numeral_def | |
= ((thousands >> -hundreds) | hundreds)[actions::numeral]; | |
auto const thousands_def | |
= (+(-multiplier >> (thousands_symb | millions_symb | billions_symb))[actions::push_thousand]) | |
[actions::return_thousands]; | |
auto const hundreds_def = multiplier[actions::hundreds]; | |
auto const multiplier_def | |
= ((hundreds_symb >> -tens) | tens)[actions::multiplier]; | |
auto const tens_def = (tys | teens | ones)[actions::make_gendered]; | |
auto const tys_def = (tys_symb >> -ones)[actions::tys]; | |
auto const ones_def = (one_two | ones_symb)[actions::make_gendered]; | |
auto const expression_def | |
= (term[actions::init] >> *((addition_op >> term)[actions::operation])) | |
[actions::return_locals]; | |
auto const term_def | |
= (factor[actions::init] | |
>> *((multiplication_op >> "на" >> factor)[actions::operation])) | |
[actions::return_locals]; | |
auto const factor_def = numeral | negation | group; | |
auto const group_def = '(' > expression > ')'; | |
auto const negation_def = ("минус" >> (numeral | group))[actions::negation]; | |
// clang-format on | |
// Hands every rule declared above to Boost.Parser's rule-definition macro.
BOOST_PARSER_DEFINE_RULES(
    expression, term, factor, group, negation,
    numeral, ones, tens, tys, hundreds, thousands, multiplier);
} // namespace rules | |
int main() | |
{ | |
std::string input; | |
std::getline(std::cin, input); | |
auto const result | |
= bp::parse(input | bp::as_utf8, bp::no_case[rules::expression], bp::ws); | |
// auto const result = bp::parse( | |
// input | bp::as_utf8, | |
// bp::no_case[rules::expression], | |
// bp::ws, | |
// bp::trace::on); | |
if (result) { | |
std::cout << *result << "\n"; | |
} else { | |
std::cerr << "Ошибка\n"; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment