-
-
Save tomolatoon/1289a7b0d6106c1443a14429802a1e53 to your computer and use it in GitHub Desktop.
BudouX を Siv3D で ranges と共に使うことが出来るコード。ラクラムシさんの実装に基づきます。ラクラムシさんの: https://gist.github.com/Raclamusi/aa1c9776d53c838a16e294cf92eb2c9b
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Siv3D.hpp> // Siv3D v0.6.11
#include <cstddef>
#include <functional>
#include <memory>
#include <ranges>
#include <utility>
#include <variant>
#include "rivet.hpp" // https://github.com/onihusube/rivet/blob/master/include/rivet.hpp
namespace tomolatoon | |
{ | |
struct BudouXParser | |
{ | |
using Model = HashTable<String, HashTable<String, int32>>; | |
BudouXParser(Model model, Optional<int32> totalScore = none) | |
: m_totalScore{totalScore.value_or(0)} | |
, m_model{std::move(model)} | |
{ | |
if (not totalScore) | |
{ | |
for (const auto& [featureKey, group] : m_model) | |
{ | |
for (const auto& [sequence, score] : group) | |
{ | |
m_totalScore += score; | |
} | |
} | |
} | |
} | |
BudouXParser() = default; | |
BudouXParser(const BudouXParser&) = default; | |
BudouXParser(BudouXParser&&) = default; | |
BudouXParser& operator=(const BudouXParser&) = default; | |
BudouXParser& operator=(BudouXParser&&) = default; | |
explicit operator bool() const | |
{ | |
return (not m_model.empty()); | |
} | |
int32 getFeatureScore(StringView featureKey, StringView sequence) const | |
{ | |
if (const auto itGroup = m_model.find(featureKey); itGroup != m_model.end()) | |
{ | |
const auto& group = itGroup->second; | |
if (const auto itScore = group.find(sequence); itScore != group.end()) | |
{ | |
const auto& score = itScore->second; | |
return score; | |
} | |
} | |
return 0; | |
} | |
// target で指定された文字について、全ての Feature におけるスコアを合計した値を返す | |
int32 getScore(StringView sequence, int64 target) const | |
{ | |
constexpr std::tuple<StringView, int32, int32> Features[]{ | |
{U"UW1", -3, 1}, | |
{U"UW2", -2, 1}, | |
{U"UW3", -1, 1}, | |
{U"UW4", 0, 1}, | |
{U"UW5", 1, 1}, | |
{U"UW6", 2, 1}, | |
{U"BW1", -2, 2}, | |
{U"BW2", -1, 2}, | |
{U"BW3", 0, 2}, | |
{U"TW1", -3, 3}, | |
{U"TW2", -2, 3}, | |
{U"TW3", -1, 3}, | |
{U"TW4", 0, 3}, | |
}; | |
int32 score = 0; | |
for (const auto& [key, pos, n] : Features) | |
{ | |
if ((0 <= (target + pos)) && ((target + pos) < static_cast<int64>(sequence.size()))) | |
{ | |
score += getFeatureScore(key, sequence.substr((target + pos), n)); | |
} | |
} | |
return score; | |
} | |
bool overBoundaryScore(int32 score) const | |
{ | |
return (score * 2 > m_totalScore); | |
} | |
bool parseCharacter(StringView sentence, int64 target) const | |
{ | |
return overBoundaryScore(getScore(sentence, target)); | |
} | |
Array<size_t> parseBoundaries(StringView sentence) const | |
{ | |
Array<size_t> result; | |
for (int64 i = 1; i < static_cast<int64>(sentence.size()); ++i) | |
{ | |
if (overBoundaryScore(getScore(sentence, i))) | |
{ | |
result.push_back(i); | |
} | |
} | |
return result; | |
} | |
Array<String> parse(StringView sentence) const | |
{ | |
Array<String> result; | |
size_t start = 0; | |
for (size_t boundary : parseBoundaries(sentence)) | |
{ | |
result.emplace_back(sentence.substr(start, (boundary - start))); | |
start = boundary; | |
} | |
result.emplace_back(sentence.substr(start)); | |
return result; | |
} | |
Array<StringView> parseView(StringView sentence) const | |
{ | |
Array<StringView> result; | |
size_t start = 0; | |
for (size_t boundary : parseBoundaries(sentence)) | |
{ | |
result.push_back(sentence.substr(start, (boundary - start))); | |
start = boundary; | |
} | |
result.push_back(sentence.substr(start)); | |
return result; | |
} | |
int32 getTotalScore() const | |
{ | |
return m_totalScore; | |
} | |
const Model& getModel() const& | |
{ | |
return m_model; | |
} | |
Model&& getModel() && | |
{ | |
return std::move(m_model); | |
} | |
friend bool operator==(const BudouXParser& lhs, const BudouXParser& rhs) = default; | |
static BudouXParser Parse(const JSON& modelJSON) | |
{ | |
Model model; | |
int32 totalScore = 0; | |
for (const auto& [featureKey, groupJSON] : modelJSON) | |
{ | |
auto& group = model[featureKey]; | |
for (const auto& [sequence, scoreJSON] : groupJSON) | |
{ | |
const int32 score = scoreJSON.getOr<int32>(0); | |
group[sequence] = score; | |
totalScore += score; | |
} | |
} | |
return BudouXParser{std::move(model), totalScore}; | |
} | |
template <class Reader, std::enable_if_t<std::is_base_of_v<IReader, Reader>>* = nullptr> | |
static BudouXParser Load(Reader&& reader) | |
{ | |
return Parse(JSON::Load(std::forward<Reader>(reader))); | |
} | |
static BudouXParser Load(FilePathView path) | |
{ | |
return Load(BinaryReader{path}); | |
} | |
static BudouXParser Download(URLView url) | |
{ | |
MemoryWriter writer; | |
SimpleHTTP::Get(url, {}, writer); | |
return Load(MemoryReader{writer.retrieve()}); | |
} | |
private: | |
int32 m_totalScore = 0; | |
Model m_model = {}; | |
}; | |
/// @brief Tag type that selects the iterator constructor building an end-of-range iterator.
struct as_sentinel_tag
{
    // explicit, like the standard tag types: the tag has to be spelled out as
    // `as_sentinel_tag{}`; a bare `{}` argument cannot pick the tagged overload by accident.
    explicit as_sentinel_tag() = default;
};
template <std::ranges::input_range View> | |
requires std::ranges::view<View> && requires { requires sizeof std::ranges::range_value_t<View> == 4; } | |
struct BudouXBreakView : std::ranges::view_interface<BudouXBreakView<View>> | |
{ | |
template <bool IsConst> | |
friend struct iterator; | |
template <bool IsConst> | |
struct sentinel; | |
template <bool IsConst> | |
struct iterator | |
{ | |
using I = std::ranges::iterator_t<std::conditional_t<IsConst, const View, View>>; | |
using Parent = std::conditional_t<IsConst, const BudouXBreakView, BudouXBreakView>; | |
using difference_type = ptrdiff_t; | |
using value_type = String; | |
using iterator_concept = std::conditional_t< | |
std::forward_iterator<I>, | |
std::forward_iterator_tag, | |
std::input_iterator_tag>; | |
// clang-format off | |
iterator() requires std::default_initializable<I> | |
{ | |
++m_bufTarget; | |
// m_parent が nullptr なのでまずい | |
//++(*this); | |
} | |
iterator(iterator&&) = default; | |
iterator(const iterator&) requires std::copy_constructible<I> = default; | |
// clang-format on | |
iterator(iterator<!IsConst> it) | |
requires IsConst && std::convertible_to<std::ranges::sentinel_t<View>, I> | |
: m_parent{it.m_parent} | |
, m_it{std::move(it.m_it)} | |
, m_bufTarget{std::move(it.m_bufTarget)} | |
, m_cur{std::move(it.m_cur)} | |
, m_isSentinel{it.m_cur} | |
{} | |
iterator& operator=(iterator&&) = default; | |
// clang-format off | |
iterator& operator=(const iterator&) requires std::copyable<I> = default; | |
// clang-format on | |
iterator(Parent& parent, I it) | |
: m_parent{std::addressof(parent)} | |
, m_it{std::move(it)} | |
, m_isSentinel{false} | |
{ | |
++(*this); | |
} | |
iterator(as_sentinel_tag, Parent& parent, I it) | |
: m_parent{std::addressof(parent)} | |
, m_it{std::move(it)} | |
, m_isSentinel{true} | |
{ | |
++(*this); | |
} | |
String operator*() const& | |
{ | |
return m_cur; | |
} | |
String operator*() && | |
{ | |
return std::move(m_cur); | |
} | |
friend String iter_move(const iterator& it) | |
{ | |
return std::move(it.m_cur); | |
} | |
iterator& operator++() | |
{ | |
// m_buf はこんな感じになるようにバッファリングする | |
// ↓ ここを基準にするお気持ち | |
// [-3][-2][-1][0][1][2] | |
if (isEnd()) | |
{ | |
throw Error{U"[BudouXBreakView::Iterator::operator++]: cannnot advance over end."}; | |
} | |
m_cur.clear(); | |
const auto sen = std::ranges::end(m_parent->m_view); | |
// 最初用(先読み) | |
if (m_buf.size() == 0) | |
{ | |
for (; m_it != sen; ++m_it) | |
{ | |
m_buf.push_back(*m_it); | |
// 後の都合上、早期 break する | |
if (m_buf.size() == 6) | |
{ | |
break; | |
} | |
} | |
} | |
// 最後用(終端状態へ) | |
// 終端を指すイテレータが生成された場合、コンストラクタで呼び出されるときにここに入る | |
// なお、通常ルーチンの直後に入れると、最終状態になった時に続けて終端状態になってしまう | |
if (m_bufTarget == m_buf.size()) | |
{ | |
++m_bufTarget; | |
} | |
// 通常ルーチン | |
for (; m_bufTarget < m_buf.size();) | |
{ | |
// 常にやる更新処理 | |
m_cur.push_back(m_buf[m_bufTarget]); | |
// 最初に来た時 or 終端に来た時 の更新処理 | |
if (m_bufTarget < 3 || m_it == sen) | |
{ | |
++m_bufTarget; | |
} | |
// 通常繰り返し期間のの更新処理 | |
else | |
{ | |
++m_it; | |
if (m_it != sen) | |
{ | |
m_buf.rotate(1).back() = *m_it; | |
} | |
else | |
{ | |
// 最終状態へ向けて m_bufTarget を進める | |
++m_bufTarget; | |
} | |
} | |
if (m_parent->getPerserRef().parseCharacter(m_buf, m_bufTarget)) | |
{ | |
break; | |
} | |
} | |
return *this; | |
} | |
iterator& operator++(int) | |
{ | |
++*this; | |
return *this; | |
} | |
iterator operator++(int) | |
requires std::forward_iterator<I> | |
{ | |
iterator tmp = *this; | |
++(*this); | |
return tmp; | |
} | |
// 終端状態かどうか | |
bool isEnd() const | |
{ | |
return m_bufTarget == m_buf.size() + 1; | |
} | |
friend bool operator==(const iterator& it, std::default_sentinel_t) | |
{ | |
return it.isEnd(); | |
} | |
friend bool operator==(const iterator& lhs, const iterator& rhs) | |
{ | |
// lhs だけが sentinel | |
// rhs.isEnd() && lhs.m_it == rhs.m_it; | |
// rhs だけが sentinal | |
// lhs.isEnd() && lhs.m_it == rhs.m_it; | |
// lhs と rhs がどちらも sentinel | |
// lhs.m_it == rhs.m_it; (lhs.isEnd() && rhs.isEnd() は必ず true になる) | |
// lhs と rhs がどちらも sentinel でない | |
// lhs.m_it == rhs.m_it && lhs.m_bufTarget == rhs.m_bufTarget; | |
if (lhs.m_isSentinel || rhs.m_isSentinel) | |
{ | |
return lhs.isEnd() && rhs.isEnd() && lhs.m_it == rhs.m_it; | |
} | |
else | |
{ | |
return lhs.m_it == rhs.m_it && lhs.m_bufTarget == rhs.m_bufTarget; | |
} | |
} | |
private: | |
Parent* m_parent = nullptr; | |
I m_it; | |
// BudouX の解析の都合上、前読み 2 と後読み 3 を含めた 6 文字をバッファリングするので | |
// [-3][-2][-1][0][1][2] という雰囲気でやる(実際には m_bufTarget が中心) | |
String m_buf; | |
// 初期状態: 0、但しコンストラクタで 1 になる | |
// 通常状態: [1, m_buf.size() - 1] | |
// 最終状態: m_buf.size()、全ての要素を読み終わった時 | |
// 終端状態: m_buf.size() + 1 | |
size_t m_bufTarget = 0; | |
String m_cur; | |
bool m_isSentinel = true; | |
}; | |
template <bool IsConst> | |
struct sentinel | |
{ | |
using S = std::ranges::sentinel_t<std::conditional_t<IsConst, View, const View>>; | |
sentinel() = default; | |
sentinel(S sen) | |
: m_sen{std::move(sen)} | |
{} | |
sentinel(sentinel&&) = default; | |
sentinel(const sentinel&) = default; | |
sentinel(sentinel<!IsConst> sen) | |
requires IsConst && std::convertible_to<std::ranges::sentinel_t<View>, S> | |
: m_sen{std::move(sen.m_sen)} | |
{} | |
sentinel& operator=(sentinel&&) = default; | |
sentinel& operator=(const sentinel&) = default; | |
friend bool operator==(const iterator<IsConst>& it, const sentinel&) | |
{ | |
return it.isEnd(); | |
} | |
private: | |
S m_sen; | |
}; | |
// clang-format off | |
BudouXBreakView() requires std::default_initializable<View> = default; | |
// clang-format on | |
BudouXBreakView(View view, const BudouXParser& parser) | |
: m_view{std::move(view)} | |
, m_parser{parser} | |
{} | |
BudouXBreakView(View view, BudouXParser&& parser) | |
: m_view{std::move(view)} | |
, m_parser{std::move(parser)} | |
{} | |
BudouXBreakView(View view, std::reference_wrapper<BudouXParser> parser) | |
: m_view{std::move(view)} | |
, m_parser{std::ref(std::as_const(parser.get()))} | |
{} | |
BudouXBreakView(View view, std::reference_wrapper<const BudouXParser> parser) | |
: m_view{std::move(view)} | |
, m_parser{std::move(parser)} | |
{} | |
auto begin() | |
{ | |
return iterator<false>{*this, std::ranges::begin(m_view)}; | |
} | |
auto end() | |
{ | |
return sentinel<false>{std::ranges::end(m_view)}; | |
} | |
auto end() | |
requires std::ranges::common_range<View> | |
{ | |
return iterator<false>{as_sentinel_tag{}, *this, std::ranges::end(m_view)}; | |
} | |
auto begin() const | |
{ | |
return iterator<true>{*this, std::ranges::begin(m_view)}; | |
} | |
auto end() const | |
{ | |
return sentinel<true>{std::ranges::end(m_view)}; | |
} | |
auto end() const | |
requires std::ranges::common_range<const View> | |
{ | |
return iterator<true>{as_sentinel_tag{}, *this, std::ranges::end(m_view)}; | |
} | |
const BudouXParser& getPerserRef() const | |
{ | |
return m_parser.index() == 0 ? std::get<0>(m_parser) : std::get<1>(m_parser).get(); | |
} | |
private: | |
View m_view; | |
std::variant<BudouXParser, std::reference_wrapper<const BudouXParser>> m_parser; | |
}; | |
template <class Range, class T> | |
BudouXBreakView(Range&& view, T parser) -> BudouXBreakView<std::views::all_t<Range>>; | |
} // namespace tomolatoon | |
namespace tomolatoon::detail | |
{ | |
struct BudouXBreakAdaptor : rivet::range_adaptor_base<BudouXBreakAdaptor> | |
{ | |
template <std::ranges::viewable_range R, class T> | |
constexpr auto operator()(R&& r, T&& parser) const | |
{ | |
return BudouXBreakView{std::forward<R>(r), std::forward<T>(parser)}; | |
} | |
RIVET_USING_BASEOP; | |
}; | |
} // namespace tomolatoon::detail | |
namespace tomolatoon | |
{ | |
inline constexpr detail::BudouXBreakAdaptor BudouXBreak; | |
} | |
void Main() | |
{ | |
const auto parser = tomolatoon::BudouXParser::Download(U"https://raw.githubusercontent.com/google/budoux/main/budoux/models/ja.json"); | |
const Font font{FontMethod::MSDF, 48}; | |
TextAreaEditState textAreaState{ | |
U"Siv3D(シブスリーディー)は、音や画像、AI を使ったゲームやアプリを、" | |
U"モダンな C++ コードで楽しく簡単にプログラミングできるオープンソースのフレームワークです。"}; | |
double fontSizeSlider = 0.4; | |
bool forceReturn = false; | |
while (System::Update()) | |
{ | |
if (SimpleGUI::TextArea(textAreaState, Vec2{30, 20}, SizeF{740, 100})) | |
{ | |
textAreaState.text = textAreaState.text.removed(U'\n'); | |
} | |
SimpleGUI::Slider(U"Font size", fontSizeSlider, Vec2{30, 130}, 100, 200); | |
const double fontSize = (fontSizeSlider * 80 + 16); | |
SimpleGUI::CheckBox(forceReturn, U"境界で必ず改行する", Vec2{340, 130}); | |
{ | |
Vec2 pos{30, 180}; | |
for (const auto& s : textAreaState.text | tomolatoon::BudouXBreak(std::ref(parser))) | |
{ | |
const auto text = font(s); | |
const auto advances = text.getXAdvances(fontSize); | |
if (pos.x != 30 && (forceReturn || ((pos.x + advances.sum()) > 770))) | |
{ | |
pos.x = 30; | |
pos.y += font.height(fontSize); | |
} | |
double advance = 0; | |
size_t start = 0; | |
for (size_t i = 0; i < text.clusters.size(); ++i) | |
{ | |
advance += advances[i]; | |
if ((pos.x + advance) > 770) | |
{ | |
font(s.substr(start, (text.clusters[i].pos - start))).draw(fontSize, pos); | |
pos.x = 30; | |
pos.y += font.height(fontSize); | |
advance = advances[i]; | |
start += text.clusters[i].pos; | |
} | |
} | |
font(s.substr(start)).draw(fontSize, pos); | |
pos.x += advance; | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment