Created
December 29, 2017 09:36
-
-
Save wx257osn2/0ba053b03f0de0e60211252609f44f82 to your computer and use it in GitHub Desktop.
MeCabのC++ラッパー
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Copyright (C) 2017 I | |
// Distributed under the Boost Software License, Version 1.0. | |
// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
#ifndef MECAB_HPP_INCLUDED_ | |
#define MECAB_HPP_INCLUDED_ | |
#include <cstddef>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string_view>
#include <utility>
#include <mecab.h>
//TODO: support expected | |
namespace mecab{ | |
using parse_error = std::runtime_error;//TODO: create exception type | |
enum class node_status{ | |
normal = MECAB_NOR_NODE, | |
unknown = MECAB_UNK_NODE, | |
bos = MECAB_BOS_NODE, | |
eos = MECAB_EOS_NODE, | |
eon = MECAB_EON_NODE | |
}; | |
struct node:MeCab::Node{ | |
std::string_view get_surface()const{return {this->surface, this->length};} | |
int character_type()const{return this->char_type;} | |
node_status status()const{return static_cast<node_status>(this->stat);} | |
bool is_best()const{return isbest == 1u;} | |
struct features_type{ | |
std::string_view pos; | |
std::string_view pos_detail1; | |
std::string_view pos_detail2; | |
std::string_view pos_detail3; | |
std::string_view conjugated_type; | |
std::string_view conjugated_form; | |
std::string_view original_form; | |
std::string_view reading; | |
std::string_view pronunciation; | |
}; | |
features_type features()const{ | |
std::string_view view(this->feature); | |
auto f = [&]{ | |
auto pos = view.find_first_of(','); | |
std::string_view ret(view.data(), pos); | |
view.remove_prefix(pos+1); | |
return ret; | |
}; | |
features_type fs; | |
fs.pos = f(); | |
fs.pos_detail1 = f(); | |
fs.pos_detail2 = f(); | |
fs.pos_detail3 = f(); | |
fs.conjugated_type = f(); | |
fs.conjugated_form = f(); | |
fs.original_form = f(); | |
fs.reading = f(); | |
fs.pronunciation = view; | |
return fs; | |
} | |
}; | |
class lattice{ | |
std::unique_ptr<MeCab::Lattice, void(*)(MeCab::Lattice*)> l; | |
explicit lattice(MeCab::Lattice*&& lat):l{std::move(lat), &MeCab::deleteLattice}{} | |
friend class model; | |
template<typename Impl> | |
struct node_forward_iterator{ | |
using value_type = node; | |
using reference = value_type&; | |
constexpr node_forward_iterator()noexcept:ptr{nullptr}{} | |
constexpr node_forward_iterator(MeCab::Node* p)noexcept:ptr{p}{} | |
constexpr node_forward_iterator(const node_forward_iterator&) = default; | |
node_forward_iterator& operator=(const node_forward_iterator&) = default; | |
constexpr node_forward_iterator(node_forward_iterator&&) = default; | |
node_forward_iterator& operator=(node_forward_iterator&&) = default; | |
~node_forward_iterator() = default; | |
constexpr node& operator*()const{return *reinterpret_cast<node*>(ptr);} | |
constexpr node* operator->()const{return reinterpret_cast<node*>(ptr);} | |
constexpr bool operator==(const node_forward_iterator& rhs)const noexcept{return ptr == rhs.ptr;} | |
constexpr bool operator!=(const node_forward_iterator& rhs)const noexcept{return ptr != rhs.ptr;} | |
node_forward_iterator& operator++(){Impl::next(ptr);return *this;} | |
node_forward_iterator operator++(int){auto ret = *this;++*this;return ret;} | |
protected: | |
MeCab::Node* ptr; | |
}; | |
template<typename Impl> | |
struct node_bidirectional_iterator:node_forward_iterator<Impl>{ | |
using node_forward_iterator<Impl>::node_forward_iterator; | |
node_bidirectional_iterator(const node_bidirectional_iterator&) = default; | |
node_bidirectional_iterator(node_bidirectional_iterator&&) = default; | |
node_bidirectional_iterator& operator=(const node_bidirectional_iterator&) = default; | |
node_bidirectional_iterator& operator=(node_bidirectional_iterator&&) = default; | |
node_bidirectional_iterator& operator++(){Impl::next(this->ptr);return *this;} | |
node_bidirectional_iterator operator++(int){auto ret = *this;++*this;return ret;} | |
node_bidirectional_iterator& operator--(){Impl::prev(this->ptr);return *this;} | |
node_bidirectional_iterator operator--(int){auto ret = *this;--*this;return ret;} | |
}; | |
template<typename Impl> | |
struct const_node_forward_iterator{ | |
using value_type = const node; | |
using reference = value_type&; | |
constexpr const_node_forward_iterator()noexcept:ptr{nullptr}{} | |
constexpr const_node_forward_iterator(const MeCab::Node* p)noexcept:ptr{p}{} | |
constexpr const_node_forward_iterator(const const_node_forward_iterator&) = default; | |
const_node_forward_iterator& operator=(const const_node_forward_iterator&) = default; | |
constexpr const_node_forward_iterator(const_node_forward_iterator&&) = default; | |
const_node_forward_iterator& operator=(const_node_forward_iterator&&) = default; | |
~const_node_forward_iterator() = default; | |
constexpr const node& operator*()const{return *reinterpret_cast<const node*>(ptr);} | |
constexpr const node* operator->()const{return reinterpret_cast<const node*>(ptr);} | |
constexpr bool operator==(const const_node_forward_iterator& rhs)const noexcept{return ptr == rhs.ptr;} | |
constexpr bool operator!=(const const_node_forward_iterator& rhs)const noexcept{return ptr != rhs.ptr;} | |
const_node_forward_iterator& operator++(){Impl::next(ptr);return *this;} | |
const_node_forward_iterator operator++(int){auto ret = *this;++*this;return ret;} | |
protected: | |
const MeCab::Node* ptr; | |
}; | |
template<typename Impl> | |
struct const_node_bidirectional_iterator:const_node_forward_iterator<Impl>{ | |
using const_node_forward_iterator<Impl>::const_node_forward_iterator; | |
const_node_bidirectional_iterator(const const_node_bidirectional_iterator&) = default; | |
const_node_bidirectional_iterator(const_node_bidirectional_iterator&&) = default; | |
const_node_bidirectional_iterator& operator=(const const_node_bidirectional_iterator&) = default; | |
const_node_bidirectional_iterator& operator=(const_node_bidirectional_iterator&&) = default; | |
const_node_bidirectional_iterator& operator++(){Impl::next(this->ptr);return *this;} | |
const_node_bidirectional_iterator operator++(int){auto ret = *this;++*this;return ret;} | |
const_node_bidirectional_iterator& operator--(){Impl::prev(this->ptr);return *this;} | |
const_node_bidirectional_iterator operator--(int){auto ret = *this;--*this;return ret;} | |
}; | |
template<typename Iterator, typename ConstIterator> | |
class range{ | |
MeCab::Node* beg; | |
constexpr explicit range(MeCab::Node* b):beg{b}{} | |
friend lattice; | |
public: | |
using iterator = Iterator; | |
using const_iterator = ConstIterator; | |
iterator begin(){return iterator{beg};} | |
iterator end(){return iterator{};} | |
const_iterator begin()const{return const_iterator{beg};} | |
const_iterator end()const{return const_iterator{};} | |
const_iterator cbegin()const{return const_iterator{beg};} | |
const_iterator cend()const{return const_iterator{};} | |
}; | |
struct bos_iterator_impl{ | |
static void next(MeCab::Node*& ptr){ptr = ptr->next;} | |
static void next(const MeCab::Node*& ptr){ptr = ptr->next;} | |
static void prev(MeCab::Node*& ptr){ptr = ptr->prev;} | |
static void prev(const MeCab::Node*& ptr){ptr = ptr->prev;} | |
}; | |
struct begins_iterator_impl{ | |
static void next(MeCab::Node*& ptr){ptr = ptr->bnext;} | |
static void next(const MeCab::Node*& ptr){ptr = ptr->bnext;} | |
}; | |
struct ends_iterator_impl{ | |
static void next(MeCab::Node*& ptr){ptr = ptr->enext;} | |
static void next(const MeCab::Node*& ptr){ptr = ptr->enext;} | |
}; | |
public: | |
enum class request_type{ | |
one_best_result = MECAB_ONE_BEST, | |
n_best_results = MECAB_NBEST, | |
partial_parsing = MECAB_PARTIAL, | |
marginal_probabilities = MECAB_MARGINAL_PROB, | |
alternative_results = MECAB_ALTERNATIVE, | |
all_morphs = MECAB_ALL_MORPHS, | |
allocate_sentence = MECAB_ALLOCATE_SENTENCE | |
}; | |
friend constexpr request_type operator|(const request_type& lhs, const request_type& rhs)noexcept{return static_cast<request_type>(static_cast<int>(lhs) | static_cast<int>(rhs));} | |
lattice(lattice&&) = default; | |
MeCab::Lattice* get()const{return l.get();} | |
explicit operator bool()const{return bool{l};} | |
lattice& clear(){l->clear();return *this;} | |
lattice& set_sentence(const char* sentence){l->set_sentence(sentence);return *this;} | |
lattice& set_sentence(const char* sentence, std::size_t len){l->set_sentence(sentence, len);return *this;} | |
lattice& set_request_type(request_type t){l->set_request_type(static_cast<int>(t));return *this;} | |
lattice& add_request_type(request_type t){l->add_request_type(static_cast<int>(t));return *this;} | |
lattice& remove_request_type(request_type t){l->remove_request_type(static_cast<int>(t));return *this;} | |
bool has_request_type(request_type t)const{return l->has_request_type(static_cast<int>(t));} | |
request_type get_request_type()const{return static_cast<request_type>(l->request_type());} | |
const char* to_str()const{return l->toString();} | |
using bos_range = range<node_bidirectional_iterator<bos_iterator_impl>, const_node_bidirectional_iterator<bos_iterator_impl>>; | |
using begins_range = range<node_forward_iterator<begins_iterator_impl>, const_node_forward_iterator<begins_iterator_impl>>; | |
using ends_range = range<node_forward_iterator<ends_iterator_impl>, const_node_forward_iterator<ends_iterator_impl>>; | |
bos_range nodes()const{return bos_range{l->bos_node()};} | |
begins_range begins(std::size_t pos)const{return begins_range{l->begin_nodes(pos)};} | |
ends_range ends(std::size_t pos)const{return ends_range(l->end_nodes(pos));} | |
bool is_available()const{return l->is_available();} | |
const char* sentence()const{return l->sentence();} | |
std::size_t size()const{return l->size();} | |
bool next(){return l->next();} | |
template<typename F> | |
void n_best(F&& f){ | |
std::size_t i = 0; | |
while(true){ | |
std::forward<F>(f)(*this, i++); | |
if(!next()) | |
break; | |
} | |
} | |
template<typename F> | |
void n_best(F&& f, std::size_t n){ | |
for(std::size_t i = 0; i < n; ++i){ | |
std::forward<F>(f)(*this, i); | |
if(!next()) | |
break; | |
} | |
} | |
float theta()const{return l->theta();} | |
}; | |
class tagger{ | |
std::unique_ptr<MeCab::Tagger, void(*)(MeCab::Tagger*)> t; | |
explicit tagger(MeCab::Tagger*&& tag):t{std::move(tag), &MeCab::deleteTagger}{} | |
friend class model; | |
public: | |
tagger(tagger&&) = default; | |
MeCab::Tagger* get()const{return t.get();} | |
explicit operator bool()const{return bool{t};} | |
bool parse(MeCab::Lattice* l)const{return t->parse(l);} | |
template<typename Lattice> | |
bool parse(Lattice&& l)const{return parse(std::forward<Lattice>(l).get());} | |
const char* parse(const char* str){return t->parse(str);} | |
const char* what()const{return t->what();} | |
}; | |
class dictionary_info{ | |
const MeCab::DictionaryInfo* d; | |
public: | |
using value_type = const MeCab::DictionaryInfo; | |
using reference = value_type&; | |
constexpr explicit dictionary_info()noexcept:d{nullptr}{} | |
constexpr explicit dictionary_info(const MeCab::DictionaryInfo* p)noexcept:d{p}{} | |
constexpr dictionary_info(const dictionary_info&) = default; | |
dictionary_info& operator=(const dictionary_info&) = default; | |
constexpr dictionary_info(dictionary_info&&) = default; | |
dictionary_info& operator=(dictionary_info&&) = default; | |
~dictionary_info() = default; | |
constexpr const MeCab::DictionaryInfo& operator*()const{return *reinterpret_cast<const MeCab::DictionaryInfo*>(d);} | |
constexpr const MeCab::DictionaryInfo* operator->()const{return reinterpret_cast<const MeCab::DictionaryInfo*>(d);} | |
constexpr bool operator==(const dictionary_info& rhs)const noexcept{return d == rhs.d;} | |
constexpr bool operator!=(const dictionary_info& rhs)const noexcept{return d != rhs.d;} | |
dictionary_info& operator++(){d = d->next;return *this;} | |
dictionary_info operator++(int){auto ret = *this;++*this;return ret;} | |
}; | |
class dictionary_info_range{ | |
const MeCab::DictionaryInfo* beg; | |
constexpr explicit dictionary_info_range(const MeCab::DictionaryInfo* p):beg{p}{} | |
friend class model; | |
public: | |
dictionary_info begin()const{return dictionary_info{beg};} | |
dictionary_info end()const{return dictionary_info{};} | |
dictionary_info cbegin()const{return dictionary_info{beg};} | |
dictionary_info cend()const{return dictionary_info{};} | |
}; | |
class model{ | |
std::unique_ptr<MeCab::Model, void(*)(MeCab::Model*)> m; | |
public: | |
explicit model(const char* arg):m{MeCab::createModel(arg), &MeCab::deleteModel}{} | |
model(int argc, char** argv):m{MeCab::createModel(argc, argv), &MeCab::deleteModel}{} | |
model(model&&) = default; | |
MeCab::Model* get()const{return m.get();} | |
explicit operator bool()const{return bool{m};} | |
tagger create_tagger()const{return tagger{std::move(m->createTagger())};} | |
lattice create_lattice()const{return lattice{std::move(m->createLattice())};} | |
dictionary_info_range dictionary_info()const{return dictionary_info_range{m->dictionary_info()};} | |
}; | |
} | |
#endif//MECAB_HPP_INCLUDED_ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment