Skip to content

Instantly share code, notes, and snippets.

@wx257osn2
Created December 29, 2017 09:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wx257osn2/0ba053b03f0de0e60211252609f44f82 to your computer and use it in GitHub Desktop.
Save wx257osn2/0ba053b03f0de0e60211252609f44f82 to your computer and use it in GitHub Desktop.
MeCabのC++ラッパー
//Copyright (C) 2017 I
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef MECAB_HPP_INCLUDED_
#define MECAB_HPP_INCLUDED_
#include <cstddef>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string_view>
#include <type_traits>
#include <utility>
#include <mecab.h>
//TODO: support expected
namespace mecab{
// Exception type exposed by this wrapper; currently just an alias of
// std::runtime_error. NOTE(review): nothing in the code visible here throws it.
using parse_error = std::runtime_error;//TODO: create exception type
// Strongly typed mirror of the MECAB_*_NODE constants from <mecab.h>; this is
// the type node::status() returns. The per-value notes follow the MeCab
// documentation -- confirm against the installed mecab.h.
enum class node_status{
// Presumably: ordinary node found in the dictionary.
normal = MECAB_NOR_NODE,
// Presumably: node for a word that is not in the dictionary.
unknown = MECAB_UNK_NODE,
// Presumably: virtual node marking the beginning of the sentence.
bos = MECAB_BOS_NODE,
// Presumably: virtual node marking the end of the sentence.
eos = MECAB_EOS_NODE,
// Presumably: virtual node marking the end of an N-best enumeration.
eon = MECAB_EON_NODE
};
/// Accessor layer over MeCab::Node. It adds member functions only (no data
/// members), and every std::string_view it hands out points into memory owned
/// by MeCab, not by this object.
struct node:MeCab::Node{
  /// Surface form: `length` bytes starting at `surface`.
  std::string_view get_surface()const{return {this->surface, this->length};}
  /// Raw `char_type` field of the underlying MeCab node.
  int character_type()const{return this->char_type;}
  /// `stat` field converted to the typed node_status enum.
  node_status status()const{return static_cast<node_status>(this->stat);}
  /// True when the `isbest` field equals 1.
  bool is_best()const{return isbest == 1u;}

  /// The comma-separated `feature` string split into named fields.
  /// NOTE(review): the field order presumably follows the IPA dictionary
  /// layout -- confirm before using with other dictionaries.
  struct features_type{
    std::string_view pos;
    std::string_view pos_detail1;
    std::string_view pos_detail2;
    std::string_view pos_detail3;
    std::string_view conjugated_type;
    std::string_view conjugated_form;
    std::string_view original_form;
    std::string_view reading;
    std::string_view pronunciation;
  };

  /// Splits `feature` at commas into features_type.
  ///
  /// The first eight fields are the text between consecutive commas;
  /// `pronunciation` receives everything after the eighth comma. When the
  /// string has fewer fields than that, the missing trailing fields are left
  /// empty. A null `feature` pointer yields an all-empty result.
  features_type features()const{
    features_type fs;
    // Constructing a string_view from a null pointer is undefined behaviour.
    if(this->feature == nullptr)
      return fs;
    std::string_view rest(this->feature);
    // Pops the next field off the front of `rest`. If no comma is left, the
    // remainder is the last field and `rest` becomes empty, so every later
    // call (and the final assignment below) produces an empty view.
    auto take = [&rest]()->std::string_view{
      const auto comma = rest.find(',');
      if(comma == std::string_view::npos){
        const std::string_view last = rest;
        rest = std::string_view{};
        return last;
      }
      const std::string_view field = rest.substr(0, comma);
      rest.remove_prefix(comma + 1);
      return field;
    };
    fs.pos = take();
    fs.pos_detail1 = take();
    fs.pos_detail2 = take();
    fs.pos_detail3 = take();
    fs.conjugated_type = take();
    fs.conjugated_form = take();
    fs.original_form = take();
    fs.reading = take();
    fs.pronunciation = rest;
    return fs;
  }
};
class lattice{
std::unique_ptr<MeCab::Lattice, void(*)(MeCab::Lattice*)> l;
explicit lattice(MeCab::Lattice*&& lat):l{std::move(lat), &MeCab::deleteLattice}{}
friend class model;
template<typename Impl>
struct node_forward_iterator{
using value_type = node;
using reference = value_type&;
constexpr node_forward_iterator()noexcept:ptr{nullptr}{}
constexpr node_forward_iterator(MeCab::Node* p)noexcept:ptr{p}{}
constexpr node_forward_iterator(const node_forward_iterator&) = default;
node_forward_iterator& operator=(const node_forward_iterator&) = default;
constexpr node_forward_iterator(node_forward_iterator&&) = default;
node_forward_iterator& operator=(node_forward_iterator&&) = default;
~node_forward_iterator() = default;
constexpr node& operator*()const{return *reinterpret_cast<node*>(ptr);}
constexpr node* operator->()const{return reinterpret_cast<node*>(ptr);}
constexpr bool operator==(const node_forward_iterator& rhs)const noexcept{return ptr == rhs.ptr;}
constexpr bool operator!=(const node_forward_iterator& rhs)const noexcept{return ptr != rhs.ptr;}
node_forward_iterator& operator++(){Impl::next(ptr);return *this;}
node_forward_iterator operator++(int){auto ret = *this;++*this;return ret;}
protected:
MeCab::Node* ptr;
};
template<typename Impl>
struct node_bidirectional_iterator:node_forward_iterator<Impl>{
using node_forward_iterator<Impl>::node_forward_iterator;
node_bidirectional_iterator(const node_bidirectional_iterator&) = default;
node_bidirectional_iterator(node_bidirectional_iterator&&) = default;
node_bidirectional_iterator& operator=(const node_bidirectional_iterator&) = default;
node_bidirectional_iterator& operator=(node_bidirectional_iterator&&) = default;
node_bidirectional_iterator& operator++(){Impl::next(this->ptr);return *this;}
node_bidirectional_iterator operator++(int){auto ret = *this;++*this;return ret;}
node_bidirectional_iterator& operator--(){Impl::prev(this->ptr);return *this;}
node_bidirectional_iterator operator--(int){auto ret = *this;--*this;return ret;}
};
template<typename Impl>
struct const_node_forward_iterator{
using value_type = const node;
using reference = value_type&;
constexpr const_node_forward_iterator()noexcept:ptr{nullptr}{}
constexpr const_node_forward_iterator(const MeCab::Node* p)noexcept:ptr{p}{}
constexpr const_node_forward_iterator(const const_node_forward_iterator&) = default;
const_node_forward_iterator& operator=(const const_node_forward_iterator&) = default;
constexpr const_node_forward_iterator(const_node_forward_iterator&&) = default;
const_node_forward_iterator& operator=(const_node_forward_iterator&&) = default;
~const_node_forward_iterator() = default;
constexpr const node& operator*()const{return *reinterpret_cast<const node*>(ptr);}
constexpr const node* operator->()const{return reinterpret_cast<const node*>(ptr);}
constexpr bool operator==(const const_node_forward_iterator& rhs)const noexcept{return ptr == rhs.ptr;}
constexpr bool operator!=(const const_node_forward_iterator& rhs)const noexcept{return ptr != rhs.ptr;}
const_node_forward_iterator& operator++(){Impl::next(ptr);return *this;}
const_node_forward_iterator operator++(int){auto ret = *this;++*this;return ret;}
protected:
const MeCab::Node* ptr;
};
template<typename Impl>
struct const_node_bidirectional_iterator:const_node_forward_iterator<Impl>{
using const_node_forward_iterator<Impl>::const_node_forward_iterator;
const_node_bidirectional_iterator(const const_node_bidirectional_iterator&) = default;
const_node_bidirectional_iterator(const_node_bidirectional_iterator&&) = default;
const_node_bidirectional_iterator& operator=(const const_node_bidirectional_iterator&) = default;
const_node_bidirectional_iterator& operator=(const_node_bidirectional_iterator&&) = default;
const_node_bidirectional_iterator& operator++(){Impl::next(this->ptr);return *this;}
const_node_bidirectional_iterator operator++(int){auto ret = *this;++*this;return ret;}
const_node_bidirectional_iterator& operator--(){Impl::prev(this->ptr);return *this;}
const_node_bidirectional_iterator operator--(int){auto ret = *this;--*this;return ret;}
};
template<typename Iterator, typename ConstIterator>
class range{
MeCab::Node* beg;
constexpr explicit range(MeCab::Node* b):beg{b}{}
friend lattice;
public:
using iterator = Iterator;
using const_iterator = ConstIterator;
iterator begin(){return iterator{beg};}
iterator end(){return iterator{};}
const_iterator begin()const{return const_iterator{beg};}
const_iterator end()const{return const_iterator{};}
const_iterator cbegin()const{return const_iterator{beg};}
const_iterator cend()const{return const_iterator{};}
};
struct bos_iterator_impl{
static void next(MeCab::Node*& ptr){ptr = ptr->next;}
static void next(const MeCab::Node*& ptr){ptr = ptr->next;}
static void prev(MeCab::Node*& ptr){ptr = ptr->prev;}
static void prev(const MeCab::Node*& ptr){ptr = ptr->prev;}
};
struct begins_iterator_impl{
static void next(MeCab::Node*& ptr){ptr = ptr->bnext;}
static void next(const MeCab::Node*& ptr){ptr = ptr->bnext;}
};
struct ends_iterator_impl{
static void next(MeCab::Node*& ptr){ptr = ptr->enext;}
static void next(const MeCab::Node*& ptr){ptr = ptr->enext;}
};
public:
enum class request_type{
one_best_result = MECAB_ONE_BEST,
n_best_results = MECAB_NBEST,
partial_parsing = MECAB_PARTIAL,
marginal_probabilities = MECAB_MARGINAL_PROB,
alternative_results = MECAB_ALTERNATIVE,
all_morphs = MECAB_ALL_MORPHS,
allocate_sentence = MECAB_ALLOCATE_SENTENCE
};
friend constexpr request_type operator|(const request_type& lhs, const request_type& rhs)noexcept{return static_cast<request_type>(static_cast<int>(lhs) | static_cast<int>(rhs));}
lattice(lattice&&) = default;
MeCab::Lattice* get()const{return l.get();}
explicit operator bool()const{return bool{l};}
lattice& clear(){l->clear();return *this;}
lattice& set_sentence(const char* sentence){l->set_sentence(sentence);return *this;}
lattice& set_sentence(const char* sentence, std::size_t len){l->set_sentence(sentence, len);return *this;}
lattice& set_request_type(request_type t){l->set_request_type(static_cast<int>(t));return *this;}
lattice& add_request_type(request_type t){l->add_request_type(static_cast<int>(t));return *this;}
lattice& remove_request_type(request_type t){l->remove_request_type(static_cast<int>(t));return *this;}
bool has_request_type(request_type t)const{return l->has_request_type(static_cast<int>(t));}
request_type get_request_type()const{return static_cast<request_type>(l->request_type());}
const char* to_str()const{return l->toString();}
using bos_range = range<node_bidirectional_iterator<bos_iterator_impl>, const_node_bidirectional_iterator<bos_iterator_impl>>;
using begins_range = range<node_forward_iterator<begins_iterator_impl>, const_node_forward_iterator<begins_iterator_impl>>;
using ends_range = range<node_forward_iterator<ends_iterator_impl>, const_node_forward_iterator<ends_iterator_impl>>;
bos_range nodes()const{return bos_range{l->bos_node()};}
begins_range begins(std::size_t pos)const{return begins_range{l->begin_nodes(pos)};}
ends_range ends(std::size_t pos)const{return ends_range(l->end_nodes(pos));}
bool is_available()const{return l->is_available();}
const char* sentence()const{return l->sentence();}
std::size_t size()const{return l->size();}
bool next(){return l->next();}
template<typename F>
void n_best(F&& f){
std::size_t i = 0;
while(true){
std::forward<F>(f)(*this, i++);
if(!next())
break;
}
}
template<typename F>
void n_best(F&& f, std::size_t n){
for(std::size_t i = 0; i < n; ++i){
std::forward<F>(f)(*this, i);
if(!next())
break;
}
}
float theta()const{return l->theta();}
};
class tagger{
std::unique_ptr<MeCab::Tagger, void(*)(MeCab::Tagger*)> t;
explicit tagger(MeCab::Tagger*&& tag):t{std::move(tag), &MeCab::deleteTagger}{}
friend class model;
public:
tagger(tagger&&) = default;
MeCab::Tagger* get()const{return t.get();}
explicit operator bool()const{return bool{t};}
bool parse(MeCab::Lattice* l)const{return t->parse(l);}
template<typename Lattice>
bool parse(Lattice&& l)const{return parse(std::forward<Lattice>(l).get());}
const char* parse(const char* str){return t->parse(str);}
const char* what()const{return t->what();}
};
/// Forward iterator over a chain of MeCab::DictionaryInfo records connected
/// through their `next` member. A default-constructed (null) iterator is the
/// past-the-end value.
class dictionary_info{
  const MeCab::DictionaryInfo* d;
public:
  using iterator_category = std::forward_iterator_tag;
  using difference_type = std::ptrdiff_t;
  using value_type = const MeCab::DictionaryInfo;
  using pointer = value_type*;
  using reference = value_type&;
  constexpr explicit dictionary_info()noexcept:d{nullptr}{}
  constexpr explicit dictionary_info(const MeCab::DictionaryInfo* p)noexcept:d{p}{}
  constexpr dictionary_info(const dictionary_info&) = default;
  dictionary_info& operator=(const dictionary_info&) = default;
  constexpr dictionary_info(dictionary_info&&) = default;
  dictionary_info& operator=(dictionary_info&&) = default;
  ~dictionary_info() = default;
  // `d` already has the returned pointer type, so no cast is needed.
  constexpr const MeCab::DictionaryInfo& operator*()const{return *d;}
  constexpr const MeCab::DictionaryInfo* operator->()const{return d;}
  constexpr bool operator==(const dictionary_info& rhs)const noexcept{return d == rhs.d;}
  constexpr bool operator!=(const dictionary_info& rhs)const noexcept{return d != rhs.d;}
  /// Moves to the record referenced by `next`; must not be called on the
  /// past-the-end iterator.
  dictionary_info& operator++(){d = d->next;return *this;}
  dictionary_info operator++(int){auto ret = *this;++*this;return ret;}
};
/// Non-owning range over the dictionary-info chain that starts at the pointer
/// supplied by model; iteration ends at the null link.
class dictionary_info_range{
  const MeCab::DictionaryInfo* beg;

  // Private: only model (a friend) creates ranges.
  constexpr explicit dictionary_info_range(const MeCab::DictionaryInfo* p)
    : beg{p}
  {}

  friend class model;

public:
  /// Iterator positioned at the first record.
  dictionary_info begin()const{
    return dictionary_info{beg};
  }

  /// Past-the-end iterator (null record pointer).
  dictionary_info end()const{
    return dictionary_info{};
  }

  /// Same as begin().
  dictionary_info cbegin()const{
    return begin();
  }

  /// Same as end().
  dictionary_info cend()const{
    return end();
  }
};
/// Move-only owning handle for a MeCab::Model; the model is released with
/// MeCab::deleteModel. The constructors store whatever MeCab::createModel
/// returns without checking it, so test the object with operator bool before
/// calling the create_* / dictionary_info members, which dereference the handle.
class model{
  std::unique_ptr<MeCab::Model, void(*)(MeCab::Model*)> m;
public:
  /// Creates the model from a single argument string.
  explicit model(const char* arg):m{MeCab::createModel(arg), &MeCab::deleteModel}{}
  /// Creates the model from an argc/argv style argument vector.
  model(int argc, char** argv):m{MeCab::createModel(argc, argv), &MeCab::deleteModel}{}
  model(model&&) = default;
  model& operator=(model&&) = default;
  /// Raw pointer to the wrapped model; ownership stays with this object.
  MeCab::Model* get()const{return m.get();}
  /// True when a model is held.
  explicit operator bool()const{return static_cast<bool>(m);}
  // The factory results are prvalue pointers and bind to the wrappers'
  // private constructors directly; no std::move is required.
  /// Wraps the result of MeCab::Model::createTagger() in an owning tagger.
  tagger create_tagger()const{return tagger{m->createTagger()};}
  /// Wraps the result of MeCab::Model::createLattice() in an owning lattice.
  lattice create_lattice()const{return lattice{m->createLattice()};}
  /// Range over the records starting at MeCab::Model::dictionary_info().
  dictionary_info_range dictionary_info()const{return dictionary_info_range{m->dictionary_info()};}
};
}
#endif//MECAB_HPP_INCLUDED_
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment