Skip to content

Instantly share code, notes, and snippets.

@takkkun
Forked from mitukiii/mecab_benchmark.rb
Last active Dec 16, 2015
Embed
What would you like to do?
# extconf.rb -- generates the Makefile used to build the Hijiki C extension.
require 'mkmf'

# The target is spelled 'Hijiki' so that the built library's file name agrees
# with the extension's Init_Hijiki entry point and with the benchmark's
# `require './Hijiki'`. A lowercase target only loads on case-insensitive
# file systems.
# The Makefile is written only when both the MeCab header and library are found.
have_header('mecab.h') && have_library('mecab') && create_makefile('Hijiki')
#include "ruby.h"
#include "ruby/encoding.h"
#include "mecab.h"
/* Per-object state stored inside every Hijiki Ruby instance. */
typedef struct {
/* Tagger handle obtained from mecab_new(); released in hijiki_free(). */
mecab_t *mecab;
/* Encoding attached to the strings returned by hijiki_parse(). */
rb_encoding *encoding;
} hijiki_t;
/*
 * Free callback registered with Data_Wrap_Struct() in hijiki_alloc().
 *
 * Destroys the MeCab tagger (when one was created) and then releases the
 * hijiki_t itself. The struct is allocated with ALLOC() in hijiki_alloc(),
 * so it must be returned with xfree(); otherwise every collected Hijiki
 * object leaks one hijiki_t.
 */
void hijiki_free(hijiki_t *hijiki) {
    if (hijiki == NULL) {
        return;
    }
    if (hijiki->mecab != NULL) {
        mecab_destroy(hijiki->mecab);
    }
    xfree(hijiki);
}
/*
 * Allocator for the Hijiki class.
 *
 * ALLOC() does not zero the memory it returns, so both fields are set to
 * NULL here. hijiki_free() checks `mecab != NULL` before destroying it, and
 * that check is only meaningful if the field has a defined value even when
 * initialize has not run (or raised) before the object is collected.
 */
static VALUE hijiki_alloc(VALUE klass) {
    hijiki_t *hijiki = ALLOC(hijiki_t);

    hijiki->mecab = NULL;
    hijiki->encoding = NULL;
    return Data_Wrap_Struct(klass, 0, hijiki_free, hijiki);
}
/*
 * Hijiki#initialize(*options)
 *
 * Passes the given option strings to mecab_new() and stores the resulting
 * tagger in the wrapped hijiki_t. Result strings are tagged as UTF-8.
 *
 * argc/argv: the Ruby arguments; each must be a String (or convertible via
 *            to_str) without embedded NUL bytes, otherwise StringValueCStr()
 *            raises.
 * Returns self.
 * Raises RuntimeError when mecab_new() returns NULL.
 *
 * NOTE(review): mecab_new() takes a command-line style (argc, argv); it may
 * treat argv[0] as the program name and skip it -- confirm against the MeCab
 * documentation. The arguments are forwarded unchanged here.
 */
VALUE hijiki_initialize(int argc, VALUE *argv, VALUE self) {
    hijiki_t *hijiki;
    char **options;
    int i;

    /* Stack allocation: nothing to release, even if a conversion below raises. */
    options = ALLOCA_N(char *, argc);
    for (i = 0; i < argc; ++i) {
        /* Type-checks the argument instead of blindly reading its buffer. */
        options[i] = StringValueCStr(argv[i]);
    }

    Data_Get_Struct(self, hijiki_t, hijiki);
    hijiki->mecab = mecab_new(argc, options);
    if (hijiki->mecab == NULL) {
        rb_raise(rb_eRuntimeError, "mecab_new failed: %s", mecab_strerror(NULL));
    }
    hijiki->encoding = rb_enc_find("utf-8");

    /* A function declared to return VALUE must return one. */
    return self;
}
/*
 * Hijiki#parse(text) -> String
 *
 * Runs mecab_sparse_tostr() on `text` and returns the result as a new Ruby
 * String tagged with the encoding stored in the wrapped hijiki_t.
 *
 * text: a String (or object convertible via to_str) without embedded NUL
 *       bytes; StringValueCStr() raises otherwise.
 * Raises RuntimeError when the tagger handle is NULL (e.g. mecab_new()
 * failed) or when mecab_sparse_tostr() returns NULL.
 */
VALUE hijiki_parse(VALUE self, VALUE text) {
    hijiki_t *hijiki;
    const char *input;
    const char *output;

    Data_Get_Struct(self, hijiki_t, hijiki);
    if (hijiki->mecab == NULL) {
        rb_raise(rb_eRuntimeError, "MeCab tagger is not initialized");
    }

    input = StringValueCStr(text);
    output = mecab_sparse_tostr(hijiki->mecab, input);
    if (output == NULL) {
        /* Without this check strlen(NULL) below would crash the process. */
        rb_raise(rb_eRuntimeError, "mecab_sparse_tostr failed: %s",
                 mecab_strerror(hijiki->mecab));
    }
    return rb_enc_str_new(output, (long)strlen(output), hijiki->encoding);
}
/*
 * Extension entry point: defines the Hijiki class under Object, installs its
 * allocator and registers the two instance methods implemented in this file.
 */
void Init_Hijiki(void) {
    VALUE klass = rb_define_class("Hijiki", rb_cObject);

    rb_define_alloc_func(klass, hijiki_alloc);
    rb_define_method(klass, "parse", hijiki_parse, 1);
    /* Arity -1: initialize receives its arguments as (argc, argv, self). */
    rb_define_method(klass, "initialize", hijiki_initialize, -1);
}
# -*- coding: utf-8 -*-
require 'benchmark'
require 'MeCab'
require 'natto'
require 'ffi'
require './Hijiki'
# Variant of Natto with the bottleneck in its parse method removed.
class DaizuNatto < Natto::MeCab
  # Calls mecab_sparse_tostr directly on the tagger handle and tags the result
  # with Encoding.default_external.
  #
  # @param str the text to parse
  # @raise [ArgumentError] when +str+ is nil
  def parse(str)
    if str.nil?
      raise ArgumentError, 'String to parse cannot be nil'
    end

    parsed = mecab_sparse_tostr(@tagger, str)
    parsed.force_encoding(Encoding.default_external)
  end
end
# Hand-written version: a minimal FFI binding to libmecab.
class Negitoro
  extend FFI::Library
  ffi_lib 'mecab'
  attach_function :mecab_new2, [:string], :pointer
  attach_function :mecab_sparse_tostr, [:pointer, :string], :string
  attach_function :mecab_destroy, [:pointer], :void

  # Builds the finalizer that destroys +tagger+.
  #
  # Defined on the class so the returned proc does not capture the instance
  # being finalized. It is a proc rather than a zero-arity lambda because Ruby
  # invokes finalizers with the object id as an argument; a strict lambda
  # would raise ArgumentError and mecab_destroy would never run.
  #
  # @param tagger the pointer returned by mecab_new2
  # @return [Proc] a callable that accepts (and ignores) any arguments
  def self.clean_proc(tagger)
    proc { mecab_destroy tagger }
  end

  # @param option [String] option string handed to mecab_new2
  # @raise [RuntimeError] when mecab_new2 returns a NULL pointer
  def initialize(option = "")
    @tagger = mecab_new2 option
    raise "mecab_new2 failed for option #{option.inspect}" if @tagger.null?

    ObjectSpace.define_finalizer self, self.class.clean_proc(@tagger)
  end

  # Parses +str+ and returns the result tagged with Encoding.default_external.
  #
  # @param str the text to parse
  # @raise [ArgumentError] when +str+ is nil
  # @raise [RuntimeError] when mecab_sparse_tostr returns NULL
  def parse(str)
    raise ArgumentError, 'String to parse cannot be nil' if str.nil?

    result = mecab_sparse_tostr(@tagger, str)
    raise 'mecab_sparse_tostr failed' if result.nil?

    result.force_encoding(Encoding.default_external)
  end
end
# Benchmark workload: calls +tagger.parse+ on one fixed Japanese sentence
# 10,000 times. +tagger+ may be any object that responds to #parse.
def do_parse(tagger)
  10_000.times do
    tagger.parse("太郎はこの本を二郎を見た女性に渡した。")
  end
end
# Dictionary directory: the directory of the first "filename:" entry printed
# by `mecab -D`.
dictionary_files = `mecab -D`.each_line.grep(/^filename:/) { |line|
  line.sub(/^filename:\t/, '').chomp
}
dicdir = File.dirname(dictionary_files.first)

# Each contender pairs a report label with a factory for its tagger. The
# factory runs inside the report block, so construction is part of the
# measured work.
contenders = [
  ["和布蕪", -> { MeCab::Tagger.new }],
  ["納豆", -> { Natto::MeCab.new }],
  ["大豆納豆", -> { DaizuNatto.new }],
  ["ネギトロ", -> { Negitoro.new }],
  ["鹿尾菜", -> { Hijiki.new('-O', 'wakati', '-d', dicdir) }]
]

Benchmark.bmbm(10) do |x|
  contenders.each do |label, build|
    x.report(label) { do_parse(build.call) }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment