Last active
December 16, 2015 10:39
-
-
Save rummelonp/5421629 to your computer and use it in GitHub Desktop.
MeCab + Ruby でベンチ取ってみた
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
require 'benchmark' | |
require 'MeCab' | |
require 'natto' | |
require 'ffi' | |
# Natto#parse のボトルネック解消したバージョン
# Subclass of Natto::MeCab that overrides #parse with a direct call to
# mecab_sparse_tostr. Per the original author's note this is meant to avoid a
# bottleneck in Natto's own #parse; the upstream implementation is not visible
# in this file, so what exactly is being skipped should be confirmed there.
class DaizuNatto < Natto::MeCab
  # Parses +str+ with the tagger held in @tagger.
  #
  # @param str [String] text to analyse; must not be nil
  # @return [String] the tagger output, re-tagged (not transcoded) with
  #   Encoding.default_external
  # @raise [ArgumentError] if +str+ is nil
  def parse(str)
    if str.nil?
      raise ArgumentError, 'String to parse cannot be nil'
    end

    # NOTE(review): relies on the superclass exposing mecab_sparse_tostr as an
    # instance method and setting @tagger -- confirm against the natto version in use.
    raw_output = mecab_sparse_tostr(@tagger, str)
    raw_output.force_encoding(Encoding.default_external)
  end
end
# 自分で作ったバージョン
# Minimal hand-written FFI binding to libmecab: creates a tagger with
# mecab_new2, parses text with mecab_sparse_tostr and releases the tagger with
# mecab_destroy from a finalizer.
class Negitoro
  extend FFI::Library
  ffi_lib 'mecab'

  attach_function :mecab_new2, [:string], :pointer
  attach_function :mecab_sparse_tostr, [:pointer, :string], :string
  attach_function :mecab_destroy, [:pointer], :void
  # Used only to build error messages when one of the calls above fails.
  attach_function :mecab_strerror, [:pointer], :string

  # Builds the finalizer for a tagger pointer.
  #
  # This is a class method on purpose: the returned proc closes over +tagger+
  # only, not over the Negitoro instance, so registering it as a finalizer does
  # not keep the instance itself reachable.
  #
  # @param tagger [FFI::Pointer] pointer returned by mecab_new2
  # @return [Proc] proc that destroys +tagger+ when called
  def self.clean_proc(tagger)
    # Finalizers are invoked with the object id; it is not needed here.
    proc { |_object_id| mecab_destroy(tagger) }
  end

  # Creates the underlying tagger and registers its cleanup.
  #
  # @param option [String] option string handed to mecab_new2 unchanged
  # @raise [RuntimeError] if mecab_new2 returns a NULL pointer
  def initialize(option = "")
    @tagger = mecab_new2(option)
    # Fail here rather than storing a NULL tagger and passing it to
    # mecab_sparse_tostr on the first #parse.
    if @tagger.null?
      raise "mecab_new2 failed for option #{option.inspect}: #{mecab_strerror(nil)}"
    end

    ObjectSpace.define_finalizer(self, self.class.clean_proc(@tagger))
  end

  # Parses +str+ with the tagger created in #initialize.
  #
  # @param str [String] text to analyse; must not be nil
  # @return [String] the tagger output, re-tagged (not transcoded) with
  #   Encoding.default_external
  # @raise [ArgumentError] if +str+ is nil
  # @raise [RuntimeError] if mecab_sparse_tostr returns NULL
  def parse(str)
    raise ArgumentError, 'String to parse cannot be nil' if str.nil?

    output = mecab_sparse_tostr(@tagger, str)
    # A NULL char* comes back from FFI as nil; report it explicitly instead of
    # letting force_encoding raise NoMethodError on nil.
    raise "mecab_sparse_tostr failed: #{mecab_strerror(@tagger)}" if output.nil?

    output.force_encoding(Encoding.default_external)
  end
end
# ベンチマーク
# Calls +tagger.parse+ repeatedly with the same sentence; this is the workload
# timed by the benchmark.
#
# The sentence is built once and reused for every call, so allocating the
# string literal is not part of the repeated work.
#
# @param tagger [#parse] any object responding to +parse(String)+
# @param iterations [Integer] number of parse calls (defaults to 10000)
# @param text [String] sentence handed to every parse call
# @return [Integer] +iterations+ (the return value of Integer#times)
def do_parse(tagger, iterations = 10_000, text = "太郎はこの本を二郎を見た女性に渡した。")
  iterations.times { tagger.parse(text) }
end
# Runs the benchmark (Benchmark.bmbm, label width 10) over each binding, in
# the order listed. The tagger is constructed inside the timed block, so each
# report covers construction plus the do_parse loop.
Benchmark.bmbm(10) do |bench|
  [
    ["和布蕪", MeCab::Tagger],
    ["納豆", Natto::MeCab],
    ["大豆納豆", DaizuNatto],
    ["ネギトロ", Negitoro]
  ].each do |label, tagger_class|
    bench.report(label) { do_parse(tagger_class.new) }
  end
end
Author
rummelonp
commented
Apr 19, 2013
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment