Skip to content

Instantly share code, notes, and snippets.

@namusyaka
Created August 12, 2013 09:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save namusyaka/6209519 to your computer and use it in GitHub Desktop.
Save namusyaka/6209519 to your computer and use it in GitHub Desktop.
using mecab
require 'MeCab'
require 'kconv'
# Wraps a MeCab::Tagger and converts its textual output into an array of
# token hashes of the form { :data => surface, :type => first_feature }.
class MecabParser
  # @param param [String, nil] argument forwarded to MeCab::Tagger.new; when
  #   nil (or false) the tagger is constructed without arguments.
  def initialize(param = nil)
    @mecab = param ? MeCab::Tagger.new(param) : MeCab::Tagger.new
  end

  # Runs the tagger on +str+ and returns one hash per output line that has a
  # feature field.
  #
  # Each line is cut at its FIRST whitespace character only, so a surface that
  # itself contains a comma (for example a literal "," token) stays intact
  # instead of being split into empty fields. The remainder is then split on
  # commas/whitespace and its first field becomes :type. Lines without a
  # feature field (such as a bare "EOS" terminator) are skipped.
  #
  # NOTE(review): this assumes a "surface<whitespace>features" line layout,
  # which is what MeCab emits by default -- confirm for custom output formats.
  #
  # @param str [String] text to analyse.
  # @return [Array<Hash>] tokens as { :data => String, :type => String },
  #   both values converted with String#toutf8.
  def parse(str)
    @mecab.parse(str).split(/\n/).each_with_object([]) do |line, tokens|
      surface, features = line.split(/\s/, 2)
      # No limit here: trailing empty fields must be dropped so that a line
      # with an empty feature list is ignored.
      type = features && features.split(/,|\s/).first
      next unless type

      tokens << { :data => surface.toutf8, :type => type.toutf8 }
    end
  end

  class << self
    # Convenience entry point: builds a parser, parses +str+ and optionally
    # filters and tallies the tokens.
    #
    # The +options+ hash is only read, never modified, so callers may reuse
    # it or pass a frozen hash.
    #
    # @param str [String] text to analyse.
    # @param options [Hash]
    #   :param           -- forwarded to MecabParser.new
    #   :type            -- when set, keep only tokens whose :type equals it
    #   :duplicate_count -- when truthy, collapse tokens sharing the same
    #                       :data into one entry carrying a :count key
    # @return [Array<Hash>] the (filtered, possibly tallied) tokens.
    def parse(str, options = {})
      type = options[:type]
      tokens = new(options[:param]).parse(str)
      tokens = tokens.select { |token| token[:type] == type } if type
      return tokens unless options[:duplicate_count]

      # Hash lookup keyed by surface replaces a linear scan per token; Ruby
      # hashes keep insertion order, so entries stay in first-seen order.
      tally = {}
      tokens.each do |token|
        entry = (tally[token[:data]] ||= token.merge(:count => 0))
        entry[:count] += 1
      end
      tally.values
    end
  end
end
# Demo: analyse one sample sentence twice and print each result with `p` --
# first only the tokens typed "助詞" (particle), then the tokens typed
# "名詞" (noun) collapsed into per-surface counts.
sample = "すもももももももものうち"
[
  { :type => "助詞" },
  { :type => "名詞", :duplicate_count => true }
].each { |opts| p MecabParser.parse(sample, opts) }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment