Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# -*- coding: utf-8 -*-
require 'httpclient'
require 'nokogiri'
# Sends +text+ as the "sentence" query parameter to the MAService parse
# endpoint on jlp.yahooapis.jp and returns the response body parsed by
# Nokogiri.
#
# text - the String to analyse.
#
# Returns whatever Nokogiri() produces for the response body.
def get(text)
  query = {
    :appid => 'RZv4ed6xg67n15cyyGFF8Io0r3i.o0uwISXfrFOZYyMghbdeNA10_M6KemHLqz0laQ--',
    :sentence => text,
  }
  response = HTTPClient.new.get('http://jlp.yahooapis.jp/MAService/V1/parse', query)
  Nokogiri(response.body)
end
# One analysed word: its surface form, its reading and its part-of-speech tag.
class Word
  attr_accessor :surface, :reading, :pos

  # Builds a Word from a node that answers #at(name) with an object
  # responding to #text, for the names 'surface', 'reading' and 'pos'.
  def self.new_from_node(node)
    new.tap do |word|
      word.surface = node.at('surface').text
      word.reading = node.at('reading').text
      word.pos = node.at('pos').text
    end
  end

  # True when the part of speech is one that attaches to the preceding
  # phrase: adnominal, conjunction, suffix, particle, auxiliary verb or
  # the "special" tag.
  def is_suffix
    %w[連体詞 接続詞 接尾辞 助詞 助動詞 特殊].any? { |tag| tag == pos }
  end

  # True when the part of speech is the "special" tag.
  def is_special
    pos == '特殊'
  end

  # Number of characters in the reading, ignoring the small hiragana
  # listed below; "special" words always count as 0.
  def reading_length
    is_special ? 0 : reading.delete('ぁぃぅぇぉゎゃゅょ').length
  end
end
# Extensions used for arrays of Word-like items (anything responding to
# #surface and #reading_length).
class Array
  # Concatenation of every item's surface, in order.
  def body
    map(&:surface).join('')
  end

  # Sum of every item's reading_length; 0 for an empty array.
  def reading_length
    map(&:reading_length).inject(0, :+)
  end
end
# Script entry point: takes the text from the first command-line argument,
# fetches its word list via `get`, groups the words into phrases, prints
# each phrase with its reading length, then prints a histogram of lengths.
text = ARGV.first
unless text
  warn "usage: #{$0} (text)"
  exit 1
end

res = get text
length_count = Hash.new(0)

# Prints one finished phrase as "<reading length>\t<text>" and tallies
# its length for the histogram.
emit = lambda do |phrase|
  puts "#{phrase.reading_length}\t#{phrase.body}"
  length_count[phrase.reading_length] += 1
end

buffer = []
in_suffix_run = false
res.search('word').each do |node|
  word = Word.new_from_node(node)
  if in_suffix_run && !word.is_suffix
    # A non-suffix word after a run of suffix words starts a new phrase.
    emit.call(buffer)
    buffer = []
    in_suffix_run = false
  elsif !in_suffix_run && word.is_suffix
    in_suffix_run = true
  end
  buffer << word
end
# Flush the trailing phrase; nothing to flush when there were no words.
emit.call(buffer) unless buffer.empty?

puts
# keys.max is nil when no phrase was tallied; fall back to 0 so the range
# is empty instead of endless (Ruby >= 2.6) or an ArgumentError (older).
(1..(length_count.keys.max || 0)).each do |key|
  puts "#{key}\t#{'*' * length_count[key]}"
end
% ruby reading_length.rb "「この味がいいね」と君が言ったから七月六日はサラダ記念日"
2 「この
3 味が
4 いいね」と
3 君が
5 言ったから
3 七月
4 六日は
7 サラダ記念日
1
2 *
3 ***
4 **
5 *
6
7 *
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.