Created
February 15, 2012 14:29
-
-
Save komamitsu/1836162 to your computer and use it in GitHub Desktop.
Practice using MeCab with a Markov chain
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Encoding: UTF-8
# frozen_string_literal: true

# Order-2 Markov chain text generator built on MeCab tokens.
#
# Reads all input named on the command line (or stdin) through ARGF,
# tokenizes it with MeCab, records every distinct run of three consecutive
# token surfaces, and then prints 4 quoted lines. Each line starts from a
# randomly chosen triple's first two tokens and is extended by up to 30
# tokens, every one picked uniformly among the distinct triples that share
# the current two-token prefix.
require 'MeCab'

mecab = MeCab::Tagger.new
node = mecab.parseToNode(ARGF.read)

# Token surfaces that are collapsed to a single space before being recorded.
blanked = ['(', ')', '「', '」', ' ']

triples = []
x = y = nil
while node
  z = node.surface
  # Any token containing an ASCII parenthesis, or equal to one of the
  # blanked surfaces, is replaced by a single space.
  z = ' ' if z =~ /[\(\)]/ || blanked.include?(z)
  triples << [x, y, z] if x && y
  x, y = y, z
  node = node.next
end

# Keep each distinct triple once so that candidates are chosen uniformly
# among distinct continuations rather than weighted by frequency.
triples = triples.uniq
abort 'not enough tokens in the input to build a Markov chain' if triples.empty?

# Index the triples by their two-token prefix so each generation step is a
# hash lookup instead of a scan over every recorded triple.
by_prefix = triples.group_by { |first, second, _third| [first, second] }

4.times do
  first, second = triples.sample
  str = first + second
  30.times do
    candidates = by_prefix[[first, second]]
    # Dead end: no recorded triple starts with this prefix (for example the
    # last two tokens of the input), so the line ends early.
    break unless candidates

    third = candidates.sample.last
    str << third
    first, second = second, third
  end
  puts '"' + str + '"'
end
komamitsu@carrot:~/lab/ruby$ ruby mk.rb ~/tmp/hoge | |
"の品が多数であって、その最初の種子を植え付けたものは有数な古書籍店に頼んでおけばどこかで掘り出して" | |
"ような特異の現象の生ずるには「文庫」といわれてひどくうれしがったものは書籍は哀れな末路を遂げて、求める" | |
"無批判的な恋をしたもののあったかもしれない。日本にオリジナルな頭脳をもっていない限り、やはり評判の高いほう" | |
"べえ)さんの家族がいてその長男の楠(くす)さんというのであってもたぶん明治年間に現われる書物の荷重に" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Encoding: UTF-8
# frozen_string_literal: true

# Markov chain text generator built on MeCab tokens, with a configurable
# window size.
#
# Reads all input named on the command line (or stdin) through ARGF,
# tokenizes it with MeCab, records every distinct window of STATE consecutive
# token surfaces, and then prints LINE_COUNT quoted lines. Each line starts
# from the first (STATE - 1) tokens of a randomly chosen window and is
# extended by up to UNIT_COUNT tokens.
require 'MeCab'

STATE = 3        # tokens per recorded window; the prefix is STATE - 1 tokens
LINE_COUNT = 4   # number of lines to print
UNIT_COUNT = 30  # maximum number of tokens appended to each line

mecab = MeCab::Tagger.new
node = mecab.parseToNode(ARGF.read)

# Token surfaces that are collapsed to a single space before being recorded.
blanked = ['(', ')', '「', '」', ' ']

window = []
tuples = []
while node
  z = node.surface
  # Any token containing an ASCII parenthesis, or equal to one of the
  # blanked surfaces, is replaced by a single space.
  z = ' ' if z =~ /[\(\)]/ || blanked.include?(z)
  window << z
  window.shift if window.size > STATE
  # Record every full window, including the very first one (recording only
  # after a shift would silently drop the first STATE tokens of the input).
  tuples << window.dup if window.size == STATE
  node = node.next
end

# Keep each distinct window once so that candidates are chosen uniformly
# among distinct continuations rather than weighted by frequency.
tuples = tuples.uniq
abort 'not enough tokens in the input to build a Markov chain' if tuples.empty?

prefix_size = STATE - 1
# Index the windows by their leading (STATE - 1) tokens so each generation
# step is a hash lookup instead of a scan over every recorded window.
by_prefix = tuples.group_by { |units| units.first(prefix_size) }

LINE_COUNT.times do
  prefix = tuples.sample.first(prefix_size)
  str = prefix.join
  UNIT_COUNT.times do
    candidates = by_prefix[prefix]
    # Dead end: no recorded window starts with this prefix (for example the
    # trailing tokens of the input), so the line ends early.
    break unless candidates

    chosen = candidates.sample
    str << chosen.last
    # Slide forward: the next prefix is the chosen window minus its head.
    prefix = chosen.drop(1)
  end
  puts '"' + str + '"'
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment