Skip to content

Instantly share code, notes, and snippets.

@komamitsu
Created February 15, 2012 14:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save komamitsu/1836162 to your computer and use it in GitHub Desktop.
Save komamitsu/1836162 to your computer and use it in GitHub Desktop.
Practice using MeCab to build a Markov chain text generator
# Encoding: UTF-8
require 'MeCab'
# Generates pseudo-sentences from the input text with a second-order Markov
# chain: the text is split into tokens by MeCab, every distinct run of three
# consecutive tokens is recorded, and each output line is grown by repeatedly
# picking a random recorded triple whose first two tokens match the last two
# tokens emitted so far.
mecab = MeCab::Tagger.new

# ARGF reads the files named on the command line, or stdin when there are none.
text = ARGF.read
node = mecab.parseToNode(text)

# Walk the linked list of nodes returned by MeCab and collect the surface forms.
tokens = []
while node
  surface = node.surface
  # Any token containing a parenthesis, and the corner brackets, is replaced
  # by a single space.
  surface = ' ' if surface =~ /[()]/ || surface == '「' || surface == '」'
  tokens << surface
  node = node.next
end

# Distinct triples in order of first appearance.
triples = tokens.each_cons(3).to_a.uniq
abort 'not enough tokens in the input to build a Markov chain' if triples.empty?

# Index the triples by their leading pair so each generation step is a hash
# lookup instead of a scan over the whole chain.
followers = triples.group_by { |triple| triple[0, 2] }

4.times do
  state = triples.sample[0, 2]
  line = state.join
  30.times do
    candidates = followers[state]
    # The final pair of the input has no recorded successor; stop this line
    # early instead of crashing on an empty candidate list.
    break unless candidates

    following = candidates.sample[2]
    line << following
    state = [state[1], following]
  end
  puts '"' + line + '"'
end
komamitsu@carrot:~/lab/ruby$ ruby mk.rb ~/tmp/hoge
"の品が多数であって、その最初の種子を植え付けたものは有数な古書籍店に頼んでおけばどこかで掘り出して"
"ような特異の現象の生ずるには「文庫」といわれてひどくうれしがったものは書籍は哀れな末路を遂げて、求める"
"無批判的な恋をしたもののあったかもしれない。日本にオリジナルな頭脳をもっていない限り、やはり評判の高いほう"
"べえ)さんの家族がいてその長男の楠(くす)さんというのであってもたぶん明治年間に現われる書物の荷重に"
# Encoding: UTF-8
require 'MeCab'
# Generates pseudo-sentences from the input text with a Markov chain whose
# tuple length is configurable: the text is split into tokens by MeCab, every
# distinct run of STATE consecutive tokens is recorded, and each output line is
# grown by repeatedly picking a random recorded tuple whose first (STATE - 1)
# tokens match the last (STATE - 1) tokens emitted so far.

# Tokens per recorded tuple: (STATE - 1) tokens of context plus the one that follows.
STATE = 3
# Number of lines printed.
LINE_COUNT = 4
# Maximum number of tokens appended to each line after its starting context.
UNIT_COUNT = 30

mecab = MeCab::Tagger.new

# ARGF reads the files named on the command line, or stdin when there are none.
text = ARGF.read
node = mecab.parseToNode(text)

# Walk the linked list of nodes returned by MeCab and collect the surface forms.
tokens = []
while node
  surface = node.surface
  # Any token containing a parenthesis, and the corner brackets, is replaced
  # by a single space.
  surface = ' ' if surface =~ /[()]/ || surface == '「' || surface == '」'
  tokens << surface
  node = node.next
end

context_size = STATE - 1

# Distinct tuples in order of first appearance. each_cons also yields the very
# first window of STATE tokens, so the opening of the text is part of the chain.
tuples = tokens.each_cons(STATE).to_a.uniq
abort 'not enough tokens in the input to build a Markov chain' if tuples.empty?

# Index the tuples by their leading (STATE - 1) tokens so each generation step
# is a hash lookup instead of a scan over the whole chain.
followers = tuples.group_by { |units| units[0, context_size] }

LINE_COUNT.times do
  context = tuples.sample[0, context_size]
  str = context.join
  UNIT_COUNT.times do
    # collect tuples matching first (STATE - 1) units
    candidates = followers[context]
    # The final context of the input has no recorded successor; stop this line
    # early instead of crashing on an empty candidate list.
    break unless candidates

    unit = candidates.sample.last
    str << unit
    # Slide the context window forward by one token.
    context = (context + [unit]).last(context_size)
  end
  puts '"' + str + '"'
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment