Skip to content

Instantly share code, notes, and snippets.

@ichyo
Created January 3, 2013 13:21
Show Gist options
  • Save ichyo/4443429 to your computer and use it in GitHub Desktop.
Save ichyo/4443429 to your computer and use it in GitHub Desktop.
マルコフ連鎖で文章作るねん
#!/usr/bin/env python
# coding: utf-8
import os
import sys
import re
from collections import defaultdict
import random
def split(sentence):
    """Tokenize *sentence* into words with MeCab's wakati output mode.

    The sentence is written to the standard input of ``mecab -Owakati`` and
    the whitespace-separated tokens it prints are returned as a list.

    The previous implementation interpolated the sentence into a
    single-quoted shell command (``echo '...' | mecab``), so any input
    containing ``'`` broke the command or let the input run arbitrary shell
    code.  Its template also left a literal ``#`` in front of the sentence
    and compensated by discarding the first token of the output; feeding the
    sentence directly makes that workaround unnecessary, so every token of
    the sentence is returned.

    Raises:
        OSError: if the ``mecab`` executable cannot be started.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    # ``echo`` always terminated the input with a newline; keep doing so.
    if not sentence.endswith("\n"):
        sentence += "\n"

    # Argument list + no shell: the sentence is never parsed as shell syntax.
    proc = subprocess.Popen(
        ["mecab", "-Owakati"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    wakati, _ = proc.communicate(sentence)
    return wakati.split()
# Transition table of the Markov chain: maps a state (a tuple of the
# previous n tokens) to the list of every token observed right after it.
# Duplicates are kept on purpose so that random.choice() samples
# proportionally to how often each continuation was seen.
count = defaultdict(list)


def learn(words, n):
    """Record the transitions of one tokenized sentence in ``count``.

    Args:
        words: iterable of tokens making up the sentence.  It is not
            modified (the previous version appended the end marker to the
            caller's list).
        n: order of the chain, i.e. how many preceding tokens form a state.

    The sentence is framed by ``n`` start markers ``"^"`` and a single end
    marker ``"$"``.
    """
    state = ("^",) * n
    # Work on a copy so the caller's sequence is left untouched.
    for word in list(words) + ["$"]:
        count[state].append(word)
        # Slide the window: drop the oldest token, add the newest.
        state = state[1:] + (word,)
# Matches a single ASCII letter; compiled once instead of on every token.
_ASCII_LETTER = re.compile(r"[a-zA-Z]")


def create(n):
    """Generate one sentence by a random walk over the learned chain.

    Args:
        n: order of the chain; must match the value passed to ``learn``.

    Returns:
        The generated text.  Tokens are concatenated directly, except that
        a single space is inserted where one token ends with an ASCII letter
        and the next one starts with an ASCII letter.  An empty string is
        returned when nothing has been learned for the start state.
    """
    state = ("^",) * n
    pieces = []
    tail = ""  # last character emitted so far ("" while nothing is emitted)
    while True:
        # .get() instead of indexing: indexing a defaultdict would insert an
        # empty entry for an unseen state and so mutate the model.
        candidates = count.get(state)
        if not candidates:
            break
        word = random.choice(candidates)
        if word == "$":  # end-of-sentence marker
            break
        state = state[1:] + (word,)
        if _ASCII_LETTER.match(tail) and _ASCII_LETTER.match(word[:1]):
            pieces.append(" ")
        pieces.append(word)
        if word:
            tail = word[-1]
    return "".join(pieces)
if __name__ == '__main__':
    # Order of the Markov chain: each state is the previous N tokens.
    N = 2
    text = sys.stdin.readlines()
    for s in text:
        # Only learn from lines whose length (trailing newline included)
        # exceeds 5; shorter lines are skipped.
        if len(s) > 5:
            learn(split(s), N)
    # Emit 50 generated sentences, one per line.
    for _ in range(50):
        # Call form prints the same single string on Python 2 and also
        # parses on Python 3, unlike the statement form `print create(N)`.
        print(create(N))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment