Skip to content

Instantly share code, notes, and snippets.

@arkanominia
Last active April 14, 2019 00:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arkanominia/b8154565650c76833827bb1fce310906 to your computer and use it in GitHub Desktop.
Save arkanominia/b8154565650c76833827bb1fce310906 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import random
from janome.tokenizer import Tokenizer
# Janomeを使用してテキストデータを単語に分割する
def wakati(text):
    """Split text into a list of word tokens using Janome.

    Line breaks, carriage returns, Japanese corner brackets and
    parentheses (both ASCII and full-width) are stripped before
    tokenizing so that they never show up as tokens.

    Args:
        text: Raw text to tokenize.

    Returns:
        A list of token surface strings, in order of appearance.
    """
    # A single translate pass replaces the former chain of str.replace calls.
    # The full-width parentheses are written as escapes (U+FF08 / U+FF09) so
    # they cannot be confused with, or normalised into, their ASCII twins.
    removed_chars = "\n\r「」()\uff08\uff09"
    text = text.translate(str.maketrans("", "", removed_chars))

    t = Tokenizer()
    # wakati=True yields surface strings only. Depending on the Janome
    # version the result may be a generator, so materialise it to give
    # callers a real, re-iterable list.
    return list(t.tokenize(text, wakati=True))
#デフォルトの文の数は20
def generate_text(num_sentence=20, source_path="sample.txt",
                  output_path="takuya.txt"):
    """Generate text with a second-order Markov chain built from a corpus.

    The corpus is tokenized with ``wakati``; every pair of consecutive
    words is mapped to the list of words observed right after that pair.
    Starting from a random pair, words are drawn until ``num_sentence``
    full stops ("。") have been produced. The result is appended to
    ``output_path`` and printed.

    Args:
        num_sentence: Number of sentences (full stops) to generate.
        source_path: UTF-8 text file used as the corpus.
        output_path: File the generated text is appended to.

    Raises:
        ValueError: If the corpus is too short to build any word-pair
            entry, or if no "。" ever follows a word pair (in which case
            the requested sentences could never be completed).
    """
    # Use a context manager so the corpus file handle is closed promptly.
    with open(source_path, "r", encoding="utf-8") as src_file:
        src = src_file.read()
    wordlist = wakati(src)

    # Build the Markov table: (word1, word2) -> words seen right after them.
    markov = {}
    w1 = ""
    w2 = ""
    for word in wordlist:
        if w1 and w2:
            markov.setdefault((w1, w2), []).append(word)
        w1, w2 = w2, word

    if not markov:
        raise ValueError(
            "corpus is too short to build a Markov table: " + source_path
        )
    if num_sentence > 0 and not any(
        "。" in followers for followers in markov.values()
    ):
        # Without a reachable full stop the loop below would never finish.
        raise ValueError("corpus contains no '。' to end a sentence on")

    # Generate the text.
    prefixes = list(markov)
    pieces = []
    count_kuten = 0  # number of full stops ("。") emitted so far
    w1, w2 = random.choice(prefixes)
    while count_kuten < num_sentence:
        followers = markov.get((w1, w2))
        if followers is None:
            # Dead end: this pair only occurs at the very end of the corpus
            # and has no recorded successor. Restart from a random pair
            # instead of failing with KeyError.
            w1, w2 = random.choice(prefixes)
            continue
        tmp = random.choice(followers)
        pieces.append(tmp)
        if tmp == "。":
            count_kuten += 1
            pieces.append("\n")  # one sentence per line
        w1, w2 = w2, tmp

    sentence = "".join(pieces)
    # Append the generated text to the output file.
    with open(output_path, "a", encoding="utf_8") as f:
        f.write(sentence)
    print(sentence)
# Run the generator only when this file is executed as a script, so that
# importing it from another module does not trigger text generation.
if __name__ == "__main__":
    generate_text()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment