Skip to content

Instantly share code, notes, and snippets.

@graph226
Last active May 31, 2016 12:04
Show Gist options
  • Save graph226/6fa5f45d1b521ae1c606f69aaa09318d to your computer and use it in GitHub Desktop.
Save graph226/6fa5f45d1b521ae1c606f69aaa09318d to your computer and use it in GitHub Desktop.
ワードクラウド
#coding: utf-8
import csv
import MeCab
import matplotlib.pyplot as plt
from wordcloud import WordCloud
# Surface forms that are never added to the word cloud, even when MeCab tags
# them as nouns.
STOP_WORDS = [
    "の", "もの", "こと", "よう", "これ", "一", "http://",
    "の", "笑", "ω", "物", "とき", "ら",
]

# Noun sub-categories (second field of a MeCab feature string) whose words
# are dropped from the word cloud.
STOP_NOUN_DETAILS = [
    "数", "接続助詞", "接尾", "代名詞", "非自立", "副詞可能",
]
def csv_reader(data_path):
    """Read the CSV file at *data_path* and return its rows.

    The file is opened in text mode, parsed with ``csv.reader`` and closed
    before returning, so no file handle outlives the call.

    Args:
        data_path: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record; each entry is the list of
        string fields that ``csv.reader`` produced for that record.

    Raises:
        IOError / OSError: If ``open`` cannot open ``data_path``.
    """
    with open(data_path, 'r') as csv_file:
        # Materialise the rows while the file is still open: csv.reader is
        # lazy and could not be consumed after the ``with`` block closes it.
        return list(csv.reader(csv_file))
# MeCab options used when the caller does not supply any: the dictionary
# location the script was originally written against.
_DEFAULT_MECAB_ARGS = '-d /opt/brew/lib/mecab/dic/mecab-ipadic-neologd'

# Taggers that have already been built, keyed by their option string.
_TAGGER_CACHE = {}


def _get_tagger(mecab_args):
    """Return the MeCab tagger for *mecab_args*, creating it on first use."""
    tagger = _TAGGER_CACHE.get(mecab_args)
    if tagger is None:
        tagger = MeCab.Tagger(mecab_args)
        _TAGGER_CACHE[mecab_args] = tagger
    return tagger


def get_nouns(string, mecab_args=_DEFAULT_MECAB_ARGS):
    """Return the nouns found in *string* that pass both stop lists.

    The text is analysed with MeCab and every node is inspected in order.
    A node's surface form is kept when the first field of its feature string
    is "名詞", its second field is not in ``STOP_NOUN_DETAILS`` and the
    surface form itself is not in ``STOP_WORDS``.

    Args:
        string: Value to analyse; it is converted with ``str()`` first.
        mecab_args: Option string handed to ``MeCab.Tagger``. The tagger for
            a given option string is built once and reused by later calls
            instead of being rebuilt on every call.

    Returns:
        A list of surface forms in the order MeCab produced them; duplicates
        are kept.
    """
    text = str(string)
    node = _get_tagger(mecab_args).parseToNode(text)
    nouns = []
    while node:
        features = node.feature.split(",")
        pos = features[0]
        # Guard the second field so a feature string without a comma cannot
        # raise IndexError.
        noun_detail = features[1] if len(features) > 1 else ""
        word = node.surface
        if (pos == "名詞"
                and noun_detail not in STOP_NOUN_DETAILS
                and word not in STOP_WORDS):
            nouns.append(word)
        node = node.next
    return nouns
def main(data_path='', font_path="/Library/Fonts/Yu Gothic Medium.otf"):
    """Build a word cloud from the first column of a CSV file and show it.

    Every non-empty row's first field is passed to ``get_nouns``; the nouns
    are joined with spaces, handed to ``WordCloud.generate`` and the result
    is displayed in a matplotlib window.

    Args:
        data_path: Path of the CSV file to read. The default is the empty
            placeholder path of the original script, which ``open`` cannot
            open, so pass a real path.
        font_path: Font file given to ``WordCloud`` for rendering.
    """
    nouns_all = []
    for row in csv_reader(data_path):
        if not row:
            # csv.reader yields [] for a blank line; row[0] would raise
            # IndexError there.
            continue
        nouns_all.extend(get_nouns(row[0]))

    cloud_text = " ".join(nouns_all)
    if isinstance(cloud_text, bytes):
        # Python 2: the joined value is a byte string and must be decoded.
        # On Python 3 it is already text and has no ``decode`` method.
        cloud_text = cloud_text.decode('utf-8')

    wordcloud = WordCloud(
        background_color="white",
        font_path=font_path,
        width=2880,
        height=1800,
    ).generate(cloud_text)

    plt.figure(figsize=(29, 18))
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment