Skip to content

Instantly share code, notes, and snippets.

@quanon
Created February 28, 2019 15:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save quanon/96c44080be280ab9b031c8b878d50057 to your computer and use it in GitHub Desktop.
Save quanon/96c44080be280ab9b031c8b878d50057 to your computer and use it in GitHub Desktop.
import MeCab as mc
from matplotlib import pyplot as plt
from wordcloud import WordCloud
def mecab_analysis(text):
    """Tokenize Japanese text with MeCab and return its content words.

    Only nodes whose first feature field (the part of speech) is an
    adjective, verb, noun or adverb are kept. For each kept node the seventh
    feature field is used as the word; when that field is missing or holds
    the ``*`` placeholder, the node's surface string is used instead.

    Args:
        text: The text to analyse.

    Returns:
        A list of word strings in the order they occur in ``text``.
    """
    # Part-of-speech tags to keep: adjective, verb, noun, adverb.
    target_types = ('形容詞', '動詞', '名詞', '副詞')
    # Index of the word form taken from the comma-separated feature string.
    # NOTE(review): assumes the IPADIC-style layout where this field is the
    # base (dictionary) form -- confirm if the dictionary is changed.
    base_form_index = 6

    t = mc.Tagger('-Ochasen -d /usr/local/lib/mecab/dic/mecab-ipadic-neologd/')
    # NOTE(review): priming call kept from the original; presumably the usual
    # mecab-python workaround that keeps node.surface valid -- confirm.
    t.parse('')
    node = t.parseToNode(text)
    output = []
    # node.next is falsy after the last node, which ends the loop.
    while node:
        features = node.feature.split(',')
        if features[0] in target_types:
            if len(features) > base_form_index:
                base_form = features[base_form_index]
            else:
                # Short feature strings would otherwise raise IndexError.
                base_form = '*'
            # '*' is the "no value" placeholder in IPADIC-style features
            # (seen for words without a dictionary entry); use the text as
            # written rather than emitting the placeholder itself.
            output.append(base_form if base_form != '*' else node.surface)
        node = node.next
    return output
def create_wordcloud(
    words,
    font_path='/Users/quanon/Library/Fonts/RictyDiminished-Regular.ttf',
    stopwords=None,
):
    """Render a word cloud for ``words`` and show it in a matplotlib window.

    Args:
        words: Iterable of word strings; they are joined with single spaces
            and passed to ``WordCloud.generate``.
        font_path: Path to the font file handed to ``WordCloud``. Defaults to
            the path that was previously hard-coded.
        stopwords: Optional collection of words to exclude. When ``None``, the
            built-in Japanese stopword set below is used.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    if stopwords is None:
        # Common function words and auxiliaries that would otherwise
        # dominate the cloud. A set, since duplicates carry no meaning.
        stopwords = {
            'てる', 'いる', 'なる', 'れる', 'する', 'ある', 'こと', 'これ',
            'さん', 'して', 'くれる', 'やる', 'くださる', 'そう', 'せる',
            'した', '思う', 'それ', 'ここ', 'ちゃん', 'くん', '', 'て', 'に',
            'を', 'は', 'の', 'が', 'と', 'た', 'し', 'で', 'ない', 'も', 'な',
            'い', 'か', 'ので', 'よう',
        }

    cloud = WordCloud(
        background_color='white',
        font_path=font_path,
        width=900,
        height=500,
        stopwords=stopwords,
    ).generate(' '.join(words))

    plt.figure(figsize=(15, 12))
    plt.imshow(cloud)
    plt.axis('off')
    plt.show()
if __name__ == '__main__':
    # Decode explicitly instead of relying on the platform's default locale
    # encoding, which is not UTF-8 everywhere.
    # NOTE(review): assumes flamingo.txt is UTF-8 encoded -- confirm.
    with open('flamingo.txt', encoding='utf-8') as f:
        text = f.read()
    # Strip line breaks and ideographic (full-width) spaces before tokenizing.
    words = mecab_analysis(text.replace('\n', '').replace('\u3000', ''))
    print(words)
    create_wordcloud(words)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment