Skip to content

Instantly share code, notes, and snippets.

@leojojo
Created May 21, 2018 06:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leojojo/d4b6990100ab746e59e6e8c996e18395 to your computer and use it in GitHub Desktop.
Save leojojo/d4b6990100ab746e59e6e8c996e18395 to your computer and use it in GitHub Desktop.
import sys, getopt
import csv, re, MeCab
from os import path
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def clean_tweet(tweet):
    """Delete Twitter markup and emoticon-like noise from a tweet.

    A single combined regular expression removes @-mentions, hashtags,
    URLs, the literal text ``RT``, a handful of punctuation characters and
    a long list of kaomoji / decoration patterns.

    Args:
        tweet: Raw tweet text.

    Returns:
        The text with every match of the combined pattern removed.
    """
    # Order matters: alternatives are tried left to right at each position.
    noise_patterns = (
        r'@[A-Za-z0-9\_\-]+',  # reply / mention
        r'#\w+',  # hashtag
        r'\w+:\/\/\S+',  # link (any scheme)
        r'RT',  # retweet marker; matches anywhere, even inside a word
        r'[:\[\]ー,\.、。\']',  # punctuation
        # emoticons (kaomoji) and decorative symbols
        r'm?\\?\?₍{0,2} ?\([^a-np-zA-Zぁ-ふほ-んァ-ン一-龥]{2,14}\) ?\/?/?₎{0,2}m?.?☞?|([^a-np-zA-Zぁ-んァ-ン一-龥]{2,12})|\(.3\[.{1,5}.|・ω.{1,2}|¯\\_\(ツ\)_/¯|⊂.*[⊃₎]|_\(:3.*\)_|.*\)\"\"|.*ฅ|ʕ.*ʔ| ҉.*\*҉|\(.*੭ु| *_人*_| * ̄Y.* ̄|【| 】|>.*<|>.*<',
    )
    combined = '|'.join(noise_patterns)
    return re.sub(combined, '', tweet)
def mecab_analysis(tweet):
    """Return the first token of a tweet whose POS field starts with 名/形/動.

    The tweet is first passed through ``clean_tweet`` and then parsed by a
    MeCab tagger configured for ChaSen-style output with the dictionary at
    ``/usr/local/lib/mecab/dic/ipadic``.

    Args:
        tweet: Raw tweet text.

    Returns:
        The first column (surface form) of the first output line whose
        fourth column begins with 名, 形 or 動 (presumably noun, adjective
        and verb tags in the IPA dictionary); ``None`` when no line matches.

    Raises:
        ValueError: If an output line does not split into exactly six
            tab-separated columns.
    """
    mecab = MeCab.Tagger('-Ochasen -d /usr/local/lib/mecab/dic/ipadic')
    output_lines = mecab.parse(clean_tweet(tweet)).splitlines()
    # The final output line is skipped (presumably MeCab's end-of-sentence
    # marker, which has no tab-separated columns).
    for line in output_lines[:-1]:
        columns = line.split('\t')
        surface, _, _, feature, _, _ = columns
        if re.search(r'^[名形動]', feature) is not None:
            return surface
    return None
def parse_csv(csvfile):
    """Build the word-cloud input text from a CSV file of tweets.

    The sixth column (index 5) of every row is treated as tweet text and
    passed to ``mecab_analysis``; each word it returns is appended to the
    result followed by a single space.

    Rows with fewer than six columns (for example blank lines) are skipped
    instead of raising ``IndexError``, and tweets for which
    ``mecab_analysis`` returns ``None`` are skipped so the literal word
    "None" does not end up in the output.

    Args:
        csvfile: Path of the CSV file to read.

    Returns:
        A string of the extracted words, each followed by one space
        (empty string when nothing was extracted).
    """
    text_column = 5
    words = []
    with open(csvfile, newline='') as f:
        for row in csv.reader(f):
            if len(row) <= text_column:
                # Short or empty row: there is no tweet text to analyse.
                continue
            word = mecab_analysis(row[text_column])
            if word is None:
                # No matching token in this tweet; str(None) would otherwise
                # inject a bogus "None" token into the cloud.
                continue
            words.append(str(word))
    # Keep the original "word + trailing space" layout, built in one pass.
    return ''.join(word + ' ' for word in words)
def draw_cloud(txt, font_path='/Library/Fonts/ヒラギノ丸ゴ ProN W4.ttc'):
    """Render a 900x500 word cloud of ``txt`` and display it with matplotlib.

    Args:
        txt: Text handed to ``WordCloud.generate``.
        font_path: Font file passed to ``WordCloud``. The default is the
            Hiragino font path under ``/Library/Fonts`` that this script
            originally hard-coded; pass another font file when that path
            does not exist on the current machine.
    """
    cloud = WordCloud(
        font_path=font_path,
        width=900,
        height=500,
    ).generate(txt)

    plt.figure()
    plt.imshow(cloud)
    plt.axis("off")  # hide the axes; only the image is of interest
    plt.show()
def main(argv):
    """Parse command-line options and draw a word cloud for the input CSV.

    Recognised options:
        -h                      print the usage line and exit with status 0
        -i FILE, --ifile=FILE   CSV file to analyse (required)
        -o VALUE                accepted for backward compatibility, ignored

    Args:
        argv: Command-line arguments without the program name.

    Raises:
        SystemExit: With status 2 on an invalid option or when no input
            file was given; with status 0 after ``-h``.
    """
    usage = 'python3 twitter_analysis.py -i <inputfile>'
    try:
        opts, _args = getopt.getopt(argv, "hi:o:", ["ifile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    inputfile = ''
    for opt, arg in opts:
        if opt == "-h":
            print(usage)
            sys.exit(0)
        # A real tuple is required here: `opt in ("-i")` is a substring test
        # against the string "-i" and silently ignored "--ifile".
        if opt in ("-i", "--ifile"):
            inputfile = arg

    if not inputfile:
        # Without an input file there is nothing to read; fail with the
        # usage line rather than trying to open ''.
        print(usage)
        sys.exit(2)

    draw_cloud(parse_csv(inputfile))
if __name__ == "__main__":
    # Script entry point: announce start-up, then pass the command-line
    # arguments (without the program name) to main().
    print("starting...")
    cli_args = sys.argv[1:]
    main(cli_args)
@leojojo
Copy link
Author

leojojo commented May 21, 2018

$ brew install mecab
$ pip3 install mecab-python3
$ brew install mecab-ipadic
$ git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git
$ pip3 install wordcloud

@leojojo
Copy link
Author

leojojo commented May 21, 2018

実行

$ python3  twi_wordcloud.py -i tweets.csv

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment