Skip to content

Instantly share code, notes, and snippets.

@YoshihitoAso
Created March 30, 2016 10:29
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save YoshihitoAso/1e98effc961bf6fc3a14ce723d7c49f9 to your computer and use it in GitHub Desktop.
Save YoshihitoAso/1e98effc961bf6fc3a14ce723d7c49f9 to your computer and use it in GitHub Desktop.
メール文章のポジネガ判定
#!/usr/local/bin/python
# -*- coding:utf-8 -*-
import json
import csv
import MeCab as mc
################################################
# メール文章のポジネガ判定
#
# Copyright 2016 YoshihitoAso(@y_asoh)
################################################
# Morphological analysis (MeCab)
def mecab_analysis(sentence):
    """Return the content words of *sentence* as listed in MeCab's features.

    The text is encoded to UTF-8 and parsed node by node with a MeCab tagger
    that uses the dictionary at ``/var/lib/mecab/dic/ipadic-utf8/``. A node
    is kept only when the first comma-separated field of its feature string
    (the part of speech) is adjective, verb, noun or adverb. The value
    collected for a kept node is field index 6 of the feature string
    (presumably the base/dictionary form in the ipadic layout -- confirm
    against the installed dictionary).

    Args:
        sentence: Text to analyse; must provide ``.encode('utf-8')``.

    Returns:
        A list of the collected words in node order. Nodes whose field 6 is
        ``*`` or whose feature string has fewer than seven fields are
        omitted.
    """
    target_word_types = ("形容詞", "動詞", "名詞", "副詞")
    mt = mc.Tagger('-Ochasen -d /var/lib/mecab/dic/ipadic-utf8/')
    sentence_u = sentence.encode('utf-8')
    node = mt.parseToNode(sentence_u)
    sents = []
    while node:
        # Nodes with an empty surface carry no word and are ignored.
        if node.surface != "":
            # Split once and reuse the fields for both lookups.
            features = node.feature.split(",")
            # The length check keeps short feature strings (fewer than seven
            # fields) from raising IndexError on features[6].
            if features[0] in target_word_types and len(features) > 6:
                plain_word = features[6]
                if plain_word != "*":
                    sents.append(plain_word)
        node = node.next
    return sents
# Load the polarity dictionary into ``pne`` as (word, score) tuples.
# Each row is tab-separated: column 0 is the word, column 1 is its label.
# Labels map to scores: 'p' -> 1.0, 'e' -> 0.5, 'n' -> 0.0.
_LABEL_SCORES = {'p': 1.0, 'e': 0.5, 'n': 0.0}
pne = []
# The context manager closes the dictionary file once it has been read.
with open(r'wago.txt') as dict_file:
    in_file = csv.reader(dict_file, delimiter='\t')
    for line in in_file:
        # Rows without a label column cannot be scored.
        if len(line) < 2:
            continue
        score = _LABEL_SCORES.get(line[1])
        # Rows with any other label are skipped, so they can never inherit
        # the score of an earlier row.
        if score is None:
            continue
        pne.append((line[0], score))
# Judge the positive/negative rate of a single token
def judge_pn(token, dictionary=None):
    """Return the average polarity score of *token*.

    Every dictionary entry whose word equals *token* contributes its score,
    and the result is the mean of those scores.

    Args:
        token: The word to look up.
        dictionary: Optional iterable of ``(word, score)`` entries to search.
            When omitted, the module-level ``pne`` list is used.

    Returns:
        The mean score of the matching entries as a float, or ``0.5`` when
        no entry matches.
    """
    if dictionary is None:
        dictionary = pne
    matching_scores = [entry[1] for entry in dictionary if token == entry[0]]
    if not matching_scores:
        return 0.5
    return float(sum(matching_scores)) / float(len(matching_scores))
def pn_rates_and_sents(sents):
    """Score every token in a list of token lists.

    Args:
        sents: An iterable of token lists (one list per analysed text).

    Returns:
        A tuple ``(pn_rates, pn_rates_with_sents)``: the first is a flat
        list of the ``judge_pn`` result for every token, in order; the
        second is a parallel list of ``(token, rate)`` pairs.
    """
    pn_rates = []
    pn_rates_with_sents = []
    for sent_list in sents:
        for sent in sent_list:
            pn_rate = judge_pn(sent)
            pn_rates.append(pn_rate)
            # Pair the rate with the whole token; indexing the token with
            # [0] would keep only its first character.
            pn_rates_with_sents.append((sent, pn_rate))
    return pn_rates, pn_rates_with_sents
# Compute and print the P (positive) / E (even) / N (negative) scores
def print_scores(pn_rates):
    """Print score totals, counts and ratios for the three polarity classes.

    A rate above 0.5 counts as positive, exactly 0.5 as even and below 0.5
    as negative. Three sections are printed: the summed rates per class, the
    number of rates per class, and each class's share of all counted rates.

    Args:
        pn_rates: An iterable of numeric polarity rates.

    Returns:
        None. When no rate is counted, the shares are printed as 0.0 rather
        than dividing by zero.
    """
    p, e, n = 0.0, 0.0, 0.0
    p_num, e_num, n_num = 0.0, 0.0, 0.0
    for pn in pn_rates:
        if pn > 0.5:
            p += pn
            p_num += 1
        elif pn == 0.5:
            e += pn
            e_num += 1
        elif pn < 0.5:
            n += pn
            n_num += 1
    # Named "total" so the builtin sum() is not shadowed.
    total = p_num + e_num + n_num
    if total:
        p_ratio, e_ratio, n_ratio = p_num/total, e_num/total, n_num/total
    else:
        # No rates were counted: report zero shares.
        p_ratio, e_ratio, n_ratio = 0.0, 0.0, 0.0
    print("")
    print("#### Positive Score, Even Score, Negative Score ####")
    print(p, e, n)
    print("")
    print("#### Postive Num, Even Num, Negative Num ####")
    print(p_num, e_num, n_num)
    print("")
    print("#### Positive %, Even %, Negative % ####")
    print(p_ratio, e_ratio, n_ratio)
    print("")
# Main
if __name__ == "__main__":
    # Load the mail data (JSON). The loop below reads a 'body' entry from
    # each item of the loaded list.
    with open('mail.json', 'r') as f:
        mail_list = json.load(f)
    # Analyse each mail body separately: one token list per mail.
    sents = [mecab_analysis(mail['body']) for mail in mail_list]
    pn_rates, pn_rates_with_sents = pn_rates_and_sents(sents)
    print_scores(pn_rates)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment