Created
May 2, 2016 01:19
-
-
Save rubyu/4371649d30116581b8ef0d29e0a98dc8 to your computer and use it in GitHub Desktop.
TWCNB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TWCNB "log complement" class weights.
# For each class c, sum over every known word:
#     count[^c] * log((w[^c] + s) / (cw[^c] + s_all))
# where [^c] means "accumulated over every class except c".
# This fragment relies on names defined by the surrounding script:
# classes, classWeights, conn, s, s_all, math, getClassWordCount and
# getClassWordWeight.
print("each classes -> count[^c] * log((w[^c] + s) / (cw[^c] + s_all))")

cw = {}
cw_all = 0
for c in classes:
    # Every class except the one currently being scored.
    complement = [ec for ec in classes if ec != c]

    # Total weight of the complement classes (does not depend on the word).
    denominator = 0
    for ec in complement:
        denominator += classWeights[ec]

    cw[c] = 0
    cur = conn.execute("SELECT DISTINCT word FROM word_doc_count")
    for row in cur:
        word = row[0]
        # Occurrence count and accumulated weight of `word` in the complement.
        count = 0
        numerator = 0
        for ec in complement:
            count += getClassWordCount(ec, word)
            numerator += getClassWordWeight(ec, word)
        cw[c] += float(count) * math.log((numerator + s) / (denominator + s_all))

    cw_all += cw[c]

# Normalise so the per-class values sum to 1, printing them as dict entries.
for c in classes:
    cw[c] /= cw_all
    print("\"%s\": %s," % (c, cw[c]))

# The variable cw should be assigned to
# class_weights[smoothing_parameter]["log_complement"] here.
# For now that is done by hand after each run.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!-*- coding:utf-8 -*- | |
import os | |
import sys | |
import math | |
import random | |
from datetime import datetime | |
import time | |
import urllib | |
import sqlite3 | |
# Open (creating it on first use) the SQLite file that stores all classifier
# state, then apply the tuning pragmas before any table is touched.
conn = sqlite3.connect("classifier_for_nripper.db")
conn.execute("PRAGMA synchronous=OFF")
conn.execute("PRAGMA cache_size=20000")

# Schema, created only if missing:
#   word_doc_count    - number of documents each word appears in
#   global            - named integer counters (e.g. "totalDocCount")
#   class_word_weight - accumulated weight per (class, word)
#   class_word_count  - accumulated occurrence count per (class, word)
_SCHEMA = (
    """
CREATE TABLE IF NOT EXISTS word_doc_count (
word TEXT PRIMARY KEY NOT NULL,
doc_count INTEGER NOT NULL
)
""",
    """
CREATE TABLE IF NOT EXISTS global (
key TEXT PRIMARY KEY NOT NULL,
value INTEGER NOT NULL
)
""",
    """
CREATE TABLE IF NOT EXISTS class_word_weight (
class TEXT NOT NULL,
word TEXT NOT NULL,
weight REAL NOT NULL,
UNIQUE(class, word)
)
""",
    """
CREATE TABLE IF NOT EXISTS class_word_count (
class TEXT NOT NULL,
word TEXT NOT NULL,
count INTEGER NOT NULL,
UNIQUE(class, word)
)
""",
)
for _ddl in _SCHEMA:
    conn.execute(_ddl)
conn.commit()
import MeCab | |
m = MeCab.Tagger("-Owakati") | |
def segment(s):
    """Split the unicode string `s` into tokens with the module-level tagger `m`.

    The text is encoded to UTF-8 for the tagger, the tagger output is decoded
    back, trailing spaces/newlines are removed and the rest is split on
    single spaces.
    """
    encoded = s.encode("utf-8")
    parsed = m.parse(encoded).decode("utf-8")
    return parsed.rstrip(" \n").split(" ")
def getWords(doc):
    """Return the tokens of `doc` (via `segment`), each stripped and lower-cased."""
    return [token.strip().lower() for token in segment(doc)]
def toDataArray(words):
    """Count how often each word occurs in `words`.

    Returns a list of (word, count) tuples, one per distinct word.
    """
    counts = {}
    for w in words:
        # dict.get works on both Python 2 and 3 (dict.has_key is Python 2 only).
        counts[w] = counts.get(w, 0) + 1
    return list(counts.items())
def getWordDocCount(word):
    """Return the stored `doc_count` for `word`, or 0 when it has no row."""
    row = conn.execute(
        "SELECT doc_count FROM word_doc_count WHERE word = ?", [word]
    ).fetchone()
    # fetchone() yields None when no row matched; test identity, not equality.
    if row is None:
        return 0
    return row[0]
def bulkIncrWordDocCount(data):
    """Add one to the stored document count of every word in `data`.

    `data` is an iterable of (word, count) pairs. The per-document count is
    ignored: a document contributes exactly 1 to each word it contains.
    Nothing is committed here.
    """
    for word, _in_doc_count in data:
        updated = getWordDocCount(word) + 1
        conn.execute("INSERT OR REPLACE INTO word_doc_count VALUES(?, ?)", [word, updated])
def getGlobalValue(key):
    """Return the integer stored under `key` in the `global` table, or 0 if absent."""
    row = conn.execute("SELECT value FROM global WHERE key = ?", [key]).fetchone()
    # fetchone() yields None when no row matched; test identity, not equality.
    if row is None:
        return 0
    return row[0]
def incrGlobalValue(key):
    """Increase the counter stored under `key` by one (missing counters start at 0).

    Nothing is committed here.
    """
    bumped = getGlobalValue(key) + 1
    conn.execute("INSERT OR REPLACE INTO global VALUES(?, ?)", [key, bumped])
def getClassWordWeight(c, word):
    """Return the stored weight of `word` in class `c`, or 0 when there is no row."""
    row = conn.execute(
        "SELECT weight FROM class_word_weight WHERE class= ? AND word = ?", [c, word]
    ).fetchone()
    # fetchone() yields None when no row matched; test identity, not equality.
    if row is None:
        return 0
    return row[0]
def bulkAddClassWordWeight(c, weights):
    """Add each weight in `weights` to the stored weight of its word in class `c`.

    `weights` is an iterable of (weight, word) pairs -- note the order.
    Nothing is committed here.
    """
    for delta, word in weights:
        accumulated = delta + getClassWordWeight(c, word)
        conn.execute(
            "INSERT OR REPLACE INTO class_word_weight VALUES(?, ?, ?)",
            [c, word, accumulated],
        )
def getClassWordCount(c, word):
    """Return the stored occurrence count of `word` in class `c`, or 0 when there is no row."""
    row = conn.execute(
        "SELECT count FROM class_word_count WHERE class= ? AND word = ?", [c, word]
    ).fetchone()
    # fetchone() yields None when no row matched; test identity, not equality.
    if row is None:
        return 0
    return row[0]
def bulkAddClassWordCount(c, data):
    """Add each count in `data` to the stored count of its word in class `c`.

    `data` is an iterable of (word, count) pairs. Nothing is committed here.
    """
    for word, delta in data:
        accumulated = delta + getClassWordCount(c, word)
        conn.execute(
            "INSERT OR REPLACE INTO class_word_count VALUES(?, ?, ?)",
            [c, word, accumulated],
        )
""" | |
def getClassList(): | |
res = conn.execute("SELECT DISTINCT class FROM class_word_weight").fetchone() | |
if res == None: | |
return 0 | |
else: | |
return res[0] | |
""" | |
def getClassWeight(c):
    """Return the sum of all stored word weights for class `c`, or 0 if it has none."""
    row = conn.execute(
        "SELECT SUM(weight) FROM class_word_weight WHERE class = ?", [c]
    ).fetchone()
    # An aggregate query always produces one row; SUM() is NULL (None) when no
    # rows matched, so the value itself must be checked, not just the row.
    if row is None or row[0] is None:
        return 0
    return row[0]
def getTF(count):
    """Return the log-damped term frequency: log(count + 1)."""
    return math.log(count + 1)
def getIDF(docs, total):
    """Return the inverse document frequency: log(total / docs).

    `docs` is the number of documents containing the word and `total` the
    number of documents overall; the ratio is computed in floating point.
    """
    ratio = float(total) / docs
    return math.log(ratio)
def getSD(weights, target):
    """Return (mean, standard deviation, deviation score of `target`).

    `weights` is a sequence of (weight, class_name) pairs. The deviation
    score is the target's weight rescaled to mean 50 / SD 10:
    10 * (t - mean) / sd + 50. If `target` is not present its weight is
    taken as 0; if it appears several times the last occurrence is used.

    Returns None when `weights` is empty or the standard deviation is 0.
    """
    if len(weights) == 0:
        return None

    # Float divisor so integer weights are never truncated under Python 2.
    n = float(len(weights))

    target_weight = 0
    total = 0
    for weight, name in weights:
        total += weight
        if name == target:
            target_weight = weight
    avg = total / n

    squared_dev = 0
    for weight, _name in weights:
        squared_dev += (weight - avg) ** 2
    sd = math.sqrt(squared_dev / n)

    # A zero spread would make the score undefined (division by zero).
    if sd == 0:
        return None

    t_sd = (10 * (target_weight - avg)) / sd + 50
    return (avg, sd, t_sd)
def getOrder(weights, target):
    """Return the 1-based position of the first pair whose class equals `target`.

    `weights` is a sequence of (weight, class_name) pairs. Returns None when
    no pair matches.
    """
    for position, (_weight, name) in enumerate(weights, 1):
        if name == target:
            return position
    return None
def printProbs(weights, target):
    """Print a ranked listing of `weights`, flagging the entries for `target`.

    `weights` is a sequence of (weight, class_name) pairs, printed in the
    order given between two 30-dash rulers. Each line shows the 1-based
    rank, the weight and the class name; lines whose class equals `target`
    get a "!target!" suffix.
    """
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    ruler = "-" * 30
    print(ruler)
    for order, (weight, targetc) in enumerate(weights, 1):
        if targetc == target:
            print("%2d %s: %s \t!target!" % (order, weight, targetc))
        else:
            print("%2d %s: %s" % (order, weight, targetc))
    print(ruler)
def getDirs(path):
    """Return the names (not full paths) of the directories directly inside `path`."""
    # os.path.join uses the platform's separator; a hard-coded backslash
    # only works on Windows.
    return [
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    ]
def getFiles(path):
    """Return the names (not full paths) of the regular files directly inside `path`."""
    # os.path.join uses the platform's separator; a hard-coded backslash
    # only works on Windows.
    return [
        name for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    ]
if __name__ == "__main__": | |
corpus = "arcadia" | |
classes = getDirs(corpus) | |
use_for_test = 10 | |
if False: | |
print "add document ..." | |
for c in classes: | |
print "class: %s" % c | |
files = getFiles(corpus + "\\" + c) | |
print "total: %s" % len(files) | |
fcount = 0 | |
for f in files[:-10]: | |
fcount += 1 | |
print "%s / %s" % (fcount, len(files)) | |
doc = open(corpus + "\\" + c + "\\" + f).read() | |
doc = unicode(doc, "utf-8", errors="replace") | |
words = getWords(doc) | |
data = toDataArray(words) | |
print "%s unique words" % len(data) | |
bulkIncrWordDocCount(data) | |
incrGlobalValue("totalDocCount") | |
conn.commit() | |
if False: | |
print "compute weights ..." | |
for c in classes: | |
print "class: %s" % c | |
files = getFiles(corpus + "\\" + c) | |
print "total: %s" % len(files) | |
fcount = 0 | |
for f in files[:-10]: | |
fcount += 1 | |
print "%s / %s" % (fcount, len(files)) | |
doc = open(corpus + "\\" + c + "\\" + f).read() | |
doc = unicode(doc, "utf-8", errors="replace") | |
words = getWords(doc) | |
data = toDataArray(words) | |
print "%s unique words" % len(data) | |
weights = [] | |
weightTotal = 0 | |
for d in data: | |
word, count = d | |
totalDocCount = getGlobalValue("totalDocCount") | |
docCount = getWordDocCount(word) | |
tf = getTF(count) | |
idf = getIDF(docCount, totalDocCount) | |
weight = tf * idf | |
weights.append((weight, word)) | |
weightTotal += weight ** 2 | |
weightTotal = math.sqrt(weightTotal) | |
nWeights = [] | |
nWeightTotal = 0 | |
for d in weights: | |
weight, word = d | |
weight /= weightTotal | |
nWeightTotal += weight | |
nWeights.append((weight, word)) | |
if False: | |
nWeights.sort(reverse=True) | |
for d in nWeights[:50]: | |
weight, word = d | |
try: | |
print "%s: %s" % (word.encode("shift-jis"), weight) | |
except: | |
pass | |
bulkAddClassWordCount(c, data) | |
bulkAddClassWordWeight(c, nWeights) | |
conn.commit() | |
""" | |
クラスごとの単語数 | |
""" | |
if False: | |
total = 0 | |
count = {} | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
total += 1 | |
word = row[0] | |
for c in classes: | |
if 0 != getClassWordWeight(c, word): | |
if not count.has_key(c): | |
count[c] = 1 | |
else: | |
count[c] += 1 | |
for c in classes: | |
print c | |
print "count: %s" % count[c] | |
print "total: %s" % total | |
sys.exit() | |
""" | |
クラスごとのウェイト平均, 全ての単語について, スムージングあり | |
""" | |
if False: | |
smoothing = 1 | |
for c in classes: | |
print c | |
weights = [] | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
weights.append( getClassWordWeight(c, word) + smoothing ) | |
avg = 0 | |
for weight in weights: | |
avg += weight | |
avg /= len(weights) | |
sd = 0 | |
for weight in weights: | |
sd += (weight - avg) ** 2 | |
sd /= len(weights) | |
sd = math.sqrt(sd) | |
print "avg: %s" % avg | |
print "sd: %s" % sd | |
sys.exit() | |
""" | |
クラスごとのウェイト平均, 全ての単語について | |
""" | |
if False: | |
for c in classes: | |
print c | |
weights = [] | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
weights.append( getClassWordWeight(c, word) ) | |
avg = 0 | |
for weight in weights: | |
avg += weight | |
avg /= len(weights) | |
sd = 0 | |
for weight in weights: | |
sd += (weight - avg) ** 2 | |
sd /= len(weights) | |
sd = math.sqrt(sd) | |
print "avg: %s" % avg | |
print "sd: %s" % sd | |
sys.exit() | |
""" | |
クラスごとのウェイト平均 | |
""" | |
if False: | |
for c in classes: | |
print c | |
weights = [] | |
cur = conn.execute("SELECT weight FROM class_word_weight WHERE class= ?", [c]) | |
for row in cur: | |
weight = row[0] | |
weights.append(weight) | |
avg = 0 | |
for weight in weights: | |
avg += weight | |
avg /= len(weights) | |
sd = 0 | |
for weight in weights: | |
sd += (weight - avg) ** 2 | |
sd /= len(weights) | |
sd = math.sqrt(sd) | |
print "avg: %s" % avg | |
print "sd: %s" % sd | |
sys.exit() | |
""" | |
docごとのウェイトの詳細出力 | |
""" | |
if False: | |
print "printing weight details " | |
for c in classes: | |
dir = "arcadia_weight\\" + c | |
try: | |
os.makedirs(dir) | |
except: | |
pass | |
print "class: %s" % c | |
files = getFiles(corpus + "\\" + c) | |
print "total: %s" % len(files) | |
files = files[-use_for_test:] | |
print "use for test: %s" % len(files) | |
fcount = 0 | |
for f in files: | |
fcount += 1 | |
print "fileNo: %s (%s / %s)" % (f, fcount, len(files)) | |
doc = open(corpus + "\\" + c + "\\" + f).read() | |
doc = unicode(doc, "utf-8", errors="replace") | |
words = getWords(doc) | |
data = toDataArray(words) | |
print "unique words: %s" % len(data) | |
allWeights = [] | |
for d in data: | |
word, count = d | |
weights = [] | |
for targetc in classes: | |
res = conn.execute("SELECT weight FROM class_word_weight WHERE class= ? AND word = ?", [targetc, word]).fetchone() | |
if res == None: | |
w = "NA" | |
else: | |
w = res[0] | |
weights.append(w) | |
if targetc == c: | |
weights.insert(0, count) | |
weights.insert(0, word) | |
weights.insert(0, w) | |
allWeights.append(tuple(weights)) | |
allWeights.sort(reverse=True) | |
fout = open(dir + "\\" + f, 'w') | |
line = [] | |
line.append("word") | |
line.append("count") | |
for targetc in classes: | |
line.append(targetc) | |
fout.write("\t".join([str(w) for w in line])) | |
fout.write("\n") | |
for w in allWeights: | |
try: | |
line = [] | |
#line.append("\"" + w[1].replace("\"", "__").encode("utf-8") + "\"") #word | |
line.append("\"" + urllib.quote(w[1].replace("\"", "__").encode("utf-8")) + "\"") #word | |
line.append(w[2]) #count | |
for d in w[3:]: | |
line.append(d) #weight | |
fout.write("\t".join([str(w) for w in line])) | |
fout.write("\n") | |
except: | |
pass | |
sys.exit() | |
""" | |
classごとのwordのweightの詳細出力 | |
""" | |
if False: | |
print "printing count and weight details " | |
fout = open('class_word_count_weight_detail.txt', 'w') | |
line = [] | |
line.append("classes") | |
line.append("word") | |
line.append("weight") | |
fout.write("\t".join([str(w) for w in line])) | |
fout.write("\n") | |
try: | |
for c in classes: | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
line = [] | |
line.append("\"" + c + "\"") | |
line.append("\"" + urllib.quote(word.replace("\"", "__").encode("utf-8")) + "\"") | |
res = conn.execute("SELECT weight FROM class_word_weight WHERE class= ? AND word = ?", [c, word]).fetchone() | |
if res == None: | |
line.append("NA") | |
else: | |
line.append(res[0]) | |
fout.write("\t".join([str(w) for w in line])) | |
fout.write("\n") | |
except: | |
pass | |
""" | |
classごとのwordのweightの詳細出力, ウェイトについて、IDF値で割る | |
""" | |
if False: | |
print "printing count and weight details " | |
fout = open('class_word_count_weight_detail_div_idf.txt', 'w') | |
line = [] | |
line.append("classes") | |
line.append("word") | |
line.append("weight") | |
fout.write("\t".join([str(w) for w in line])) | |
fout.write("\n") | |
try: | |
totalDocCount = getGlobalValue("totalDocCount") | |
for c in classes: | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
line = [] | |
line.append("\"" + c + "\"") | |
line.append("\"" + urllib.quote(word.replace("\"", "__").encode("utf-8")) + "\"") | |
res = conn.execute("SELECT weight FROM class_word_weight WHERE class= ? AND word = ?", [c, word]).fetchone() | |
if res == None: | |
line.append("NA") | |
else: | |
docCount = getWordDocCount(word) | |
idf = getIDF(docCount, totalDocCount) | |
if idf != 0: | |
line.append(res[0] / idf) | |
else: | |
line.append(0) | |
fout.write("\t".join([str(w) for w in line])) | |
fout.write("\n") | |
except: | |
pass | |
""" | |
ここまでで、tf-idfを用いて クラス - 単語 - ウェイト を求めている。 | |
4.1(TF transform) | |
単語頻度をべき乗分布に従うようにし、実際の単語頻度確率分布に近づける。 | |
4.2(IDF transform) | |
頻出単語のクラス分類への影響を低減させる。 | |
4.3(length norm) | |
長いドキュメントが重用されないよう、ドキュメント長での正規化。 | |
を適用している。 | |
getClassWordWeight(c, word) | |
""" | |
if True: | |
print "testing ..." | |
classWeights = { | |
"akamatu": 10338.780247, | |
"eva": 7652.34294295, | |
"ff": 4499.4428822, | |
"gs": 1214.35158326, | |
"HxH": 1957.9397041, | |
"muv": 14541.1070473, | |
"nade": 1122.17844428, | |
"naruto": 5903.74842047, | |
"original": 26770.7560395, | |
"sammon": 979.966347776, | |
"toraha": 24123.1373033, | |
"type": 14555.178196, | |
"zero": 10239.0622349 | |
} | |
if True: | |
print "-" * 30 | |
print "*class weights*" | |
classWeights = {} | |
classWeightsTotal = 0 | |
for c in classes: | |
classWeights[c] = getClassWeight(c) | |
classWeightsTotal += classWeights[c] | |
print "\"%s\": %s," % (c, classWeights[c]) | |
print "-" * 30 | |
print "-" * 30 | |
print "*class weights normalized*" | |
for c in classes: | |
print "\"%s\": %s," % (c, classWeights[c] / classWeightsTotal) | |
print "-" * 30 | |
class_weights = { | |
1 : { | |
"count_log" : { | |
"akamatu": 0.076803957698, | |
"eva": 0.0765281913222, | |
"ff": 0.0776863314586, | |
"gs": 0.0782604959874, | |
"HxH": 0.0781194882895, | |
"muv": 0.0764444270908, | |
"nade": 0.0781613567559, | |
"naruto": 0.0770410538314, | |
"original": 0.0745866402645, | |
"sammon": 0.0782463671122, | |
"toraha": 0.0756274083759, | |
"type": 0.0757030920232, | |
"zero": 0.0767911897903, | |
}, | |
"count" : { | |
"akamatu": 0.0817124441002, | |
"eva": 0.0372208887495, | |
"ff": 0.0278040000315, | |
"gs": 0.00382368938481, | |
"HxH": 0.00844992107636, | |
"muv": 0.153335036161, | |
"nade": 0.0018630685205, | |
"naruto": 0.0363817681699, | |
"original": 0.203180054696, | |
"sammon": 0.00328466417964, | |
"toraha": 0.271105060781, | |
"type": 0.0922599155798, | |
"zero": 0.0795794885704 | |
}, | |
"log" : { | |
"akamatu": 0.0769167788887, | |
"eva": 0.0769125068799, | |
"ff": 0.0768909989191, | |
"gs": 0.0768846856777, | |
"HxH": 0.0768855946199, | |
"muv": 0.0769452721712, | |
"nade": 0.0768852921445, | |
"naruto": 0.0769031605635, | |
"original": 0.0770171973361, | |
"sammon": 0.0768843171399, | |
"toraha": 0.0770047945764, | |
"type": 0.0769507056099, | |
"zero": 0.0769186954732 | |
}, | |
"log_complement" : { | |
"akamatu": 0.076868318422, | |
"eva": 0.0770057713849, | |
"ff": 0.0767609289076, | |
"gs": 0.0767150451683, | |
"HxH": 0.0767237198601, | |
"muv": 0.076940334663, | |
"nade": 0.0767342202184, | |
"naruto": 0.0768963973054, | |
"original": 0.0775000489223, | |
"sammon": 0.0767192394693, | |
"toraha": 0.0770806816713, | |
"type": 0.0771654693196, | |
"zero": 0.0768898246878, | |
} | |
}, | |
0.1: { | |
"count_log": { | |
"akamatu": 0.0762512836488, | |
"eva": 0.0749601826474, | |
"ff": 0.0782878349419, | |
"gs": 0.0805614637159, | |
"HxH": 0.0798892961017, | |
"muv": 0.0758655048967, | |
"nade": 0.0800124210774, | |
"naruto": 0.076244145346, | |
"original": 0.072583801544, | |
"sammon": 0.0804913988864, | |
"toraha": 0.0748565320527, | |
"type": 0.073825056199, | |
"zero": 0.0761710789423, | |
}, | |
"count": { | |
"akamatu": 0.0756639558836, | |
"eva": 0.0464631526662, | |
"ff": 0.0185410289536, | |
"gs": 0.00215203163463, | |
"HxH": 0.00448477587876, | |
"muv": 0.159492779821, | |
"nade": 0.00126296539504, | |
"naruto": 0.0416746791096, | |
"original": 0.202696157374, | |
"sammon": 0.00158442528543, | |
"toraha": 0.260535990882, | |
"type": 0.106342988214, | |
"zero": 0.0791050689028 | |
}, | |
"log": { | |
"akamatu": 0.07699133716, | |
"eva": 0.076845241008, | |
"ff": 0.0764467954692, | |
"gs": 0.0761991627296, | |
"HxH": 0.0762487503426, | |
"muv": 0.0774045672112, | |
"nade": 0.0762109372266, | |
"naruto": 0.0766707975439, | |
"original": 0.0782230139347, | |
"sammon": 0.076185786234, | |
"toraha": 0.0780864337276, | |
"type": 0.0774748960333, | |
"zero": 0.0770122813794 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768208954766, | |
"eva": 0.0771082955739, | |
"ff": 0.0768253492256, | |
"gs": 0.076840441902, | |
"HxH": 0.0768340577217, | |
"muv": 0.0768082872779, | |
"nade": 0.0768719322054, | |
"naruto": 0.076991293746, | |
"original": 0.0773086211446, | |
"sammon": 0.0768524950237, | |
"toraha": 0.0767250394207, | |
"type": 0.0771475661595, | |
"zero": 0.0768657251224, | |
} | |
}, | |
0.01: { | |
"count_log": { | |
"akamatu": 0.0769066375194, | |
"eva": 0.0739037067159, | |
"ff": 0.0778579980725, | |
"gs": 0.0797584560021, | |
"HxH": 0.0791186404913, | |
"muv": 0.0773352371408, | |
"nade": 0.0783064605351, | |
"naruto": 0.0750942542841, | |
"original": 0.0742536071925, | |
"sammon": 0.0793357349613, | |
"toraha": 0.0771055671215, | |
"type": 0.074318007383, | |
"zero": 0.0767056925805, | |
}, | |
"count": { | |
"akamatu": 0.0866491274899, | |
"eva": 0.065706559771, | |
"ff": 0.0270600176155, | |
"gs": 0.00434317823757, | |
"HxH": 0.00757625053516, | |
"muv": 0.161510359574, | |
"nade": 0.00295720211797, | |
"naruto": 0.0654134395297, | |
"original": 0.160667140588, | |
"sammon": 0.00288996606205, | |
"toraha": 0.212703142655, | |
"type": 0.109843093169, | |
"zero": 0.0926805226562 | |
}, | |
"log": { | |
"akamatu": 0.0779073235827, | |
"eva": 0.0775962381183, | |
"ff": 0.075760836814, | |
"gs": 0.0735975439802, | |
"HxH": 0.0742280213893, | |
"muv": 0.0790295088583, | |
"nade": 0.0736528865082, | |
"naruto": 0.0768763460077, | |
"original": 0.0803984430262, | |
"sammon": 0.0733801716318, | |
"toraha": 0.0800780482133, | |
"type": 0.0794299946095, | |
"zero": 0.0780646372604 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768101047625, | |
"eva": 0.0771377553335, | |
"ff": 0.0768614819677, | |
"gs": 0.0768978181705, | |
"HxH": 0.0768865536412, | |
"muv": 0.076764183868, | |
"nade": 0.0769321952581, | |
"naruto": 0.0770269395279, | |
"original": 0.077194430214, | |
"sammon": 0.0769128105813, | |
"toraha": 0.0765881596931, | |
"type": 0.0771212791979, | |
"zero": 0.0768662877843, | |
} | |
}, | |
0.001: { | |
"count_log": { | |
"akamatu": 0.0780185415292, | |
"eva": 0.0744963985378, | |
"ff": 0.0781212304252, | |
"gs": 0.0774890170339, | |
"HxH": 0.0780030329864, | |
"muv": 0.0787748459599, | |
"nade": 0.0754859561034, | |
"naruto": 0.0754724012128, | |
"original": 0.0757668347929, | |
"sammon": 0.0762996258978, | |
"toraha": 0.0787218466361, | |
"type": 0.075559194554, | |
"zero": 0.0777910743308, | |
}, | |
"count": { | |
"akamatu": 0.087973665158, | |
"eva": 0.0713664055092, | |
"ff": 0.0329470176543, | |
"gs": 0.00898003655301, | |
"HxH": 0.0123538226244, | |
"muv": 0.15620341616, | |
"nade": 0.00650070208851, | |
"naruto": 0.0755201410784, | |
"original": 0.146020928595, | |
"sammon": 0.006473791386, | |
"toraha": 0.194744686646, | |
"type": 0.106456586688, | |
"zero": 0.0944587998596 | |
}, | |
"log": { | |
"akamatu": 0.0782107352173, | |
"eva": 0.0787720800924, | |
"ff": 0.0762193791294, | |
"gs": 0.0726834267532, | |
"HxH": 0.0740859173818, | |
"muv": 0.0792287623342, | |
"nade": 0.0728466156321, | |
"naruto": 0.0779899472264, | |
"original": 0.0797516011932, | |
"sammon": 0.0720145093176, | |
"toraha": 0.0792177632836, | |
"type": 0.0802871127608, | |
"zero": 0.0786921496779 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768090069784, | |
"eva": 0.0771374611645, | |
"ff": 0.0768651848909, | |
"gs": 0.0769002184338, | |
"HxH": 0.0768892863743, | |
"muv": 0.0767625371869, | |
"nade": 0.076934847317, | |
"naruto": 0.0770284299144, | |
"original": 0.0771921511656, | |
"sammon": 0.0769160441484, | |
"toraha": 0.0765805790049, | |
"type": 0.0771147553449, | |
"zero": 0.0768694980761, | |
} | |
}, | |
0.0001: { | |
"count_log": { | |
"akamatu": 0.0780923730448, | |
"eva": 0.0746496463985, | |
"ff": 0.0782014629913, | |
"gs": 0.077194459967, | |
"HxH": 0.0779437865349, | |
"muv": 0.0788988453706, | |
"nade": 0.0753137295455, | |
"naruto": 0.07564764567, | |
"original": 0.0758632887556, | |
"sammon": 0.0757972084518, | |
"toraha": 0.0787955634873, | |
"type": 0.0757206257892, | |
"zero": 0.0778813639935, | |
}, | |
"count": { | |
"akamatu": 0.0877946516109, | |
"eva": 0.0718364824067, | |
"ff": 0.0337676403197, | |
"gs": 0.0104117347871, | |
"HxH": 0.0134384001925, | |
"muv": 0.154946393348, | |
"nade": 0.00765304224568, | |
"naruto": 0.0766786954138, | |
"original": 0.143815282776, | |
"sammon": 0.00776600792853, | |
"toraha": 0.191962313085, | |
"type": 0.105621987204, | |
"zero": 0.0943073686819 | |
}, | |
"log": { | |
"akamatu": 0.0774683894707, | |
"eva": 0.0791320992099, | |
"ff": 0.0765333112053, | |
"gs": 0.0741150313085, | |
"HxH": 0.0752868374142, | |
"muv": 0.0781419999124, | |
"nade": 0.0745485141111, | |
"naruto": 0.0785667689259, | |
"original": 0.0777005930993, | |
"sammon": 0.0734073877121, | |
"toraha": 0.076954612506, | |
"type": 0.0798603463815, | |
"zero": 0.078284108743 | |
}, | |
"log_complement": { | |
"akamatu": 0.076809105821, | |
"eva": 0.0771335341665, | |
"ff": 0.0768645996078, | |
"gs": 0.0768954693754, | |
"HxH": 0.0768854944305, | |
"muv": 0.0767662220885, | |
"nade": 0.0769300425931, | |
"naruto": 0.0770255558043, | |
"original": 0.0772051013097, | |
"sammon": 0.0769118675756, | |
"toraha": 0.0765895738483, | |
"type": 0.0771111032019, | |
"zero": 0.0768723301775, | |
} | |
}, | |
0.00001: { | |
"log_count": { | |
"akamatu": 0.0779002954313, | |
"eva": 0.0746198573607, | |
"ff": 0.0781516623642, | |
"gs": 0.0774355417012, | |
"HxH": 0.078062502327, | |
"muv": 0.0787193133767, | |
"nade": 0.075844989475, | |
"naruto": 0.0756769264951, | |
"original": 0.0756491702872, | |
"sammon": 0.0760771991106, | |
"toraha": 0.0785395909979, | |
"type": 0.0756113949565, | |
"zero": 0.0777115561166, | |
}, | |
"count": { | |
"akamatu": 0.0877666570356, | |
"eva": 0.0718771209795, | |
"ff": 0.0338510695395, | |
"gs": 0.0105852858453, | |
"HxH": 0.013560346216, | |
"muv": 0.154801162357, | |
"nade": 0.00779482272841, | |
"naruto": 0.076791296446, | |
"original": 0.143576409062, | |
"sammon": 0.00792963326056, | |
"toraha": 0.191659569346, | |
"type": 0.105525194695, | |
"zero": 0.09428143249 | |
}, | |
"log": { | |
"akamatu": 0.0766722033925, | |
"eva": 0.0792740159204, | |
"ff": 0.0767142005174, | |
"gs": 0.0757035173, | |
"HxH": 0.07644277834, | |
"muv": 0.0770362397981, | |
"nade": 0.0764108290148, | |
"naruto": 0.0789193706818, | |
"original": 0.0757889916054, | |
"sammon": 0.0750999310093, | |
"toraha": 0.0748666230714, | |
"type": 0.0793042247333, | |
"zero": 0.0777670746155 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768093307345, | |
"eva": 0.0771292494819, | |
"ff": 0.0768635805306, | |
"gs": 0.0768900025971, | |
"HxH": 0.0768810447395, | |
"muv": 0.0767704444539, | |
"nade": 0.0769244892152, | |
"naruto": 0.0770222444065, | |
"original": 0.0772195844618, | |
"sammon": 0.0769069458823, | |
"toraha": 0.0766002382159, | |
"type": 0.07710775636, | |
"zero": 0.0768750889207, | |
} | |
}, | |
0.000001: { | |
"log_count": { | |
"akamatu": 0.0776802445669, | |
"eva": 0.0745687723841, | |
"ff": 0.0780863657591, | |
"gs": 0.0777347837739, | |
"HxH": 0.0781979014009, | |
"muv": 0.0785078202681, | |
"nade": 0.0764504623561, | |
"naruto": 0.0756880491118, | |
"original": 0.0754029154941, | |
"sammon": 0.0764458096232, | |
"toraha": 0.0782499260423, | |
"type": 0.0754728302138, | |
"zero": 0.0775141190057, | |
}, | |
"count": { | |
"akamatu": 0.0877637342908, | |
"eva": 0.0718811026902, | |
"ff": 0.0338594194431, | |
"gs": 0.0106030098068, | |
"HxH": 0.0135726913015, | |
"muv": 0.154786405451, | |
"nade": 0.00780932820254, | |
"naruto": 0.0768025052285, | |
"original": 0.143552304321, | |
"sammon": 0.00794643264095, | |
"toraha": 0.19162900391, | |
"type": 0.105515355829, | |
"zero": 0.0942787068858 | |
}, | |
"log": { | |
"akamatu": 0.0759959218038, | |
"eva": 0.0793751006252, | |
"ff": 0.0768547708465, | |
"gs": 0.0770568151608, | |
"HxH": 0.0774128113192, | |
"muv": 0.076101924644, | |
"nade": 0.0779959724918, | |
"naruto": 0.0791974414203, | |
"original": 0.0741883143549, | |
"sammon": 0.076554810523, | |
"toraha": 0.0731201540118, | |
"type": 0.0788243189481, | |
"zero": 0.0773216438505 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768095678823, | |
"eva": 0.0771249373728, | |
"ff": 0.0768625201368, | |
"gs": 0.0768844746722, | |
"HxH": 0.0768765379101, | |
"muv": 0.0767747124247, | |
"nade": 0.0769188717703, | |
"naruto": 0.0770188957282, | |
"original": 0.0772341929426, | |
"sammon": 0.0769019592646, | |
"toraha": 0.0766110487998, | |
"type": 0.0771044464911, | |
"zero": 0.0768778346044, | |
} | |
}, | |
0.0000001: { | |
"count_log": { | |
"akamatu": 0.0774598449828, | |
"eva": 0.0745161216571, | |
"ff": 0.0780202342653, | |
"gs": 0.0780365066339, | |
"HxH": 0.0783334377885, | |
"muv": 0.0782954903244, | |
"nade": 0.0770566248788, | |
"naruto": 0.0756972247399, | |
"original": 0.0751561942427, | |
"sammon": 0.0768192033893, | |
"toraha": 0.0779601222388, | |
"type": 0.0753328787885, | |
"zero": 0.07731611607, | |
}, | |
"count": { | |
"akamatu": 0.0877634407559, | |
"eva": 0.071881500017, | |
"ff": 0.0338602544945, | |
"gs": 0.0106047859704, | |
"HxH": 0.013573927332, | |
"muv": 0.154784927375, | |
"nade": 0.00781078210065, | |
"naruto": 0.0768036255699, | |
"original": 0.143549891631, | |
"sammon": 0.0079481170654, | |
"toraha": 0.191625944399, | |
"type": 0.105514370313, | |
"zero": 0.0942784329759 | |
}, | |
"log": { | |
"akamatu": 0.075427080173, | |
"eva": 0.0794583624502, | |
"ff": 0.076971817242, | |
"gs": 0.0781954798292, | |
"HxH": 0.078227651215, | |
"muv": 0.0753164901562, | |
"nade": 0.0793295879658, | |
"naruto": 0.0794293909472, | |
"original": 0.0728440233786, | |
"sammon": 0.0777801322794, | |
"toraha": 0.0716535980466, | |
"type": 0.0784199750961, | |
"zero": 0.0769464112207 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768098057981, | |
"eva": 0.0771206307573, | |
"ff": 0.0768614576473, | |
"gs": 0.0768789511982, | |
"HxH": 0.0768720339827, | |
"muv": 0.0767789767999, | |
"nade": 0.076913258656, | |
"naruto": 0.0770155497224, | |
"original": 0.0772487860473, | |
"sammon": 0.0768969756883, | |
"toraha": 0.0766218533355, | |
"type": 0.0771011466239, | |
"zero": 0.076880573743, | |
} | |
}, | |
0.00000001: { | |
"count_log": { | |
"akamatu": 0.0772418338363, | |
"eva": 0.0744638956495, | |
"ff": 0.0779547474875, | |
"gs": 0.0783351567524, | |
"HxH": 0.0784674975569, | |
"muv": 0.0780854121642, | |
"nade": 0.0776561920649, | |
"naruto": 0.0757061079394, | |
"original": 0.0749121392586, | |
"sammon": 0.0771889633168, | |
"toraha": 0.0776734905092, | |
"type": 0.0751943294385, | |
"zero": 0.0771202340258, | |
}, | |
"count": { | |
"akamatu": 0.0877634113898, | |
"eva": 0.0718815397412, | |
"ff": 0.0338603380002, | |
"gs": 0.0106049636246, | |
"HxH": 0.0135740509502, | |
"muv": 0.154784779543, | |
"nade": 0.00781092752405, | |
"naruto": 0.0768037375987, | |
"original": 0.14354965034, | |
"sammon": 0.00794828555283, | |
"toraha": 0.191625638418, | |
"type": 0.105514271746, | |
"zero": 0.0942784055714 | |
}, | |
"log": { | |
"akamatu": 0.0749431204614, | |
"eva": 0.0795290371209, | |
"ff": 0.077071287961, | |
"gs": 0.0791642677641, | |
"HxH": 0.0789208012298, | |
"muv": 0.0746482986623, | |
"nade": 0.0804642307083, | |
"naruto": 0.0796265495841, | |
"original": 0.0717005189699, | |
"sammon": 0.0788227586966, | |
"toraha": 0.0704061066145, | |
"type": 0.0780759041939, | |
"zero": 0.0766271180331 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768100433374, | |
"eva": 0.0771163328925, | |
"ff": 0.0768603969728, | |
"gs": 0.0768734386889, | |
"HxH": 0.0768675389235, | |
"muv": 0.0767832326941, | |
"nade": 0.0769076566651, | |
"naruto": 0.0770122103564, | |
"original": 0.0772633498228, | |
"sammon": 0.0768920019077, | |
"toraha": 0.0766326366881, | |
"type": 0.0770978540391, | |
"zero": 0.0768833070117, | |
} | |
}, | |
0.000000001: { | |
"count_log": { | |
"akamatu": 0.0770264385046, | |
"eva": 0.0744122817982, | |
"ff": 0.0778900393121, | |
"gs": 0.0786302430614, | |
"HxH": 0.0785999480243, | |
"muv": 0.0778778497505, | |
"nade": 0.0782485626974, | |
"naruto": 0.0757148653935, | |
"original": 0.0746710118418, | |
"sammon": 0.0775543276262, | |
"toraha": 0.0773903010702, | |
"type": 0.075057431106, | |
"zero": 0.0769266998139, | |
}, | |
"count": { | |
"akamatu": 0.0877634084531, | |
"eva": 0.0718815437135, | |
"ff": 0.0338603463508, | |
"gs": 0.0106049813904, | |
"HxH": 0.0135740633122, | |
"muv": 0.15478476476, | |
"nade": 0.00781094206672, | |
"naruto": 0.0768037488015, | |
"original": 0.143549626211, | |
"sammon": 0.00794830240203, | |
"toraha": 0.19162560782, | |
"type": 0.105514261889, | |
"zero": 0.0942784028308 | |
}, | |
"log": { | |
"akamatu": 0.0745264717067, | |
"eva": 0.0795898669339, | |
"ff": 0.0771569136676, | |
"gs": 0.0799983161095, | |
"HxH": 0.079517535966, | |
"muv": 0.0740730456274, | |
"nade": 0.0814410656171, | |
"naruto": 0.079796269937, | |
"original": 0.0707160754542, | |
"sammon": 0.0797203862322, | |
"toraha": 0.0693321421226, | |
"type": 0.0777796821406, | |
"zero": 0.0763522284853 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768102803871, | |
"eva": 0.0771120440774, | |
"ff": 0.0768593384972, | |
"gs": 0.0768679377608, | |
"HxH": 0.0768630533009, | |
"muv": 0.0767874796453, | |
"nade": 0.0769020664415, | |
"naruto": 0.0770088780059, | |
"original": 0.077277882965, | |
"sammon": 0.076887038567, | |
"toraha": 0.0766433974122, | |
"type": 0.077094568445, | |
"zero": 0.0768860344947, | |
} | |
}, | |
0.0000000001: { | |
"count_log": { | |
"akamatu": 0.0768136384817, | |
"eva": 0.0743612884056, | |
"ff": 0.0778261100987, | |
"gs": 0.0789217758045, | |
"HxH": 0.078730802512, | |
"muv": 0.0776727877809, | |
"nade": 0.0788337955667, | |
"naruto": 0.0757235154245, | |
"original": 0.0744327897079, | |
"sammon": 0.0779152937141, | |
"toraha": 0.0771105241055, | |
"type": 0.0749221811379, | |
"zero": 0.07673549726, | |
}, | |
"count": { | |
"akamatu": 0.0877634081594, | |
"eva": 0.0718815441108, | |
"ff": 0.0338603471859, | |
"gs": 0.0106049831669, | |
"HxH": 0.0135740645484, | |
"muv": 0.154784763281, | |
"nade": 0.007810943521, | |
"naruto": 0.0768037499218, | |
"original": 0.143549623798, | |
"sammon": 0.00794830408695, | |
"toraha": 0.19162560476, | |
"type": 0.105514260903, | |
"zero": 0.0942784025568 | |
}, | |
"log": { | |
"akamatu": 0.0741640144616, | |
"eva": 0.0796427834868, | |
"ff": 0.0772314014908, | |
"gs": 0.080723884071, | |
"HxH": 0.0800366553981, | |
"muv": 0.0735726133606, | |
"nade": 0.0822908485295, | |
"naruto": 0.0799439140127, | |
"original": 0.0698596754713, | |
"sammon": 0.0805012649201, | |
"toraha": 0.0683978650218, | |
"type": 0.0775219877257, | |
"zero": 0.0761130920499 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768105169376, | |
"eva": 0.0771077643158, | |
"ff": 0.0768582822526, | |
"gs": 0.0768624484425, | |
"HxH": 0.0768585771443, | |
"muv": 0.0767917176332, | |
"nade": 0.0768964880159, | |
"naruto": 0.0770055526882, | |
"original": 0.0772923854314, | |
"sammon": 0.0768820857004, | |
"toraha": 0.0766541354285, | |
"type": 0.0770912897924, | |
"zero": 0.0768887562172, | |
} | |
}, | |
0.00000000001: { | |
"count_log": { | |
"akamatu": 0.0766033897454, | |
"eva": 0.0743109062364, | |
"ff": 0.0777629472703, | |
"gs": 0.0792098135176, | |
"HxH": 0.0788600881609, | |
"muv": 0.0774701842772, | |
"nade": 0.0794120119783, | |
"naruto": 0.0757320615598, | |
"original": 0.0741974236423, | |
"sammon": 0.0782719325382, | |
"toraha": 0.0768341014534, | |
"type": 0.0747885525866, | |
"zero": 0.0765465870337, | |
}, | |
"count": { | |
"akamatu": 0.08776340813, | |
"eva": 0.0718815441505, | |
"ff": 0.0338603472694, | |
"gs": 0.0106049833446, | |
"HxH": 0.013574064672, | |
"muv": 0.154784763134, | |
"nade": 0.00781094366642, | |
"naruto": 0.0768037500338, | |
"original": 0.143549623556, | |
"sammon": 0.00794830425544, | |
"toraha": 0.191625604454, | |
"type": 0.105514260804, | |
"zero": 0.0942784025294 | |
}, | |
"log": { | |
"akamatu": 0.0738458226359, | |
"eva": 0.0796892374256, | |
"ff": 0.0772967923307, | |
"gs": 0.0813608414149, | |
"HxH": 0.0804923768176, | |
"muv": 0.0731332968716, | |
"nade": 0.0830368509551, | |
"naruto": 0.0800735267761, | |
"original": 0.0691078642702, | |
"sammon": 0.081186778209, | |
"toraha": 0.0675776875439, | |
"type": 0.0772957644232, | |
"zero": 0.0759031603262 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768107529891, | |
"eva": 0.0771034935823, | |
"ff": 0.0768572282358, | |
"gs": 0.0768569707035, | |
"HxH": 0.0768541104299, | |
"muv": 0.0767959466814, | |
"nade": 0.0768909213577, | |
"naruto": 0.0770022343852, | |
"original": 0.0773068573054, | |
"sammon": 0.0768771432815, | |
"toraha": 0.0766648507938, | |
"type": 0.0770880180566, | |
"zero": 0.0768914721978, | |
} | |
}, | |
0.000000000001: { | |
"count_log": { | |
"akamatu": 0.0763956469456, | |
"eva": 0.0742611245535, | |
"ff": 0.0777005372671, | |
"gs": 0.0794944181534, | |
"HxH": 0.0789878328646, | |
"muv": 0.0772699955822, | |
"nade": 0.0799833366764, | |
"naruto": 0.0757405058157, | |
"original": 0.0739648628837, | |
"sammon": 0.0786243206554, | |
"toraha": 0.076560973462, | |
"type": 0.0746565167312, | |
"zero": 0.0763599284094, | |
}, | |
"count": { | |
"akamatu": 0.0877634081271, | |
"eva": 0.0718815441545, | |
"ff": 0.0338603472777, | |
"gs": 0.0106049833624, | |
"HxH": 0.0135740646844, | |
"muv": 0.154784763119, | |
"nade": 0.00781094368097, | |
"naruto": 0.076803750045, | |
"original": 0.143549623532, | |
"sammon": 0.00794830427229, | |
"toraha": 0.191625604423, | |
"type": 0.105514260795, | |
"zero": 0.0942784025266 | |
}, | |
"log": { | |
"akamatu": 0.0735642540501, | |
"eva": 0.0797303445963, | |
"ff": 0.077354656808, | |
"gs": 0.081924486251, | |
"HxH": 0.0808956456087, | |
"muv": 0.0727445448312, | |
"nade": 0.0836969900022, | |
"naruto": 0.0801882213565, | |
"original": 0.0684425850438, | |
"sammon": 0.0817933903066, | |
"toraha": 0.0668519108644, | |
"type": 0.0770955789636, | |
"zero": 0.0757173913177 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768109885432, | |
"eva": 0.0770992318487, | |
"ff": 0.0768561764402, | |
"gs": 0.0768515045081, | |
"HxH": 0.0768496531284, | |
"muv": 0.0768001668174, | |
"nade": 0.0768853664303, | |
"naruto": 0.0769989230749, | |
"original": 0.0773212986821, | |
"sammon": 0.0768722112779, | |
"toraha": 0.0766755435783, | |
"type": 0.0770847532156, | |
"zero": 0.076894182455, | |
} | |
}, | |
0.0000000000001: { | |
"count_log": { | |
"akamatu": 0.076190365571, | |
"eva": 0.0742119327039, | |
"ff": 0.0776388667235, | |
"gs": 0.079775650674, | |
"HxH": 0.0791140639945, | |
"muv": 0.0770721788077, | |
"nade": 0.0805478920755, | |
"naruto": 0.0757488500185, | |
"original": 0.073735057604, | |
"sammon": 0.078972533532, | |
"toraha": 0.0762910816077, | |
"type": 0.0745260452918, | |
"zero": 0.0761754813959, | |
}, | |
"count": { | |
"akamatu": 0.0877634081268, | |
"eva": 0.0718815441549, | |
"ff": 0.0338603472786, | |
"gs": 0.0106049833641, | |
"HxH": 0.0135740646856, | |
"muv": 0.154784763117, | |
"nade": 0.00781094368242, | |
"naruto": 0.0768037500461, | |
"original": 0.14354962353, | |
"sammon": 0.00794830427398, | |
"toraha": 0.19162560442, | |
"type": 0.105514260794, | |
"zero": 0.0942784025264 | |
}, | |
"log": { | |
"akamatu": 0.0733133298964, | |
"eva": 0.0797669778797, | |
"ff": 0.077406223621, | |
"gs": 0.0824267869713, | |
"HxH": 0.0812550247629, | |
"muv": 0.072398102502, | |
"nade": 0.0842852830169, | |
"naruto": 0.080290433185, | |
"original": 0.0678497112818, | |
"sammon": 0.0823339819609, | |
"toraha": 0.0662051238713, | |
"type": 0.076917180628, | |
"zero": 0.0755518404228 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768112236014, | |
"eva": 0.0770949790866, | |
"ff": 0.0768551268587, | |
"gs": 0.0768460498197, | |
"HxH": 0.0768452052101, | |
"muv": 0.0768043780695, | |
"nade": 0.0768798231968, | |
"naruto": 0.0769956187354, | |
"original": 0.0773357096578, | |
"sammon": 0.0768672896569, | |
"toraha": 0.0766862138529, | |
"type": 0.0770814952475, | |
"zero": 0.0768968870066, | |
} | |
}, | |
0.0000000000001: { | |
"count_log": { | |
"akamatu": 0.0759875021358, | |
"eva": 0.0741633202682, | |
"ff": 0.0775779225762, | |
"gs": 0.0800535706528, | |
"HxH": 0.0792388082909, | |
"muv": 0.0768766920497, | |
"nade": 0.0811057977688, | |
"naruto": 0.0757570959377, | |
"original": 0.0735079591224, | |
"sammon": 0.079316644928, | |
"toraha": 0.0760243687178, | |
"type": 0.0743971106309, | |
"zero": 0.0759932069209, | |
}, | |
"count": { | |
"akamatu": 0.0877634081268, | |
"eva": 0.0718815441549, | |
"ff": 0.0338603472787, | |
"gs": 0.0106049833643, | |
"HxH": 0.0135740646858, | |
"muv": 0.154784763117, | |
"nade": 0.00781094368256, | |
"naruto": 0.0768037500463, | |
"original": 0.14354962353, | |
"sammon": 0.00794830427415, | |
"toraha": 0.19162560442, | |
"type": 0.105514260793, | |
"zero": 0.0942784025263 | |
}, | |
"log": { | |
"akamatu": 0.0730883055965, | |
"eva": 0.0797998299535, | |
"ff": 0.0774524678187, | |
"gs": 0.082877241288, | |
"HxH": 0.0815773095695, | |
"muv": 0.0720874192083, | |
"nade": 0.0848128536856, | |
"naruto": 0.0803820949283, | |
"original": 0.0673180326798, | |
"sammon": 0.0828187749028, | |
"toraha": 0.065625096847, | |
"type": 0.0767571961857, | |
"zero": 0.0754033773364 | |
}, | |
"log_complement": { | |
"akamatu": 0.0768114581653, | |
"eva": 0.0770907352678, | |
"ff": 0.0768540794844, | |
"gs": 0.076840606602, | |
"HxH": 0.0768407666455, | |
"muv": 0.0768085804657, | |
"nade": 0.0768742916203, | |
"naruto": 0.0769923213447, | |
"original": 0.0773500903283, | |
"sammon": 0.0768623783856, | |
"toraha": 0.0766968616889, | |
"type": 0.0770782441306, | |
"zero": 0.0768995858708, | |
} | |
} | |
} | |
""" | |
s_params = [1, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001, | |
0.00000001, 0.000000001, 0.0000000001, 0.00000000001, 0.000000000001, 0.0000000000001, 0.00000000000001] | |
""" | |
# Smoothing parameters swept by the experiment loop that follows: 1, then
# successively smaller decimal fractions, one extra leading zero per step.
# The literals are kept in their original spelled-out form on purpose so the
# exact float values are unchanged.
#
# Fix: the last line used to list the same value twice in a row, so the sweep
# ran one experiment twice with identical output. The repeat is removed.
# NOTE(review): if the repeated entry was a typo for a neighbouring power of
# ten, add that value back here — confirm against the intended sweep range.
s_params = [
    1, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001,
    0.00000001, 0.000000001, 0.0000000001, 0.00000000001, 0.000000000001, 0.0000000000001, 0.00000000000001,
    0.000000000000001, 0.0000000000000001, 0.00000000000000001, 0.000000000000000001, 0.0000000000000000001, 0.00000000000000000001,
    0.000000000000000000001, 0.0000000000000000000001, 0.00000000000000000000001, 0.000000000000000000000001, 0.0000000000000000000000001,
    0.000000000000000000000000001, 0.0000000000000000000000000001, 0.00000000000000000000000000001,
]
# Smoothing-parameter sweep. For every value s in s_params this prints s and
# s_all, then runs a series of experiments. Each experiment is switched on or
# off by a literal `if True:` / `if False:` guard, builds a dict cw of
# per-class sums over all distinct words, divides every entry by cw_all and
# prints the result as `"class": value,` lines.
# NOTE(review): the indentation of this listing has been flattened, so loop
# and guard nesting described in these comments is the most plausible reading,
# not something the text itself proves — confirm against the original file.
for s in s_params: | |
# s_all is s multiplied by the number of distinct words in word_doc_count.
s_all = s * conn.execute("SELECT COUNT(DISTINCT word) FROM word_doc_count").fetchone()[0] | |
print "s: %s" % s | |
print "s_all: %s" % s_all | |
# NOTE(review): the extent of this outer `if False:` cannot be determined from
# the flattened text; it may disable only the next experiment or a longer run.
if False: | |
# Per-class word weight computation, including the effect of count | |
# Disabled variant: term is count(c, word) * log((weight(c, word) + s) / (classWeights[c] + s_all)).
if False: | |
print "-" * 30 | |
print "each classes -> count[c] * log((w + s) / (cw + s_all))" | |
cw = {} | |
cw_all = 0 | |
for c in classes: | |
cw[c] = 0 | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
weight = getClassWordWeight(c, word) | |
count = getClassWordCount(c, word) | |
cw[c] += float(count) * math.log((weight + s) / (classWeights[c] + s_all)) | |
# cw_all accumulates the class sums (presumably once per class, after the word loop).
cw_all += cw[c] | |
# Divide every class sum by cw_all and print it.
for c in classes: | |
cw[c] /= cw_all | |
print "\"%s\": %s," % (c, cw[c]) | |
print "-" * 30 | |
# Enabled variant: same log term, but count is the word's count summed over all classes.
if True: | |
print "-" * 30 | |
print "each classes -> count * log((w + s) / (cw + s_all))" | |
cw = {} | |
cw_all = 0 | |
for c in classes: | |
cw[c] = 0 | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
count = 0 | |
for ec in classes: | |
count += getClassWordCount(ec, word) | |
weight = getClassWordWeight(c, word) | |
cw[c] += float(count) * math.log((weight + s) / (classWeights[c] + s_all)) | |
cw_all += cw[c] | |
for c in classes: | |
cw[c] /= cw_all | |
print "\"%s\": %s," % (c, cw[c]) | |
print "-" * 30 | |
# Per-class word weight computation, including the effect of count | |
# Disabled variant: like the first experiment but without the logarithm.
if False: | |
print "-" * 30 | |
print "each classes -> count[c] * ((w + s) / (cw + s_all))" | |
cw = {} | |
cw_all = 0 | |
for c in classes: | |
cw[c] = 0 | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
weight = getClassWordWeight(c, word) | |
count = getClassWordCount(c, word) | |
cw[c] += float(count) * ((weight + s) / (classWeights[c] + s_all)) | |
cw_all += cw[c] | |
for c in classes: | |
cw[c] /= cw_all | |
print "\"%s\": %s," % (c, cw[c]) | |
print "-" * 30 | |
# Per-class word weight computation, including the effect of count | |
# Enabled variant: no logarithm, count summed over all classes.
if True: | |
print "-" * 30 | |
print "each classes -> count * ((w + s) / (cw + s_all))" | |
cw = {} | |
cw_all = 0 | |
for c in classes: | |
cw[c] = 0 | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
count = 0 | |
for ec in classes: | |
count += getClassWordCount(ec, word) | |
weight = getClassWordWeight(c, word) | |
cw[c] += float(count) * ((weight + s) / (classWeights[c] + s_all)) | |
cw_all += cw[c] | |
for c in classes: | |
cw[c] /= cw_all | |
print "\"%s\": %s," % (c, cw[c]) | |
print "-" * 30 | |
# Per-class weights used by NB | |
# Disabled variant: plain sum of log((weight(c, word) + s) / (classWeights[c] + s_all)), no count factor.
if False: | |
print "-" * 30 | |
print "each classes -> log((w + s) / (cw + s_all))" | |
cw = {} | |
cw_all = 0 | |
for c in classes: | |
cw[c] = 0 | |
denominator = classWeights[c] | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
numerator = getClassWordWeight(c, word) | |
cw[c] += math.log( (numerator + s) / (denominator + s_all) ) | |
cw_all += cw[c] | |
for c in classes: | |
cw[c] /= cw_all | |
print "\"%s\": %s," % (c, cw[c]) | |
print "-" * 30 | |
# Disabled variant: same as the previous one without the logarithm.
if False: | |
print "-" * 30 | |
print "each classes -> ((w + s) / (cw + s_all))" | |
cw = {} | |
cw_all = 0 | |
for c in classes: | |
cw[c] = 0 | |
denominator = classWeights[c] | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
numerator = getClassWordWeight(c, word) | |
cw[c] += (numerator + s) / (denominator + s_all) | |
cw_all += cw[c] | |
for c in classes: | |
cw[c] /= cw_all | |
print "\"%s\": %s," % (c, cw[c]) | |
print "-" * 30 | |
# Per-class weights used by TWCNB 6 | |
# Disabled complement variant: numerator and denominator are summed over every class except c.
if False: | |
print "-" * 30 | |
print "each classes -> log((w[^c] + s) / (cw[^c] + s_all))" | |
cw = {} | |
cw_all = 0 | |
for c in classes: | |
cw[c] = 0 | |
denominator = 0 | |
for ec in classes: | |
if ec != c: | |
denominator += classWeights[ec] | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
numerator = 0 | |
for ec in classes: | |
if ec != c: | |
numerator += getClassWordWeight(ec, word) | |
cw[c] += math.log( (numerator + s) / (denominator + s_all) ) | |
cw_all += cw[c] | |
for c in classes: | |
cw[c] /= cw_all | |
print "\"%s\": %s," % (c, cw[c]) | |
print "-" * 30 | |
# Same as above, multiplied by count | |
# Enabled complement variant: the complement log term times the word's count in class c itself.
if True: | |
print "-" * 30 | |
print "each classes -> count[c] * log((w[^c] + s) / (cw[^c] + s_all))" | |
cw = {} | |
cw_all = 0 | |
for c in classes: | |
cw[c] = 0 | |
denominator = 0 | |
for ec in classes: | |
if ec != c: | |
denominator += classWeights[ec] | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
numerator = 0 | |
for ec in classes: | |
if ec != c: | |
numerator += getClassWordWeight(ec, word) | |
count = getClassWordCount(c, word) | |
cw[c] += float(count) * math.log( (numerator + s) / (denominator + s_all) ) | |
cw_all += cw[c] | |
for c in classes: | |
cw[c] /= cw_all | |
print "\"%s\": %s," % (c, cw[c]) | |
print "-" * 30 | |
# Enabled complement variant: the complement log term times the word's count summed over all classes.
if True: | |
print "-" * 30 | |
print "each classes -> count * log((w[^c] + s) / (cw[^c] + s_all))" | |
cw = {} | |
cw_all = 0 | |
for c in classes: | |
cw[c] = 0 | |
denominator = 0 | |
for ec in classes: | |
if ec != c: | |
denominator += classWeights[ec] | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
count = 0 | |
for ec in classes: | |
count += getClassWordCount(ec, word) | |
numerator = 0 | |
for ec in classes: | |
if ec != c: | |
numerator += getClassWordWeight(ec, word) | |
cw[c] += float(count) * math.log( (numerator + s) / (denominator + s_all) ) | |
cw_all += cw[c] | |
for c in classes: | |
cw[c] /= cw_all | |
print "\"%s\": %s," % (c, cw[c]) | |
print "-" * 30 | |
# Enabled complement variant: the complement log term times the word's count summed over every class except c.
if True: | |
print "-" * 30 | |
print "each classes -> count[^c] * log((w[^c] + s) / (cw[^c] + s_all))" | |
cw = {} | |
cw_all = 0 | |
for c in classes: | |
cw[c] = 0 | |
denominator = 0 | |
for ec in classes: | |
if ec != c: | |
denominator += classWeights[ec] | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
count = 0 | |
for ec in classes: | |
if ec != c: | |
count += getClassWordCount(ec, word) | |
numerator = 0 | |
for ec in classes: | |
if ec != c: | |
numerator += getClassWordWeight(ec, word) | |
cw[c] += float(count) * math.log( (numerator + s) / (denominator + s_all) ) | |
cw_all += cw[c] | |
for c in classes: | |
cw[c] /= cw_all | |
print "\"%s\": %s," % (c, cw[c]) | |
print "-" * 30 | |
# Move on to the next smoothing value.
# NOTE(review): whether statements after this point still belong to the loop
# body (and are therefore skipped) cannot be told from the flattened text.
continue | |
""" | |
誤ったスムージングパラメータでのデータ | |
classComplementWeightsForNorm = { | |
"akamatu": -3080841.17686, | |
"eva": -3086012.45238, | |
"ff": -3092460.60499, | |
"gs": -3098540.73891, | |
"HxH": -3097159.98922, | |
"muv": -3072167.23895, | |
"nade": -3098715.89665, | |
"naruto": -3089468.30269, | |
"original": -3044780.5968, | |
"sammon": -3098987.98064, | |
"toraha": -3050861.25974, | |
"type": -3071706.76705, | |
"zero": -3081028.99847 | |
} | |
if False: | |
#TWCNB 6で使うクラスごとのウェイト | |
print "computing classComplementWeightsForNorm" | |
classComplementWeightsForNorm = {} | |
for c in classes: | |
denominator = 0 | |
for ec in classes: | |
if ec != c: | |
denominator += classWeights[ec] | |
classWeight = 0 | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
#cur = conn.execute("SELECT word FROM class_word_weight WHERE class = ?", [c]) | |
for row in cur: | |
word = row[0] | |
numerator = 0 | |
for ec in classes: | |
if ec != c: | |
numerator += getClassWordWeight(ec, word) | |
classWeight += math.log( (numerator + smoothing) / (denominator + smoothingAll) ) | |
classComplementWeightsForNorm[c] = classWeight | |
print "%s: %s" % (c, classWeight) | |
""" | |
""" | |
語彙集合全体でウェイト計算してるが、悪影響 | |
classComplementWeightsForNorm = { | |
"akamatu": -3408574.74087, | |
"eva": -3409327.91309, | |
"ff": -3410757.3117, | |
"gs": -3411789.25816, | |
"HxH": -3411535.54758, | |
"muv": -3407090.61123, | |
"nade": -3411825.33577, | |
"naruto": -3409978.75976, | |
"original": -3402823.41769, | |
"sammon": -3411883.10451, | |
"toraha": -3403595.02465, | |
"type": -3406654.81041, | |
"zero": -3408596.15052 | |
} | |
if False: | |
#TWCNB 6で使うクラスごとのウェイト | |
print "computing classComplementWeightsForNorm" | |
classComplementWeightsForNorm = {} | |
for c in classes: | |
denominator = 0 | |
for ec in classes: | |
if ec != c: | |
denominator += classWeights[ec] | |
classWeight = 0 | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
numerator = 0 | |
for ec in classes: | |
if ec != c: | |
numerator += getClassWordWeight(ec, word) | |
classWeight += math.log( (numerator + smoothing) / (denominator + smoothingAll) ) | |
classComplementWeightsForNorm[c] = classWeight | |
print "%s: %s" % (c, classWeight) | |
""" | |
""" | |
classComplementWeightsForNorm = { | |
"akamatu": -1069934.25368, | |
"eva": -683627.513276, | |
"ff": -754925.208654, | |
"gs": -327454.900299, | |
"HxH": -447114.888567, | |
"muv": -1157362.67506, | |
"nade": -219929.813111, | |
"naruto": -628259.562952, | |
"original": -1456057.22919, | |
"sammon": -309198.554574, | |
"toraha": -1543564.08653, | |
"type": -905372.088819, | |
"zero": -943972.752648 | |
} | |
if False: | |
#TWCNB 6で使うクラスごとのウェイト | |
print "computing classComplementWeightsForNorm" | |
classComplementWeightsForNorm = {} | |
for c in classes: | |
denominator = 0 | |
for ec in classes: | |
if ec != c: | |
denominator += classWeights[ec] | |
classWeight = 0 | |
cur = conn.execute("SELECT DISTINCT word FROM class_word_weight WHERE class = ?", [c]) | |
for row in cur: | |
word = row[0] | |
numerator = 0 | |
for ec in classes: | |
if ec != c: | |
numerator += getClassWordWeight(ec, word) | |
classWeight += math.log( (numerator + smoothing) / (denominator + smoothingAll) ) | |
classComplementWeightsForNorm[c] = classWeight | |
print "\"%s\": %s," % (c, classWeight) | |
""" | |
""" | |
誤ったスムージングパラメータでのデータ | |
classWeightsForNorm = { | |
"akamatu": -2483221.10996, | |
"eva": -2404639.80365, | |
"ff": -2263931.79348, | |
"gs": -1915803.53723, | |
"HxH": -2042857.52367, | |
"muv": -2572298.69609, | |
"nade": -1894860.99644, | |
"naruto": -2336134.19345, | |
"original": -2728500.99277, | |
"sammon": -1858847.71614, | |
"toraha": -2702319.70272, | |
"type": -2572784.03741, | |
"zero": -2480791.83441 | |
} | |
if False: | |
#NB で使うクラスごとのウェイト | |
print "computing classWeightsForNorm" | |
classWeightsForNorm = {} | |
for c in classes: | |
denominator = classWeights[c] | |
classWeight = 0 | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
#cur = conn.execute("SELECT word FROM class_word_weight WHERE class = ?", [c]) | |
for row in cur: | |
word = row[0] | |
numerator = getClassWordWeight(c, word) | |
classWeight += math.log( (numerator + smoothing) / (denominator + smoothingAll) ) | |
classWeightsForNorm[c] = classWeight | |
print "%s: %s" % (c, classWeight) | |
""" | |
""" | |
語彙集合全体でウェイト計算してるが、悪影響 | |
classWeightsForNorm = { | |
"akamatu": -3371995.21947, | |
"eva": -3371807.93663, | |
"ff": -3370865.0378, | |
"gs": -3370588.26828, | |
"HxH": -3370628.11587, | |
"muv": -3373244.3515, | |
"nade": -3370614.85549, | |
"naruto": -3371398.19853, | |
"original": -3376397.51673, | |
"sammon": -3370572.11175, | |
"toraha": -3375853.7856, | |
"type": -3373482.55089, | |
"zero": -3372079.24163 | |
} | |
if False: | |
#NB で使うクラスごとのウェイト | |
print "computing classWeightsForNorm" | |
classWeightsForNorm = {} | |
for c in classes: | |
denominator = classWeights[c] | |
classWeight = 0 | |
cur = conn.execute("SELECT DISTINCT word FROM word_doc_count") | |
for row in cur: | |
word = row[0] | |
numerator = getClassWordWeight(c, word) | |
classWeight += math.log( (numerator + smoothing) / (denominator + smoothingAll) ) | |
classWeightsForNorm[c] = classWeight | |
print "%s: %s" % (c, classWeight) | |
""" | |
""" | |
classWeightsForNorm = { | |
"akamatu": -1079456.95336, | |
"eva": -700571.429639, | |
"ff": -771821.524118, | |
"gs": -347791.872949, | |
"HxH": -468420.143683, | |
"muv": -1163708.88376, | |
"nade": -236711.390405, | |
"naruto": -646282.669202, | |
"original": -1454417.66756, | |
"sammon": -328997.667818, | |
"toraha": -1541410.46535, | |
"type": -916275.035706, | |
"zero": -955843.891426 | |
} | |
if False: | |
#NB で使うクラスごとのウェイト | |
print "computing classWeightsForNorm" | |
classWeightsForNorm = {} | |
for c in classes: | |
denominator = classWeights[c] | |
classWeight = 0 | |
cur = conn.execute("SELECT DISTINCT word FROM class_word_weight WHERE class = ?", [c]) | |
for row in cur: | |
word = row[0] | |
numerator = getClassWordWeight(c, word) | |
classWeight += math.log( (numerator + smoothing) / (denominator + smoothingAll) ) | |
classWeightsForNorm[c] = classWeight | |
print "\"%s\": %s," % (c, classWeight) | |
""" | |
# Share of each class in the overall class weight: classWeights[label]
# divided by classWeightsTotal (presumably the sum of classWeights — confirm
# where classWeightsTotal is defined). float() forces true division under
# Python 2 even when both operands are integers.
classProbs = dict(
    (label, float(classWeights[label]) / classWeightsTotal)
    for label in classes
)
# Hard-coded per-class word totals, keyed by class name. The classifiers
# further down divide scores by class_words[...] or by its logarithm.
# NOTE(review): presumably the number of distinct words stored for each
# class — confirm how these figures were produced.
class_words = dict(
    akamatu=86732,
    eva=56374,
    ff=61945,
    gs=27869,
    HxH=37560,
    muv=93559,
    nade=18980,
    naruto=51965,
    original=116971,
    sammon=26351,
    toraha=123815,
    type=73836,
    zero=76864,
)
# Hard-coded standard deviation of the word weights of each class, keyed by
# class name. The disabled `if False:` block that follows these tables
# recomputes the same mapping from the database.
class_all_sd = dict(
    akamatu=0.13820801,
    eva=0.16284586,
    ff=0.05680551,
    gs=0.02823316,
    HxH=0.03144673,
    muv=0.20784636,
    nade=0.03702682,
    naruto=0.12971901,
    original=0.30875787,
    sammon=0.02235867,
    toraha=0.30065981,
    type=0.22778956,
    zero=0.15278344,
)
# Hard-coded mean of the word weights of each class, keyed by class name.
# The disabled `if False:` block that follows recomputes the same mapping
# from the database.
class_all_avg = dict(
    akamatu=0.03835642,
    eva=0.02838985,
    ff=0.01669273,
    gs=0.00450519,
    HxH=0.00726387,
    muv=0.05394686,
    nade=0.00416323,
    naruto=0.02190264,
    original=0.09931832,
    sammon=0.00363563,
    toraha=0.08949577,
    type=0.05399907,
    zero=0.03798647,
)
if False:
    # Disabled recomputation of class_all_sd / class_all_avg: for every class,
    # collect the weight of each distinct word in word_doc_count and store
    # the mean and the population standard deviation of those weights.
    class_all_sd = {}
    class_all_avg = {}
    for targetc in classes:
        cur = conn.execute("SELECT DISTINCT word FROM word_doc_count")
        weights = [getClassWordWeight(targetc, row[0]) for row in cur]
        # Mean of the collected weights.
        avg = sum(weights)
        avg /= len(weights)
        # Mean squared deviation from avg; its square root is the deviation stored.
        sd = sum((weight - avg) ** 2 for weight in weights)
        sd /= len(weights)
        class_all_sd[targetc] = math.sqrt(sd)
        class_all_avg[targetc] = avg
# Overall accumulators, all starting at zero.
# NOTE(review): presumably summed over every test document of every class by
# the evaluation loop that follows — confirm where they are updated.
total_info = dict.fromkeys(("count", "avg", "sd", "t_sd", "t_order"), 0)
# Per-class accumulators; the evaluation loop that follows creates one
# sub-dict with the same five keys for each class.
class_info = {}
for c in classes: | |
print "-" * 30 | |
print "class: %s" % c | |
files = getFiles(corpus + "\\" + c) | |
print "total: %s" % len(files) | |
files = files[-use_for_test:] | |
print "use for test: %s" % len(files) | |
class_info[c] = {} | |
class_info[c]["count"] = 0 | |
class_info[c]["avg"] = 0 | |
class_info[c]["sd"] = 0 | |
class_info[c]["t_sd"] = 0 | |
class_info[c]["t_order"] = 0 | |
fcount = 0 | |
for f in files: | |
fcount += 1 | |
print "-" * 30 | |
print "fileNo: %s (%s / %s)" % (f, fcount, len(files)) | |
doc = open(corpus + "\\" + c + "\\" + f).read() | |
doc = unicode(doc, "utf-8", errors="replace") | |
words = getWords(doc) | |
data = toDataArray(words) | |
print "%s unique words" % len(data) | |
#caching | |
classWordWeightCache = {} | |
for targetc in classes: | |
classWordWeightCache[targetc] = {} | |
for d in data: | |
word, count = d | |
classWordWeightCache[targetc][word] = getClassWordWeight(targetc, word) | |
if False: | |
#WNB | |
results = [] | |
for targetc in classes: | |
weights = [] | |
denominator = classWeights[targetc] + smoothingAll | |
for d in data: | |
word, count = d | |
numerator = classWordWeightCache[targetc][word] + smoothing | |
weight = math.log( numerator / denominator ) | |
weights.append((weight, word, count)) | |
result = 0 | |
for d in weights: | |
weight, word, count = d | |
weight /= classWeightsForNorm[targetc] | |
weight *= count | |
result += weight | |
results.append((-result, targetc)) | |
results.sort(reverse=True) | |
""" | |
------------------------------ | |
class: original | |
* avg: -0.137140975925 | |
* sd: 0.00241394309644 | |
* t_sd: 70.8995672172 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: -0.0855871992036 | |
* sd: 0.00159530415131 | |
* t_sd: 64.9241536277 | |
* t_order: 2.0 | |
------------------------------ | |
class: toraha | |
* avg: -0.127250349819 | |
* sd: 0.00239577621972 | |
* t_sd: 63.2492613594 | |
* t_order: 2.0 | |
------------------------------ | |
class: eva | |
* avg: -0.0458923484897 | |
* sd: 0.00087739070701 | |
* t_sd: 59.564346693 | |
* t_order: 3.2 | |
------------------------------ | |
class: muv | |
* avg: -0.252436650238 | |
* sd: 0.00474098712602 | |
* t_sd: 59.4407303047 | |
* t_order: 3.4 | |
------------------------------ | |
class: naruto | |
* avg: -0.467594472783 | |
* sd: 0.00828687354933 | |
* t_sd: 55.6357015593 | |
* t_order: 4.2 | |
------------------------------ | |
class: zero | |
* avg: -0.120085050263 | |
* sd: 0.00213030720249 | |
* t_sd: 54.3892611741 | |
* t_order: 4.4 | |
------------------------------ | |
class: akamatu | |
* avg: -0.284023821865 | |
* sd: 0.00490075636513 | |
* t_sd: 53.389997845 | |
* t_order: 5.1 | |
------------------------------ | |
class: ff | |
* avg: -0.23744480302 | |
* sd: 0.00410337005736 | |
* t_sd: 44.1746394597 | |
* t_order: 9.0 | |
------------------------------ | |
class: nade | |
* avg: -0.0966124102639 | |
* sd: 0.00160440180478 | |
* t_sd: 41.3514391445 | |
* t_order: 9.9 | |
------------------------------ | |
class: HxH | |
* avg: -0.334581009127 | |
* sd: 0.00586168727486 | |
* t_sd: 40.0990159124 | |
* t_order: 10.0 | |
------------------------------ | |
class: gs | |
* avg: -0.154576677205 | |
* sd: 0.00276307731375 | |
* t_sd: 39.306732706 | |
* t_order: 10.6 | |
------------------------------ | |
class: sammon | |
* avg: -0.0872690688239 | |
* sd: 0.00151748481904 | |
* t_sd: 38.7632930885 | |
* t_order: 10.9 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: -0.18696114131 | |
* sd: 0.00332241228363 | |
* t_sd: 52.706780007 | |
* t_order: 5.82307692308 | |
------------------------------ | |
""" | |
if False: | |
#NB | |
#単語ヒット率、クラスの規模の補正 | |
#クラスウェイトの正規化 | |
results = [] | |
for targetc in classes: | |
is_zero = 0 | |
weight = 0 | |
for d in data: | |
word, count = d | |
w = classWordWeightCache[targetc][word] | |
if w == 0: | |
is_zero += 1 | |
weight += count * ( ( w - class_all_avg[targetc] ) / class_all_sd[targetc] ) | |
weight *= float(len(data)) / is_zero | |
weight /= math.log( class_words[targetc] ) | |
results.append((weight, targetc)) | |
results.sort(reverse=True) | |
if False: | |
#NB | |
#単語ヒット率、クラスの規模の補正 | |
#クラスウェイトの正規化 | |
results = [] | |
for targetc in classes: | |
is_zero = 0 | |
weight = 0 | |
for d in data: | |
word, count = d | |
w = classWordWeightCache[targetc][word] | |
if w == 0: | |
is_zero += 1 | |
weight += count * ( ( w - class_all_avg[targetc] ) / class_all_sd[targetc] ) | |
weight *= (float(len(data)) / is_zero) / class_words[targetc] | |
weight /= math.log( class_words[targetc] ) | |
results.append((weight, targetc)) | |
results.sort(reverse=True) | |
""" | |
------------------------------ | |
class: naruto | |
* avg: 7.85771505554 | |
* sd: 3.56756696384 | |
* t_sd: 79.4111516922 | |
* t_order: 1.1 | |
------------------------------ | |
class: eva | |
* avg: 1.00478121043 | |
* sd: 0.375988195826 | |
* t_sd: 76.2106404704 | |
* t_order: 1.3 | |
------------------------------ | |
class: muv | |
* avg: 3.98036105321 | |
* sd: 1.49228261299 | |
* t_sd: 76.0927934718 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: 2.34400303075 | |
* sd: 1.0359124207 | |
* t_sd: 75.5155838548 | |
* t_order: 1.0 | |
------------------------------ | |
class: nade | |
* avg: 1.82518128031 | |
* sd: 0.532632300371 | |
* t_sd: 71.3938820751 | |
* t_order: 2.6 | |
------------------------------ | |
class: zero | |
* avg: 2.65612617297 | |
* sd: 1.36310860824 | |
* t_sd: 70.5030024115 | |
* t_order: 2.0 | |
------------------------------ | |
class: akamatu | |
* avg: 4.54102133063 | |
* sd: 1.58929579748 | |
* t_sd: 69.0907162482 | |
* t_order: 2.2 | |
------------------------------ | |
class: original | |
* avg: 2.44676494541 | |
* sd: 0.7840892653 | |
* t_sd: 68.2339784306 | |
* t_order: 2.4 | |
------------------------------ | |
class: ff | |
* avg: 4.28676540777 | |
* sd: 1.55274003989 | |
* t_sd: 67.8119782081 | |
* t_order: 1.8 | |
------------------------------ | |
class: gs | |
* avg: 3.01878942588 | |
* sd: 0.928394613408 | |
* t_sd: 66.9629408737 | |
* t_order: 2.4 | |
------------------------------ | |
class: sammon | |
* avg: 1.9845502653 | |
* sd: 0.550444634306 | |
* t_sd: 66.5805022288 | |
* t_order: 2.6 | |
------------------------------ | |
class: HxH | |
* avg: 5.92661887139 | |
* sd: 1.85299315149 | |
* t_sd: 63.616400556 | |
* t_order: 2.2 | |
------------------------------ | |
class: toraha | |
* avg: 2.39737000475 | |
* sd: 0.532713263543 | |
* t_sd: 61.0279889062 | |
* t_order: 4.0 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 3.40538831187 | |
* sd: 1.24293552826 | |
* t_sd: 70.1885814944 | |
* t_order: 2.04615384615 | |
------------------------------ | |
""" | |
if False: | |
#NB | |
#単語ヒット率、クラスの規模の補正 | |
results = [] | |
for targetc in classes: | |
is_zero = 0 | |
weight = 0 | |
for d in data: | |
word, count = d | |
w = classWordWeightCache[targetc][word] | |
if w == 0: | |
is_zero += 1 | |
weight += count * ( ( w - class_all_avg[targetc] ) / class_all_sd[targetc] ) | |
weight *= float(len(data) / is_zero) / class_words[targetc] | |
results.append((weight, targetc)) | |
results.sort(reverse=True) | |
""" | |
------------------------------ | |
class: naruto | |
* avg: 83.665214889 | |
* sd: 41.0130984792 | |
* t_sd: 78.0367048027 | |
* t_order: 1.1 | |
------------------------------ | |
class: eva | |
* avg: 10.6662471736 | |
* sd: 4.31819472949 | |
* t_sd: 76.7328628376 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: 25.2991514693 | |
* sd: 12.1691816085 | |
* t_sd: 76.3066622729 | |
* t_order: 1.0 | |
------------------------------ | |
class: muv | |
* avg: 41.9424871572 | |
* sd: 18.9360139496 | |
* t_sd: 75.5579215868 | |
* t_order: 1.0 | |
------------------------------ | |
class: zero | |
* avg: 28.7064297744 | |
* sd: 15.9524786569 | |
* t_sd: 71.8242593558 | |
* t_order: 1.2 | |
------------------------------ | |
class: original | |
* avg: 26.1331836316 | |
* sd: 9.83567733139 | |
* t_sd: 69.62326732 | |
* t_order: 1.9 | |
------------------------------ | |
class: akamatu | |
* avg: 48.4283907255 | |
* sd: 19.5069727213 | |
* t_sd: 68.397178624 | |
* t_order: 2.3 | |
------------------------------ | |
class: ff | |
* avg: 45.4521759868 | |
* sd: 18.3730915449 | |
* t_sd: 67.3787863052 | |
* t_order: 1.8 | |
------------------------------ | |
class: nade | |
* avg: 19.1858614064 | |
* sd: 5.78670013123 | |
* t_sd: 66.424895457 | |
* t_order: 3.5 | |
------------------------------ | |
class: toraha | |
* avg: 25.4939271441 | |
* sd: 7.04939001171 | |
* t_sd: 63.6507343774 | |
* t_order: 3.3 | |
------------------------------ | |
class: gs | |
* avg: 32.0010532846 | |
* sd: 11.1705940649 | |
* t_sd: 63.6352680535 | |
* t_order: 3.2 | |
------------------------------ | |
class: sammon | |
* avg: 21.2129327319 | |
* sd: 6.58126896742 | |
* t_sd: 62.9718761652 | |
* t_order: 4.1 | |
------------------------------ | |
class: HxH | |
* avg: 62.7669063042 | |
* sd: 22.5089669382 | |
* t_sd: 59.5150998471 | |
* t_order: 3.3 | |
------------------------------ | |
total: | |
* avg: 36.2272278214 | |
* sd: 14.8616637796 | |
* t_sd: 69.2350397696 | |
* t_order: 2.20769230769 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 36.2272278214 | |
* sd: 14.8616637796 | |
* t_sd: 69.2350397696 | |
* t_order: 2.20769230769 | |
------------------------------ | |
""" | |
if False:
    # NB
    # Word-weight normalization, smoothing.
    # Class-weight normalization.
    # Disabled experiment: rebuilds `results` as (score, class) pairs sorted by
    # descending score.
    results = []
    for targetc in classes:
        score = 0
        for word, count in data:
            # Cached word weight for this class, scaled by class_all_sd[targetc].
            scaled = classWordWeightCache[targetc][word] / class_all_sd[targetc]
            score += count * scaled
        # Per-class normalization: divide by log(class_words[targetc]).
        score /= math.log(class_words[targetc])
        results.append((score, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: naruto | |
* avg: 36736.8493577 | |
* sd: 8471.6271288 | |
* t_sd: 77.0856678675 | |
* t_order: 1.2 | |
------------------------------ | |
class: eva | |
* avg: 3883.73147289 | |
* sd: 870.795921128 | |
* t_sd: 75.9676328865 | |
* t_order: 1.1 | |
------------------------------ | |
class: original | |
* avg: 9997.3516104 | |
* sd: 1675.12661203 | |
* t_sd: 75.7489719824 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: 6545.00720195 | |
* sd: 1482.05210241 | |
* t_sd: 74.5002698981 | |
* t_order: 1.1 | |
------------------------------ | |
class: nade | |
* avg: 7139.82405704 | |
* sd: 1605.74081556 | |
* t_sd: 68.2814762163 | |
* t_order: 2.6 | |
------------------------------ | |
class: muv | |
* avg: 20449.2642375 | |
* sd: 3557.76034642 | |
* t_sd: 67.1759970731 | |
* t_order: 1.7 | |
------------------------------ | |
class: gs | |
* avg: 12510.3978823 | |
* sd: 2500.36131198 | |
* t_sd: 66.336197872 | |
* t_order: 1.9 | |
------------------------------ | |
class: zero | |
* avg: 9095.0785684 | |
* sd: 1942.55118092 | |
* t_sd: 66.1001569155 | |
* t_order: 1.9 | |
------------------------------ | |
class: akamatu | |
* avg: 20046.9601612 | |
* sd: 3935.47158031 | |
* t_sd: 64.7523588523 | |
* t_order: 2.6 | |
------------------------------ | |
class: sammon | |
* avg: 6536.01392528 | |
* sd: 1239.54027135 | |
* t_sd: 63.4389833057 | |
* t_order: 2.5 | |
------------------------------ | |
class: HxH | |
* avg: 25274.2155252 | |
* sd: 4607.40671408 | |
* t_sd: 62.9553473146 | |
* t_order: 2.1 | |
------------------------------ | |
class: ff | |
* avg: 17568.4392347 | |
* sd: 3401.16867394 | |
* t_sd: 61.6403605965 | |
* t_order: 2.6 | |
------------------------------ | |
class: toraha | |
* avg: 10239.2715163 | |
* sd: 1743.16152109 | |
* t_sd: 58.419977765 | |
* t_order: 3.0 | |
------------------------------ | |
total: | |
* avg: 14309.4157501 | |
* sd: 2848.6741677 | |
* t_sd: 67.8771845035 | |
* t_order: 1.94615384615 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 14309.4157501 | |
* sd: 2848.6741677 | |
* t_sd: 67.8771845035 | |
* t_order: 1.94615384615 | |
------------------------------ | |
""" | |
if False:
    # NB
    # Word-weight normalization 2.
    # Class-weight normalization coefficient = log( log(words) / log(SD / AVG) )
    # Disabled experiment: rebuilds `results` as (score, class) pairs sorted by
    # descending score.
    results = []
    for targetc in classes:
        score = 0
        for word, count in data:
            # Center on class_all_avg and scale by class_all_sd for this class.
            z = (classWordWeightCache[targetc][word] - class_all_avg[targetc]) / class_all_sd[targetc]
            score += count * z
        # Per-class coefficient described in the header comment.
        coefficient = math.log(
            math.log(class_words[targetc])
            / math.log(class_all_sd[targetc] / class_all_avg[targetc])
        )
        score /= coefficient
        results.append((score, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: naruto | |
* avg: 189265.862807 | |
* sd: 49526.5670983 | |
* t_sd: 80.6490641381 | |
* t_order: 1.0 | |
------------------------------ | |
class: eva | |
* avg: 20099.6794491 | |
* sd: 4962.69373565 | |
* t_sd: 79.391573231 | |
* t_order: 1.1 | |
------------------------------ | |
class: type | |
* avg: 33426.4734412 | |
* sd: 7068.30109253 | |
* t_sd: 77.034314645 | |
* t_order: 1.0 | |
------------------------------ | |
class: nade | |
* avg: 36841.4148841 | |
* sd: 10889.4924887 | |
* t_sd: 75.6910995884 | |
* t_order: 1.7 | |
------------------------------ | |
class: gs | |
* avg: 64493.514528 | |
* sd: 13764.6643212 | |
* t_sd: 72.5862967107 | |
* t_order: 1.2 | |
------------------------------ | |
class: original | |
* avg: 50930.2441433 | |
* sd: 7330.19966959 | |
* t_sd: 72.0680309075 | |
* t_order: 1.1 | |
------------------------------ | |
class: sammon | |
* avg: 33384.9985805 | |
* sd: 6027.70954624 | |
* t_sd: 70.896436528 | |
* t_order: 1.5 | |
------------------------------ | |
class: muv | |
* avg: 104887.404081 | |
* sd: 14881.1424357 | |
* t_sd: 68.3065954679 | |
* t_order: 1.9 | |
------------------------------ | |
class: zero | |
* avg: 46335.5360043 | |
* sd: 8496.93168815 | |
* t_sd: 68.3043528458 | |
* t_order: 1.7 | |
------------------------------ | |
class: akamatu | |
* avg: 101531.931976 | |
* sd: 16207.504749 | |
* t_sd: 63.7851562894 | |
* t_order: 2.7 | |
------------------------------ | |
class: HxH | |
* avg: 128988.172982 | |
* sd: 19942.0923829 | |
* t_sd: 63.6255964155 | |
* t_order: 2.2 | |
------------------------------ | |
class: ff | |
* avg: 89365.4850534 | |
* sd: 13877.0466773 | |
* t_sd: 57.0439242543 | |
* t_order: 3.7 | |
------------------------------ | |
class: toraha | |
* avg: 52458.0530545 | |
* sd: 6785.60393744 | |
* t_sd: 54.9613531469 | |
* t_order: 4.7 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 73231.4439218 | |
* sd: 13827.6884479 | |
* t_sd: 69.5649072437 | |
* t_order: 1.96153846154 | |
------------------------------ | |
""" | |
if False:
    # NB
    # Word-weight normalization 2.
    # Class-weight normalization by the number of words.
    # Disabled experiment: rebuilds `results` as (score, class) pairs sorted by
    # descending score.
    results = []
    for targetc in classes:
        score = 0
        for word, count in data:
            # Center on class_all_avg and scale by class_all_sd for this class.
            z = (classWordWeightCache[targetc][word] - class_all_avg[targetc]) / class_all_sd[targetc]
            score += count * z
        # Per-class normalization: divide by log(class_words[targetc]).
        score /= math.log(class_words[targetc])
        results.append((score, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: naruto | |
* avg: 34077.3653344 | |
* sd: 8382.7525182 | |
* t_sd: 77.8673962735 | |
* t_order: 1.1 | |
------------------------------ | |
class: eva | |
* avg: 3622.16296242 | |
* sd: 858.64236675 | |
* t_sd: 76.7281191421 | |
* t_order: 1.1 | |
------------------------------ | |
class: original | |
* avg: 9218.06230557 | |
* sd: 1573.40403082 | |
* t_sd: 75.801984534 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: 6058.1654544 | |
* sd: 1429.37855987 | |
* t_sd: 75.2048839853 | |
* t_order: 1.1 | |
------------------------------ | |
class: nade | |
* avg: 6591.2918163 | |
* sd: 1613.84016076 | |
* t_sd: 69.6614553039 | |
* t_order: 2.5 | |
------------------------------ | |
class: muv | |
* avg: 19011.684212 | |
* sd: 3340.3349179 | |
* t_sd: 67.6568118079 | |
* t_order: 1.7 | |
------------------------------ | |
class: gs | |
* avg: 11630.6584688 | |
* sd: 2451.88951519 | |
* t_sd: 67.5696050299 | |
* t_order: 1.8 | |
------------------------------ | |
class: zero | |
* avg: 8412.49062239 | |
* sd: 1846.9647025 | |
* t_sd: 66.66432557 | |
* t_order: 1.9 | |
------------------------------ | |
class: sammon | |
* avg: 6040.06865616 | |
* sd: 1187.23994403 | |
* t_sd: 64.8867094221 | |
* t_order: 2.0 | |
------------------------------ | |
class: akamatu | |
* avg: 18434.3729229 | |
* sd: 3683.29775318 | |
* t_sd: 64.830745411 | |
* t_order: 2.5 | |
------------------------------ | |
class: HxH | |
* avg: 23372.185875 | |
* sd: 4350.48754053 | |
* t_sd: 63.3883860154 | |
* t_order: 2.1 | |
------------------------------ | |
class: ff | |
* avg: 16219.3481427 | |
* sd: 3194.91399609 | |
* t_sd: 61.1189712744 | |
* t_order: 2.6 | |
------------------------------ | |
class: toraha | |
* avg: 9514.65660175 | |
* sd: 1622.71629987 | |
* t_sd: 57.9614916424 | |
* t_order: 3.2 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 13246.3471827 | |
* sd: 2733.52786967 | |
* t_sd: 68.4108373394 | |
* t_order: 1.89230769231 | |
------------------------------ | |
""" | |
if False:
    # NB
    # Word-weight normalization 2.
    # Class-weight normalization.
    # Disabled experiment: rebuilds `results` as (score, class) pairs sorted by
    # descending score.
    results = []
    for targetc in classes:
        score = 0
        for word, count in data:
            # Center on class_all_avg and scale by class_all_sd for this class.
            z = (classWordWeightCache[targetc][word] - class_all_avg[targetc]) / class_all_sd[targetc]
            score += count * z
        # Divide by the negated per-class normalization weight.
        score /= -classWeightsForNorm[targetc]
        results.append((score, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: naruto | |
* avg: 0.160942025021 | |
* sd: 0.0370557594227 | |
* t_sd: 78.4960158972 | |
* t_order: 1.0 | |
------------------------------ | |
class: eva | |
* avg: 0.0170871531807 | |
* sd: 0.00362073220782 | |
* t_sd: 76.4896782051 | |
* t_order: 1.1 | |
------------------------------ | |
class: sammon | |
* avg: 0.0286161577636 | |
* sd: 0.00578067964044 | |
* t_sd: 73.9101605979 | |
* t_order: 1.3 | |
------------------------------ | |
class: nade | |
* avg: 0.0313155653088 | |
* sd: 0.00835129249704 | |
* t_sd: 73.7113111562 | |
* t_order: 2.1 | |
------------------------------ | |
class: gs | |
* avg: 0.0553352603794 | |
* sd: 0.0131028595326 | |
* t_sd: 73.6267484021 | |
* t_order: 1.2 | |
------------------------------ | |
class: type | |
* avg: 0.0285401959268 | |
* sd: 0.0057798041796 | |
* t_sd: 73.4601258049 | |
* t_order: 1.2 | |
------------------------------ | |
class: original | |
* avg: 0.0435356146707 | |
* sd: 0.00642413168162 | |
* t_sd: 71.3864022587 | |
* t_order: 1.0 | |
------------------------------ | |
class: HxH | |
* avg: 0.110601527111 | |
* sd: 0.0199869706161 | |
* t_sd: 69.9501118356 | |
* t_order: 1.3 | |
------------------------------ | |
class: muv | |
* avg: 0.0895934667559 | |
* sd: 0.0126265828289 | |
* t_sd: 66.7424818325 | |
* t_order: 2.2 | |
------------------------------ | |
class: zero | |
* avg: 0.0396481296124 | |
* sd: 0.00754967201756 | |
* t_sd: 66.3253622488 | |
* t_order: 2.1 | |
------------------------------ | |
class: akamatu | |
* avg: 0.0868682239756 | |
* sd: 0.0145991212338 | |
* t_sd: 65.2543720628 | |
* t_order: 2.3 | |
------------------------------ | |
class: ff | |
* avg: 0.076645317374 | |
* sd: 0.0139070521464 | |
* t_sd: 64.3128200964 | |
* t_order: 2.2 | |
------------------------------ | |
class: toraha | |
* avg: 0.0448700190444 | |
* sd: 0.00622059147879 | |
* t_sd: 53.4540809836 | |
* t_order: 5.7 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 0.0625845120095 | |
* sd: 0.0119234807295 | |
* t_sd: 69.7784362601 | |
* t_order: 1.9 | |
------------------------------ | |
""" | |
if False:
    # NB
    # Word-weight normalization 2.
    # Disabled experiment: rebuilds `results` as (score, class) pairs sorted by
    # descending score; no per-class normalization is applied.
    results = []
    for targetc in classes:
        score = 0
        for word, count in data:
            # Center on class_all_avg and scale by class_all_sd for this class.
            z = (classWordWeightCache[targetc][word] - class_all_avg[targetc]) / class_all_sd[targetc]
            score += count * z
        results.append((score, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: naruto | |
* avg: 374070.266236 | |
* sd: 97855.7054523 | |
* t_sd: 76.1569057905 | |
* t_order: 1.2 | |
------------------------------ | |
class: original | |
* avg: 101304.03087 | |
* sd: 20402.8737226 | |
* t_sd: 76.1314332706 | |
* t_order: 1.0 | |
------------------------------ | |
class: eva | |
* avg: 39783.8487203 | |
* sd: 10212.2924983 | |
* t_sd: 75.3033192364 | |
* t_order: 1.2 | |
------------------------------ | |
class: type | |
* avg: 66640.1424431 | |
* sd: 17514.9732109 | |
* t_sd: 73.9593859944 | |
* t_order: 1.2 | |
------------------------------ | |
class: muv | |
* avg: 209285.359505 | |
* sd: 44498.6504037 | |
* t_sd: 67.3233009506 | |
* t_order: 1.7 | |
------------------------------ | |
class: zero | |
* avg: 92604.0942076 | |
* sd: 23180.7915126 | |
* t_sd: 65.8545925468 | |
* t_order: 1.8 | |
------------------------------ | |
class: nade | |
* avg: 71977.5259164 | |
* sd: 16783.2715684 | |
* t_sd: 64.7475623292 | |
* t_order: 3.2 | |
------------------------------ | |
class: akamatu | |
* avg: 202987.189671 | |
* sd: 47461.5896899 | |
* t_sd: 64.6357850464 | |
* t_order: 2.4 | |
------------------------------ | |
class: gs | |
* avg: 127346.87151 | |
* sd: 27654.9541361 | |
* t_sd: 62.8827447079 | |
* t_order: 2.6 | |
------------------------------ | |
class: toraha | |
* avg: 104716.473673 | |
* sd: 21762.1283769 | |
* t_sd: 60.0863294481 | |
* t_order: 2.5 | |
------------------------------ | |
class: ff | |
* avg: 178312.030172 | |
* sd: 40201.771219 | |
* t_sd: 59.8977989823 | |
* t_order: 2.7 | |
------------------------------ | |
class: HxH | |
* avg: 256655.957158 | |
* sd: 53653.9692937 | |
* t_sd: 59.5694826358 | |
* t_order: 2.4 | |
------------------------------ | |
class: sammon | |
* avg: 66297.0414346 | |
* sd: 14282.1635122 | |
* t_sd: 59.4733775141 | |
* t_order: 3.0 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 145536.98704 | |
* sd: 33497.3180459 | |
* t_sd: 66.6170783425 | |
* t_order: 2.06923076923 | |
------------------------------ | |
""" | |
if False:
    # NB
    # Word-weight normalization 1.
    # Disabled experiment.
    # Step 1: for every class, load all of its weights from class_word_weight and
    # compute their mean and population standard deviation (divisor is the
    # number of weights).
    class_sd = {}
    class_avg = {}
    for targetc in classes:
        rows = conn.execute("SELECT weight FROM class_word_weight WHERE class= ?", [targetc])
        weights = [row[0] for row in rows]
        n = len(weights)
        mean = 0
        for value in weights:
            mean += value
        mean /= n
        variance = 0
        for value in weights:
            variance += (value - mean) ** 2
        variance /= n
        class_sd[targetc] = math.sqrt(variance)
        class_avg[targetc] = mean
    # Step 2: score each class with the standardized cached word weights, then
    # divide the accumulated score by that class's mean weight.
    results = []
    for targetc in classes:
        score = 0
        for word, count in data:
            z = (classWordWeightCache[targetc][word] - class_avg[targetc]) / class_sd[targetc]
            score += count * z
        score /= class_avg[targetc]
        results.append((score, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: ff | |
* avg: 585137.901143 | |
* sd: 269168.974919 | |
* t_sd: 74.3095734354 | |
* t_order: 1.1 | |
------------------------------ | |
class: naruto | |
* avg: 1236820.63119 | |
* sd: 494423.709634 | |
* t_sd: 71.5399838653 | |
* t_order: 1.3 | |
------------------------------ | |
class: gs | |
* avg: 455179.330769 | |
* sd: 206311.418285 | |
* t_sd: 70.8176027906 | |
* t_order: 1.5 | |
------------------------------ | |
class: akamatu | |
* avg: 629731.60129 | |
* sd: 284433.382604 | |
* t_sd: 69.4210865101 | |
* t_order: 1.3 | |
------------------------------ | |
class: sammon | |
* avg: 221456.081932 | |
* sd: 98340.4590167 | |
* t_sd: 69.3733641139 | |
* t_order: 1.5 | |
------------------------------ | |
class: HxH | |
* avg: 857955.912583 | |
* sd: 366781.261103 | |
* t_sd: 68.014557396 | |
* t_order: 1.6 | |
------------------------------ | |
class: eva | |
* avg: 135886.709242 | |
* sd: 46455.1012248 | |
* t_sd: 67.3218388071 | |
* t_order: 1.8 | |
------------------------------ | |
class: muv | |
* avg: 703424.857361 | |
* sd: 250662.714997 | |
* t_sd: 64.2382874655 | |
* t_order: 2.0 | |
------------------------------ | |
class: zero | |
* avg: 301009.668601 | |
* sd: 127967.687533 | |
* t_sd: 63.5756741035 | |
* t_order: 2.2 | |
------------------------------ | |
class: original | |
* avg: 323296.062981 | |
* sd: 118501.205049 | |
* t_sd: 59.3690605231 | |
* t_order: 2.6 | |
------------------------------ | |
class: nade | |
* avg: 228528.555375 | |
* sd: 76917.900427 | |
* t_sd: 57.842596454 | |
* t_order: 5.1 | |
------------------------------ | |
class: type | |
* avg: 215106.633613 | |
* sd: 81639.2951301 | |
* t_sd: 57.3273133441 | |
* t_order: 3.8 | |
------------------------------ | |
class: toraha | |
* avg: 353454.258832 | |
* sd: 125493.347475 | |
* t_sd: 55.9504832529 | |
* t_order: 4.4 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 480537.554224 | |
* sd: 195930.496723 | |
* t_sd: 65.3154940048 | |
* t_order: 2.32307692308 | |
------------------------------ | |
""" | |
if False:
    # TWMNB -- this is the final version.
    # Disabled experiment: rebuilds `results` as (score, class) pairs sorted by
    # descending score.
    results = []
    for targetc in classes:
        score = 0
        for word, count in data:
            # Earlier variant without the log:
            #   (classWordWeightCache[targetc][word] + s) / (classWeights[targetc] + s_all)
            term = math.log(
                (classWordWeightCache[targetc][word] + s) / (classWeights[targetc] + s_all)
            )
            # Normalize by the per-class "count_log" weight for smoothing value s,
            # then weight by the word's count in the document.
            term /= class_weights[s]["count_log"][targetc]
            term *= count
            score += term
        results.append((score, targetc))
    results.sort(reverse=True)
if False:
    # TMNB?
    # Disabled experiment: rebuilds `results` as (score, class) pairs sorted by
    # descending score.
    results = []
    for targetc in classes:
        score = 0
        for word, count in data:
            # Earlier variant without the log:
            #   (classWordWeightCache[targetc][word] + s) / (classWeights[targetc] + s_all)
            term = math.log(
                (classWordWeightCache[targetc][word] + s) / (classWeights[targetc] + s_all)
            )
            term *= count
            score += term
        # Add log(classProbs[targetc]) once per class.
        score += math.log(classProbs[targetc])
        results.append((score, targetc))
    results.sort(reverse=True)
if False:
    # NB
    # Uses count.
    # Disabled experiment: rebuilds `results` as (score, class) pairs sorted by
    # descending score.
    results = []
    for targetc in classes:
        total = 0
        for word, count in data:
            total += count * classWordWeightCache[targetc][word]
        # Smooth the accumulated weight against the class total and take the log.
        score = math.log((total + smoothing) / (classWeights[targetc] + smoothingAll))
        results.append((score, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: nade | |
* avg: -0.35321569126 | |
* sd: 0.317693824586 | |
* t_sd: 77.9451057489 | |
* t_order: 1.0 | |
------------------------------ | |
class: naruto | |
* avg: 1.13571734522 | |
* sd: 0.321846774216 | |
* t_sd: 76.7327089693 | |
* t_order: 1.0 | |
------------------------------ | |
class: eva | |
* avg: -1.45683742732 | |
* sd: 0.317861947967 | |
* t_sd: 74.0866510457 | |
* t_order: 1.1 | |
------------------------------ | |
class: gs | |
* avg: -0.020378879127 | |
* sd: 0.261082028984 | |
* t_sd: 70.1074012718 | |
* t_order: 1.1 | |
------------------------------ | |
class: sammon | |
* avg: -0.276199347805 | |
* sd: 0.227855771152 | |
* t_sd: 69.8730540628 | |
* t_order: 1.2 | |
------------------------------ | |
class: type | |
* avg: -0.809200536449 | |
* sd: 0.232368260019 | |
* t_sd: 68.2639881204 | |
* t_order: 1.5 | |
------------------------------ | |
class: zero | |
* avg: -0.715821091564 | |
* sd: 0.190345773408 | |
* t_sd: 60.4115325197 | |
* t_order: 3.3 | |
------------------------------ | |
class: muv | |
* avg: 0.239033979658 | |
* sd: 0.199338212326 | |
* t_sd: 57.9367844563 | |
* t_order: 3.8 | |
------------------------------ | |
class: HxH | |
* avg: 1.26110426092 | |
* sd: 0.20027967335 | |
* t_sd: 56.2902263005 | |
* t_order: 4.9 | |
------------------------------ | |
class: akamatu | |
* avg: 0.288005638889 | |
* sd: 0.199621080142 | |
* t_sd: 53.6432333844 | |
* t_order: 5.4 | |
------------------------------ | |
class: original | |
* avg: -0.30187523483 | |
* sd: 0.201562382152 | |
* t_sd: 53.1190422145 | |
* t_order: 5.8 | |
------------------------------ | |
class: ff | |
* avg: 0.779932499854 | |
* sd: 0.196900144832 | |
* t_sd: 46.1756097496 | |
* t_order: 8.6 | |
------------------------------ | |
class: toraha | |
* avg: 0.20802236361 | |
* sd: 0.180850722226 | |
* t_sd: 44.8129294908 | |
* t_order: 8.7 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: -0.00167016309207 | |
* sd: 0.234431276566 | |
* t_sd: 62.2614051796 | |
* t_order: 3.64615384615 | |
------------------------------ | |
""" | |
if False:
    # NB
    # Disabled experiment: rebuilds `results` as (score, class) pairs sorted by
    # descending score. Each word contributes its cached weight once; the
    # per-document count is unpacked but not used.
    results = []
    for targetc in classes:
        total = 0
        for word, count in data:
            total += classWordWeightCache[targetc][word]
        # Smooth the accumulated weight against the class total and take the log.
        score = math.log((total + smoothing) / (classWeights[targetc] + smoothingAll))
        results.append((score, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: naruto | |
* avg: -1.36474854074 | |
* sd: 0.0990535691217 | |
* t_sd: 79.2813468834 | |
* t_order: 1.0 | |
------------------------------ | |
class: nade | |
* avg: -2.01728841807 | |
* sd: 0.143152413854 | |
* t_sd: 79.1537377799 | |
* t_order: 1.0 | |
------------------------------ | |
class: eva | |
* avg: -2.74128547465 | |
* sd: 0.178064823951 | |
* t_sd: 76.2231618872 | |
* t_order: 1.0 | |
------------------------------ | |
class: sammon | |
* avg: -1.95365327094 | |
* sd: 0.0983952958705 | |
* t_sd: 72.2969196803 | |
* t_order: 1.1 | |
------------------------------ | |
class: gs | |
* avg: -1.88885688744 | |
* sd: 0.0969894733765 | |
* t_sd: 71.8209223179 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: -2.33734414044 | |
* sd: 0.119035821633 | |
* t_sd: 71.264145829 | |
* t_order: 1.0 | |
------------------------------ | |
class: HxH | |
* avg: -1.21203437949 | |
* sd: 0.0607120323132 | |
* t_sd: 70.9494113472 | |
* t_order: 1.0 | |
------------------------------ | |
class: muv | |
* avg: -1.80312983296 | |
* sd: 0.0861196852463 | |
* t_sd: 68.3250090695 | |
* t_order: 1.7 | |
------------------------------ | |
class: zero | |
* avg: -2.24971291528 | |
* sd: 0.0929647454724 | |
* t_sd: 63.4397230574 | |
* t_order: 2.7 | |
------------------------------ | |
class: akamatu | |
* avg: -1.73289032254 | |
* sd: 0.0731294988792 | |
* t_sd: 58.1629916693 | |
* t_order: 4.1 | |
------------------------------ | |
class: toraha | |
* avg: -1.77935713556 | |
* sd: 0.0718235961978 | |
* t_sd: 56.2762876058 | |
* t_order: 4.8 | |
------------------------------ | |
class: original | |
* avg: -2.03148244387 | |
* sd: 0.0835857883468 | |
* t_sd: 55.8556104571 | |
* t_order: 4.6 | |
------------------------------ | |
class: ff | |
* avg: -1.5566886795 | |
* sd: 0.0587450262853 | |
* t_sd: 48.5787467624 | |
* t_order: 7.7 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: -1.89757480319 | |
* sd: 0.0970593669652 | |
* t_sd: 67.0483087959 | |
* t_order: 2.51538461538 | |
------------------------------ | |
""" | |
if False:
    # TCNB
    # Weight normalization.
    # Class-weight normalization.
    # Disabled experiment: for each target class, sums the standardized weights
    # of every *other* class and stores the negated total in `results`.
    results = []
    for targetc in classes:
        complement_total = 0
        for word, count in data:
            per_word = 0
            for ec in classes:
                if ec == targetc:
                    continue
                z = (classWordWeightCache[ec][word] - class_all_avg[ec]) / class_all_sd[ec]
                per_word += z / math.log(class_words[ec])
            per_word *= count
            complement_total += per_word
        # Negated so that the subsequent processing can be shared.
        results.append((-complement_total, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: naruto | |
* avg: -408928.384013 | |
* sd: 8382.7525182 | |
* t_sd: 77.8673962735 | |
* t_order: 1.1 | |
------------------------------ | |
class: eva | |
* avg: -43465.955549 | |
* sd: 858.64236675 | |
* t_sd: 76.7281191421 | |
* t_order: 1.1 | |
------------------------------ | |
class: original | |
* avg: -110616.747667 | |
* sd: 1573.40403082 | |
* t_sd: 75.801984534 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: -72697.9854528 | |
* sd: 1429.37855987 | |
* t_sd: 75.2048839853 | |
* t_order: 1.1 | |
------------------------------ | |
class: nade | |
* avg: -79095.5017956 | |
* sd: 1613.84016076 | |
* t_sd: 69.6614553039 | |
* t_order: 2.5 | |
------------------------------ | |
class: muv | |
* avg: -228140.210543 | |
* sd: 3340.3349179 | |
* t_sd: 67.6568118079 | |
* t_order: 1.7 | |
------------------------------ | |
class: gs | |
* avg: -139567.901626 | |
* sd: 2451.88951519 | |
* t_sd: 67.5696050299 | |
* t_order: 1.8 | |
------------------------------ | |
class: zero | |
* avg: -100949.887469 | |
* sd: 1846.9647025 | |
* t_sd: 66.66432557 | |
* t_order: 1.9 | |
------------------------------ | |
class: sammon | |
* avg: -72480.823874 | |
* sd: 1187.23994403 | |
* t_sd: 64.8867094221 | |
* t_order: 2.0 | |
------------------------------ | |
class: akamatu | |
* avg: -221212.475075 | |
* sd: 3683.29775318 | |
* t_sd: 64.830745411 | |
* t_order: 2.5 | |
------------------------------ | |
class: HxH | |
* avg: -280466.2305 | |
* sd: 4350.48754053 | |
* t_sd: 63.3883860154 | |
* t_order: 2.1 | |
------------------------------ | |
class: ff | |
* avg: -194632.177712 | |
* sd: 3194.91399609 | |
* t_sd: 61.1189712744 | |
* t_order: 2.6 | |
------------------------------ | |
class: toraha | |
* avg: -114175.879221 | |
* sd: 1622.71629987 | |
* t_sd: 57.9614916424 | |
* t_order: 3.2 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: -158956.166192 | |
* sd: 2733.52786967 | |
* t_sd: 68.4108373394 | |
* t_order: 1.89230769231 | |
------------------------------ | |
""" | |
if False:
    # TWCNB, final version.
    # Disabled experiment: scores each target class from the weights of all
    # other classes and stores the negated total in `results`.
    results = []
    for targetc in classes:
        # Total weight of every class except the target.
        denominator = 0
        for ec in classes:
            if ec == targetc:
                continue
            denominator += classWeights[ec]
        # Smoothed log weight of each document word over the other classes.
        weights = []
        for word, count in data:
            numerator = 0
            for ec in classes:
                if ec == targetc:
                    continue
                numerator += classWordWeightCache[ec][word]
            log_weight = math.log((numerator + s) / (denominator + s_all))
            weights.append((log_weight, word, count))
        result = 0
        for log_weight, word, count in weights:
            # Weight by the count, then normalize by the per-class
            # "log_complement" weight for smoothing value s.
            term = log_weight * count
            term /= class_weights[s]["log_complement"][targetc]
            result += term
        # Class prior term left disabled:
        #   result -= math.log(classProbs[targetc])
        # Negated so that the subsequent processing can be shared.
        results.append((-result, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: naruto | |
* avg: 1311695.63587 | |
* sd: 4568.74229051 | |
* t_sd: 70.0650279355 | |
* t_order: 1.0 | |
------------------------------ | |
class: eva | |
* avg: 128274.546375 | |
* sd: 438.090584609 | |
* t_sd: 69.1028324724 | |
* t_order: 1.0 | |
------------------------------ | |
class: nade | |
* avg: 272165.286103 | |
* sd: 851.564247431 | |
* t_sd: 64.260053795 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: 239876.368143 | |
* sd: 783.932784687 | |
* t_sd: 63.0837281902 | |
* t_order: 1.1 | |
------------------------------ | |
class: gs | |
* avg: 433578.305835 | |
* sd: 1332.75961534 | |
* t_sd: 61.2718654195 | |
* t_order: 1.0 | |
------------------------------ | |
class: sammon | |
* avg: 245704.418527 | |
* sd: 751.569559483 | |
* t_sd: 60.2438011563 | |
* t_order: 1.2 | |
------------------------------ | |
class: HxH | |
* avg: 941604.543454 | |
* sd: 2817.82654017 | |
* t_sd: 58.8588912753 | |
* t_order: 2.1 | |
------------------------------ | |
class: zero | |
* avg: 337397.073691 | |
* sd: 1066.67336837 | |
* t_sd: 57.1218074469 | |
* t_order: 4.2 | |
------------------------------ | |
class: muv | |
* avg: 706452.265907 | |
* sd: 1932.89368071 | |
* t_sd: 56.0904083166 | |
* t_order: 5.2 | |
------------------------------ | |
class: ff | |
* avg: 668932.715533 | |
* sd: 2043.17281962 | |
* t_sd: 55.5907307836 | |
* t_order: 6.3 | |
------------------------------ | |
class: akamatu | |
* avg: 799218.461441 | |
* sd: 2374.55913981 | |
* t_sd: 52.866347537 | |
* t_order: 7.3 | |
------------------------------ | |
class: original | |
* avg: 385981.180659 | |
* sd: 1136.53176629 | |
* t_sd: 41.6690512266 | |
* t_order: 11.1 | |
------------------------------ | |
class: toraha | |
* avg: 356330.850268 | |
* sd: 842.690874277 | |
* t_sd: 30.9558353575 | |
* t_order: 12.6 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 525170.127062 | |
* sd: 1610.84671318 | |
* t_sd: 57.0138754548 | |
* t_order: 4.23846153846 | |
------------------------------ | |
""" | |
if False:
    # TCNB
    # Weights are not normalized.
    # Disabled experiment: scores each target class from the weights of all
    # other classes, combines that with log(classProbs[targetc]), and stores
    # the negated total in `results`.
    #
    # Sign fix: the log prior used to be *added* to `result` before the final
    # negation, so it entered the score as -log p(class) and favoured classes
    # with a small classProbs value (the original comment already questioned
    # this sign). It is now subtracted, giving
    #   score = log p(class) - sum(count * log(complement word weight)).
    # Assumes classProbs holds class prior probabilities -- confirm against
    # its definition.
    # NOTE: the statistics recorded right after this block were produced with
    # the old (+=) sign.
    results = []
    for targetc in classes:
        # Total weight of every class except the target.
        denominator = 0
        for ec in classes:
            if ec == targetc:
                continue
            denominator += classWeights[ec]
        # Smoothed log weight of each document word over the other classes.
        weights = []
        for word, count in data:
            numerator = 0
            for ec in classes:
                if ec == targetc:
                    continue
                numerator += classWordWeightCache[ec][word]
            log_weight = math.log((numerator + smoothing) / (denominator + smoothingAll))
            weights.append((log_weight, word, count))
        result = 0
        for log_weight, word, count in weights:
            result += log_weight * count
        result -= math.log(classProbs[targetc])
        # Negated so that the subsequent processing can be shared.
        results.append((-result, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: naruto | |
* avg: 1311701.77275 | |
* sd: 4570.14264271 | |
* t_sd: 70.039306356 | |
* t_order: 1.0 | |
------------------------------ | |
class: eva | |
* avg: 128280.683259 | |
* sd: 439.51732589 | |
* t_sd: 68.6234471934 | |
* t_order: 1.0 | |
------------------------------ | |
class: nade | |
* avg: 272171.422988 | |
* sd: 853.347967684 | |
* t_sd: 64.307921588 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: 239882.505028 | |
* sd: 785.329783374 | |
* t_sd: 62.8375718977 | |
* t_order: 1.3 | |
------------------------------ | |
class: gs | |
* avg: 433584.44272 | |
* sd: 1334.47600307 | |
* t_sd: 61.3190306866 | |
* t_order: 1.0 | |
------------------------------ | |
class: sammon | |
* avg: 245710.555412 | |
* sd: 753.288574189 | |
* t_sd: 60.3069055735 | |
* t_order: 1.2 | |
------------------------------ | |
class: HxH | |
* avg: 941610.680338 | |
* sd: 2819.53528976 | |
* t_sd: 58.8642982022 | |
* t_order: 2.1 | |
------------------------------ | |
class: zero | |
* avg: 337403.210576 | |
* sd: 1068.26404341 | |
* t_sd: 56.9676393264 | |
* t_order: 4.1 | |
------------------------------ | |
class: muv | |
* avg: 706458.402791 | |
* sd: 1934.43150062 | |
* t_sd: 55.9748821475 | |
* t_order: 5.2 | |
------------------------------ | |
class: ff | |
* avg: 668938.852418 | |
* sd: 2044.84485651 | |
* t_sd: 55.5855891945 | |
* t_order: 6.4 | |
------------------------------ | |
class: akamatu | |
* avg: 799224.598326 | |
* sd: 2376.16498253 | |
* t_sd: 52.8101602153 | |
* t_order: 7.3 | |
------------------------------ | |
class: original | |
* avg: 385987.317543 | |
* sd: 1138.20185836 | |
* t_sd: 41.6069569223 | |
* t_order: 11.1 | |
------------------------------ | |
class: toraha | |
* avg: 356336.987152 | |
* sd: 844.509998667 | |
* t_sd: 30.9725229619 | |
* t_order: 12.6 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 525176.263947 | |
* sd: 1612.46575591 | |
* t_sd: 56.9397101743 | |
* t_order: 4.25384615385 | |
------------------------------ | |
""" | |
if False:
    # TWCNB
    # Disabled experiment: scores each target class from the weights of all
    # other classes, normalized by classComplementWeightsForNorm. Unlike the
    # neighbouring complement experiments, the total is stored without negation.
    results = []
    for targetc in classes:
        # Total weight of every class except the target.
        denominator = 0
        for ec in classes:
            if ec == targetc:
                continue
            denominator += classWeights[ec]
        # Smoothed log weight of each document word over the other classes.
        weights = []
        for word, count in data:
            numerator = 0
            for ec in classes:
                if ec == targetc:
                    continue
                numerator += classWordWeightCache[ec][word]
            log_weight = math.log((numerator + s) / (denominator + s_all))
            weights.append((log_weight, word, count))
        result = 0
        for log_weight, word, count in weights:
            # Normalize first, then weight by the word's count.
            term = log_weight / classComplementWeightsForNorm[targetc]
            term *= count
            result += term
        results.append((result, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: original | |
* avg: 0.12525061358 | |
* sd: 0.000438189289814 | |
* t_sd: 77.3870036459 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: 0.0778401173711 | |
* sd: 0.000302713438858 | |
* t_sd: 69.6060377116 | |
* t_order: 1.8 | |
------------------------------ | |
class: eva | |
* avg: 0.0416256310952 | |
* sd: 0.000154656723194 | |
* t_sd: 65.1709758438 | |
* t_order: 2.2 | |
------------------------------ | |
class: toraha | |
* avg: 0.115629437295 | |
* sd: 0.000426885832668 | |
* t_sd: 64.7519737869 | |
* t_order: 2.1 | |
------------------------------ | |
class: muv | |
* avg: 0.229242629576 | |
* sd: 0.00082232383802 | |
* t_sd: 63.5943890608 | |
* t_order: 2.2 | |
------------------------------ | |
class: naruto | |
* avg: 0.425641252201 | |
* sd: 0.00151855309519 | |
* t_sd: 63.5884999764 | |
* t_order: 2.1 | |
------------------------------ | |
class: zero | |
* avg: 0.109485181458 | |
* sd: 0.000389838954393 | |
* t_sd: 57.0235890994 | |
* t_order: 3.4 | |
------------------------------ | |
class: akamatu | |
* avg: 0.259344670378 | |
* sd: 0.000891560474804 | |
* t_sd: 53.3260681909 | |
* t_order: 4.4 | |
------------------------------ | |
class: nade | |
* avg: 0.0883176079352 | |
* sd: 0.000286864991421 | |
* t_sd: 46.8204370575 | |
* t_order: 7.5 | |
------------------------------ | |
class: ff | |
* avg: 0.21706730743 | |
* sd: 0.000738857892353 | |
* t_sd: 44.8621334799 | |
* t_order: 8.6 | |
------------------------------ | |
class: gs | |
* avg: 0.140695615643 | |
* sd: 0.000477107046101 | |
* t_sd: 44.3074991755 | |
* t_order: 9.0 | |
------------------------------ | |
class: HxH | |
* avg: 0.305548421088 | |
* sd: 0.00104171011893 | |
* t_sd: 43.1230073263 | |
* t_order: 9.6 | |
------------------------------ | |
class: sammon | |
* avg: 0.0797312092984 | |
* sd: 0.000270106662254 | |
* t_sd: 42.7610087446 | |
* t_order: 9.8 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 0.170416899565 | |
* sd: 0.000596874489076 | |
* t_sd: 56.6402017769 | |
* t_order: 4.9 | |
------------------------------ | |
""" | |
if False:
    # TWCNB
    # I think this one is the correct one, though...
    # Disabled experiment: scores each target class from the weights of all
    # other classes, normalizing every log weight by the absolute value of the
    # sum of the log weights over the document's words.
    results = []
    for targetc in classes:
        # Total weight of every class except the target.
        denominator = 0
        for ec in classes:
            if ec == targetc:
                continue
            denominator += classWeights[ec]
        # Smoothed log weight of each document word over the other classes,
        # plus the running sum used for normalization below.
        weights = []
        weightTotal = 0
        for word, count in data:
            numerator = 0
            for ec in classes:
                if ec == targetc:
                    continue
                numerator += classWordWeightCache[ec][word]
            log_weight = math.log((numerator + smoothing) / (denominator + smoothingAll))
            weightTotal += log_weight
            weights.append((log_weight, word, count))
        weightTotal = math.fabs(weightTotal)
        result = 0
        for log_weight, word, count in weights:
            # Normalize first, then weight by the word's count.
            term = log_weight / weightTotal
            term *= count
            result += term
        # Negated so that the subsequent processing can be shared.
        results.append((-result, targetc))
    results.sort(reverse=True)
""" | |
クラス全体のウェイト差の補正が効き過ぎている | |
------------------------------ | |
class: naruto | |
* avg: 13.6381259419 | |
* sd: 0.027352385018 | |
* t_sd: 72.261804576 | |
* t_order: 1.0 | |
------------------------------ | |
class: eva | |
* avg: 4.79923405287 | |
* sd: 0.00721792019273 | |
* t_sd: 64.9145016627 | |
* t_order: 1.6 | |
------------------------------ | |
class: nade | |
* avg: 6.73628791178 | |
* sd: 0.0112160933504 | |
* t_sd: 60.8821347702 | |
* t_order: 2.1 | |
------------------------------ | |
class: gs | |
* avg: 7.68968496961 | |
* sd: 0.0119009382662 | |
* t_sd: 58.0733576324 | |
* t_order: 3.4 | |
------------------------------ | |
class: type | |
* avg: 6.53928309049 | |
* sd: 0.0101006274386 | |
* t_sd: 58.0067771116 | |
* t_order: 4.5 | |
------------------------------ | |
class: sammon | |
* avg: 6.99148441714 | |
* sd: 0.0107147999552 | |
* t_sd: 57.5027194924 | |
* t_order: 3.1 | |
------------------------------ | |
class: HxH | |
* avg: 11.7329272032 | |
* sd: 0.0181516989659 | |
* t_sd: 53.0191543658 | |
* t_order: 6.6 | |
------------------------------ | |
class: zero | |
* avg: 6.66400255582 | |
* sd: 0.00956235635069 | |
* t_sd: 51.6261243177 | |
* t_order: 6.2 | |
------------------------------ | |
class: ff | |
* avg: 11.1111756914 | |
* sd: 0.0164073247014 | |
* t_sd: 51.4868003902 | |
* t_order: 8.1 | |
------------------------------ | |
class: original | |
* avg: 7.29357490509 | |
* sd: 0.0107120088055 | |
* t_sd: 50.7473680855 | |
* t_order: 7.8 | |
------------------------------ | |
class: akamatu | |
* avg: 9.98884550201 | |
* sd: 0.0154106658725 | |
* t_sd: 48.7774841378 | |
* t_order: 8.6 | |
------------------------------ | |
class: muv | |
* avg: 9.7249427688 | |
* sd: 0.0135902418477 | |
* t_sd: 41.607986123 | |
* t_order: 10.7 | |
------------------------------ | |
class: toraha | |
* avg: 8.61086969115 | |
* sd: 0.0128939908039 | |
* t_sd: 22.7685179454 | |
* t_order: 13.0 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 8.57849528471 | |
* sd: 0.0134793116591 | |
* t_sd: 53.2057485085 | |
* t_order: 5.9 | |
------------------------------ | |
""" | |
""" | |
#こうとも読み取れるが、どうも違うようだ | |
results = [] | |
for targetc in classes: | |
denominator = 0 | |
for ec in classes: | |
if targetc != ec: | |
denominator += classWeights[ec] | |
weights = [] | |
weightTotal = 0 | |
for d in data: | |
word, count = d | |
numerator = 0 | |
for ec in classes: | |
if targetc != ec: | |
numerator += classWordWeightCache[ec][word] | |
weight = math.log( (numerator + smoothing) / (denominator + smoothingAll) ) | |
weightTotal += weight | |
weights.append((weight, word, count)) | |
weightTotal = math.fabs(weightTotal) | |
result = 0 | |
for d in weights: | |
weight, word, count = d | |
weight /= weightTotal | |
weight *= count | |
result += weight | |
results.append((result, targetc)) | |
print "-" * 20 | |
results.sort() | |
for d in results: | |
weight, targetc = d | |
try: | |
print "%s: %s" % (targetc, weight) | |
except: | |
pass | |
continue | |
""" | |
if False:
    # TCNB? -- the weight is normalised.
    # One score per candidate class: the log of the smoothed ratio between
    # the count-weighted complement word weights of the document and the
    # complement class weights, divided by a per-class normaliser.
    results = []
    for targetc in classes:
        # The complement of the candidate class is every other class.
        complement = [ec for ec in classes if targetc != ec]
        denominator = sum(classWeights[ec] for ec in complement)
        # A single flat sum in (word, class) order keeps the floating-point
        # accumulation order of a plain nested loop.
        numerator = sum(
            float(count) * classWordWeightCache[ec][word]
            for word, count in data
            for ec in complement
        )
        weight = math.log((numerator + smoothing) / (denominator + smoothingAll))
        weight /= classComplementWeightsForNorm[targetc]
        results.append((weight, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: eva | |
* avg: 4.87099177835e-007 | |
* sd: 1.06851775933e-008 | |
* t_sd: 76.4226615668 | |
* t_order: 1.1 | |
------------------------------ | |
class: type | |
* avg: 2.69077514492e-007 | |
* sd: 1.00413501149e-008 | |
* t_sd: 75.921013835 | |
* t_order: 1.0 | |
------------------------------ | |
class: naruto | |
* avg: -3.55364232009e-007 | |
* sd: 1.13495785547e-008 | |
* t_sd: 73.8726364136 | |
* t_order: 1.0 | |
------------------------------ | |
class: original | |
* avg: 1.12023754034e-007 | |
* sd: 7.19700948595e-009 | |
* t_sd: 66.5402984012 | |
* t_order: 2.1 | |
------------------------------ | |
class: muv | |
* avg: -6.89989489719e-008 | |
* sd: 7.15316890934e-009 | |
* t_sd: 62.9137973967 | |
* t_order: 2.8 | |
------------------------------ | |
class: zero | |
* avg: 2.44409939089e-007 | |
* sd: 7.67566960156e-009 | |
* t_sd: 61.0624614695 | |
* t_order: 2.3 | |
------------------------------ | |
class: nade | |
* avg: 1.39660902629e-007 | |
* sd: 6.44298117977e-009 | |
* t_sd: 57.7200122252 | |
* t_order: 3.2 | |
------------------------------ | |
class: akamatu | |
* avg: -8.06696463057e-008 | |
* sd: 7.45562880648e-009 | |
* t_sd: 55.1561437055 | |
* t_order: 4.8 | |
------------------------------ | |
class: HxH | |
* avg: -3.89382459583e-007 | |
* sd: 7.37873101148e-009 | |
* t_sd: 54.1396830006 | |
* t_order: 5.9 | |
------------------------------ | |
class: gs | |
* avg: 3.45436590018e-008 | |
* sd: 7.51327949295e-009 | |
* t_sd: 53.6397384794 | |
* t_order: 4.2 | |
------------------------------ | |
class: sammon | |
* avg: 1.09837926853e-007 | |
* sd: 6.9175453382e-009 | |
* t_sd: 51.725170962 | |
* t_order: 5.2 | |
------------------------------ | |
class: ff | |
* avg: -2.35942154699e-007 | |
* sd: 7.37570574225e-009 | |
* t_sd: 50.7426791356 | |
* t_order: 8.1 | |
------------------------------ | |
class: toraha | |
* avg: -5.81099984164e-008 | |
* sd: 4.91005895679e-009 | |
* t_sd: 40.1049564352 | |
* t_order: 10.1 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 1.60142641498e-008 | |
* sd: 7.85352959906e-009 | |
* t_sd: 59.9970194636 | |
* t_order: 3.98461538462 | |
------------------------------ | |
""" | |
if False:
    # TCNB? -- the weight is normalised; word counts are not used.
    # Same shape as the count-weighted variant, except that every word of
    # the document contributes its complement weight exactly once.
    results = []
    for targetc in classes:
        # The complement of the candidate class is every other class.
        complement = [ec for ec in classes if targetc != ec]
        denominator = sum(classWeights[ec] for ec in complement)
        # Flat sum in (word, class) order; count is unpacked but ignored.
        numerator = sum(
            classWordWeightCache[ec][word]
            for word, count in data
            for ec in complement
        )
        weight = math.log((numerator + smoothing) / (denominator + smoothingAll))
        weight /= classComplementWeightsForNorm[targetc]
        results.append((weight, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: original | |
* avg: 6.62625214255e-007 | |
* sd: 4.6525737139e-009 | |
* t_sd: 77.5025301841 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: 7.61930966997e-007 | |
* sd: 6.059993792e-009 | |
* t_sd: 76.0942398112 | |
* t_order: 1.1 | |
------------------------------ | |
class: muv | |
* avg: 5.85507315441e-007 | |
* sd: 4.06532772127e-009 | |
* t_sd: 73.1164726683 | |
* t_order: 1.3 | |
------------------------------ | |
class: toraha | |
* avg: 5.77663868144e-007 | |
* sd: 4.03817609203e-009 | |
* t_sd: 72.9741398349 | |
* t_order: 1.2 | |
------------------------------ | |
class: eva | |
* avg: 8.9897859696e-007 | |
* sd: 6.27732496249e-009 | |
* t_sd: 71.1492435196 | |
* t_order: 1.4 | |
------------------------------ | |
class: naruto | |
* avg: 4.4697418711e-007 | |
* sd: 3.03207874445e-009 | |
* t_sd: 66.051791715 | |
* t_order: 1.9 | |
------------------------------ | |
class: zero | |
* avg: 7.35161031788e-007 | |
* sd: 4.93589286369e-009 | |
* t_sd: 59.8616699465 | |
* t_order: 2.9 | |
------------------------------ | |
class: akamatu | |
* avg: 5.64897463998e-007 | |
* sd: 3.68844549795e-009 | |
* t_sd: 56.4644369085 | |
* t_order: 3.4 | |
------------------------------ | |
class: HxH | |
* avg: 3.96867750141e-007 | |
* sd: 2.60172307858e-009 | |
* t_sd: 45.5582059752 | |
* t_order: 7.8 | |
------------------------------ | |
class: nade | |
* avg: 6.6466937989e-007 | |
* sd: 3.88504712872e-009 | |
* t_sd: 45.4412369728 | |
* t_order: 7.7 | |
------------------------------ | |
class: ff | |
* avg: 5.08462211128e-007 | |
* sd: 3.4411400955e-009 | |
* t_sd: 45.0967878964 | |
* t_order: 8.5 | |
------------------------------ | |
class: gs | |
* avg: 6.22407865462e-007 | |
* sd: 3.9970898373e-009 | |
* t_sd: 43.8867624539 | |
* t_order: 8.5 | |
------------------------------ | |
class: sammon | |
* avg: 6.42764861928e-007 | |
* sd: 3.98313719041e-009 | |
* t_sd: 42.9237257041 | |
* t_order: 8.8 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 6.2068543948e-007 | |
* sd: 4.20445774756e-009 | |
* t_sd: 59.7016341223 | |
* t_order: 4.26923076923 | |
------------------------------ | |
""" | |
if False:
    # TCNB? -- the weight is NOT normalised.
    # Author's note: accuracy is sufficient when the data set is small and
    # the imbalance between classes is small.
    results = []
    for targetc in classes:
        # The complement of the candidate class is every other class.
        complement = [ec for ec in classes if targetc != ec]
        denominator = sum(classWeights[ec] for ec in complement)
        numerator = 0
        for word, count in data:
            for ec in complement:
                numerator += classWordWeightCache[ec][word]
        weight = math.log((numerator + smoothing) / (denominator + smoothingAll))
        # NOTE(review): count is whatever the LAST (word, count) pair of the
        # document left behind, so the whole-document log term is scaled by
        # a single word's count. Kept as-is because the recorded results
        # were produced this way -- confirm whether that was intended.
        weight *= count
        # Sign flipped so the processing that follows can be shared.
        results.append((-weight, targetc))
    results.sort(reverse=True)
""" | |
------------------------------ | |
class: eva | |
* avg: 7.31008067079 | |
* sd: 0.0477056649335 | |
* t_sd: 77.13911659 | |
* t_order: 1.0 | |
------------------------------ | |
class: type | |
* avg: 3.41829310554 | |
* sd: 0.0221013252225 | |
* t_sd: 76.9002779576 | |
* t_order: 1.0 | |
------------------------------ | |
class: muv | |
* avg: 8.01272630654 | |
* sd: 0.0519458005303 | |
* t_sd: 74.7011672721 | |
* t_order: 1.0 | |
------------------------------ | |
class: naruto | |
* avg: 2.36184923361 | |
* sd: 0.0135671588159 | |
* t_sd: 73.271893772 | |
* t_order: 1.0 | |
------------------------------ | |
class: toraha | |
* avg: 8.45739691564 | |
* sd: 0.0261536550873 | |
* t_sd: 66.9009896355 | |
* t_order: 2.2 | |
------------------------------ | |
class: original | |
* avg: 5.25567675686 | |
* sd: 0.0225737024698 | |
* t_sd: 66.5506223112 | |
* t_order: 1.7 | |
------------------------------ | |
class: zero | |
* avg: 3.83547109643 | |
* sd: 0.0167425389659 | |
* t_sd: 62.6982672155 | |
* t_order: 2.1 | |
------------------------------ | |
class: nade | |
* avg: 2.32819454263 | |
* sd: 0.00769474520835 | |
* t_sd: 58.8801720694 | |
* t_order: 3.4 | |
------------------------------ | |
class: akamatu | |
* avg: 2.59794472306 | |
* sd: 0.0118857727134 | |
* t_sd: 58.7072128044 | |
* t_order: 3.4 | |
------------------------------ | |
class: HxH | |
* avg: 2.15826330926 | |
* sd: 0.00905616346116 | |
* t_sd: 55.1769348426 | |
* t_order: 4.7 | |
------------------------------ | |
class: gs | |
* avg: 7.37470112088 | |
* sd: 0.0307546830308 | |
* t_sd: 53.7358576848 | |
* t_order: 4.7 | |
------------------------------ | |
class: sammon | |
* avg: 3.55448591211 | |
* sd: 0.0144491717436 | |
* t_sd: 53.0977454714 | |
* t_order: 4.9 | |
------------------------------ | |
class: ff | |
* avg: 8.67338981059 | |
* sd: 0.0406567143574 | |
* t_sd: 50.2106939107 | |
* t_order: 7.2 | |
------------------------------ | |
""" | |
""" | |
------------------------------ | |
total: | |
* avg: 5.02603642338 | |
* sd: 0.02425285358 | |
* t_sd: 63.6900731952 | |
* t_order: 2.94615384615 | |
------------------------------ | |
""" | |
""" | |
#上手く動く | |
results = [] | |
for targetc in classes: | |
complementAll = 0 | |
for ec in classes: | |
if targetc != ec: | |
complementAll += classWeights[ec] | |
weight = 0 | |
for d in data: | |
word, count = d | |
for ec in classes: | |
if targetc != ec: | |
weight += classWordWeightCache[ec][word] | |
weight = math.log( (weight + smoothing ) / (complementAll + smoothingAll ) ) | |
results.append((weight, targetc)) | |
print "-" * 20 | |
results.sort() | |
for d in results: | |
weight, targetc = d | |
try: | |
print "%s: %s" % (targetc, weight) | |
except: | |
pass | |
continue | |
""" | |
"""#精度が微妙? | |
results = [] | |
for targetc in classes: | |
print targetc | |
#compute denominator of complement | |
complementAll = smoothingAll | |
for ec in classes: | |
if targetc != ec: | |
complementAll += classWeights[ec] | |
print "complementAll: %s" % complementAll | |
weights = [] | |
for d in data: | |
word, count = d | |
#compute numerator of complement | |
complement = smoothing | |
for ec in classes: | |
if targetc != ec: | |
complement += classWordWeightCache[ec][word] | |
#print "complement: %s" % complement | |
#compute weight | |
weight = math.log( float(complement) / complementAll ) | |
#print "weight: %s" % weight | |
weights.append((weight, word, count)) | |
weights.sort(reverse=True) | |
for d in weights[:50]: | |
weight, word, count = d | |
try: | |
print "%s: %s, %s" % (word.encode("shift-jis"), count, weight) | |
except: | |
pass | |
#normalize | |
nWeights = [] | |
nWeightTotal = 0 | |
for d in weights: | |
weight, word, count = d | |
#weight /= classWeights[targetc] | |
#weight *= count # | |
nWeights.append((weight, word)) | |
nWeightTotal += weight | |
#print "weight: %s" % weight | |
print "nWeightTotal: %s" % nWeightTotal | |
results.append((nWeightTotal, targetc)) | |
print "-" * 20 | |
results.sort(reverse=True) | |
for result in results: | |
nWeightTotal, targetc = result | |
try: | |
print "%s: %s" % (targetc.encode("shift-jis"), nWeightTotal) | |
except: | |
pass | |
""" | |
"""#とりあえず動くがバグあり版 | |
results = [] | |
for targetc in classes: | |
#compute denominator of complement | |
print "computing denominator of complement" | |
complementAll = smoothingAll | |
for ec in classes: | |
if targetc != ec: | |
complementAll += classWeights[ec] | |
print "complementAll: %s" % complementAll | |
weights = [] | |
for d in data: | |
word, count = d | |
#compute numerator of complement | |
complement = smoothing | |
for ec in classes: | |
if targetc != ec: | |
complement += getClassWordWeight(ec, word) | |
#compute weight | |
weight = count | |
weight *= math.log( float(complement) / complementAll ) | |
weights.append((weight, word)) | |
#normalize | |
nWeights = [] | |
nWeightTotal = 0 | |
for d in weights: | |
weight, word = d | |
weight /= classWeights[targetc] | |
nWeights.append((weight, word)) | |
nWeightTotal += weight | |
print "nWeightTotal: %s" % nWeightTotal | |
results.append((nWeightTotal, targetc)) | |
results.sort(reverse=True) | |
for result in results: | |
nWeightTotal, targetc = result | |
try: | |
print "%s: %s" % (targetc.encode("shift-jis"), nWeightTotal) | |
except: | |
pass | |
""" | |
# Bookkeeping for the evaluation run.
# NOTE(review): the indentation of this paste is lost, so the loop nesting
# described in the comments below is inferred -- confirm against the
# original file before relying on it.

# Report the figures getSD derives from the ranking `results` for class c.
print "-" * 20 | |
(avg, sd, t_sd) = getSD(results, c) | |
print "* avg: %s" % avg | |
print "* sd: %s" % sd | |
# NOTE(review): this label has no colon, unlike the "* t_sd: %s" label
# printed in the final per-class report.
print "* t_sd %s" % t_sd | |
printProbs(results, c) | |
t_order = getOrder(results, c) | |
# Accumulate raw sums for class c; they are divided by the count below.
class_info[c]["count"] += 1 | |
class_info[c]["avg"] += avg | |
class_info[c]["sd"] += sd | |
class_info[c]["t_sd"] += t_sd | |
class_info[c]["t_order"] += float(t_order) | |
# Fold class c's sums into the grand totals. This happens before the
# per-class division, so the totals add up undivided sums.
# Presumably runs once per class after all of its documents -- TODO confirm.
total_info["count"] += class_info[c]["count"] | |
total_info["avg"] += class_info[c]["avg"] | |
total_info["sd"] += class_info[c]["sd"] | |
total_info["t_sd"] += class_info[c]["t_sd"] | |
total_info["t_order"] += class_info[c]["t_order"] | |
# Turn class c's sums into means over its accumulated count.
class_info[c]["avg"] /= class_info[c]["count"] | |
class_info[c]["sd"] /= class_info[c]["count"] | |
class_info[c]["t_sd"] /= class_info[c]["count"] | |
class_info[c]["t_order"] /= class_info[c]["count"] | |
# Turn the grand totals into means over the total count.
total_info["avg"] /= total_info["count"] | |
total_info["sd"] /= total_info["count"] | |
total_info["t_sd"] /= total_info["count"] | |
total_info["t_order"] /= total_info["count"] | |
# Final report: one section per class, ordered by descending t_sd, followed
# by the overall figures. Single-argument parenthesised print is used; under
# the Python 2 print statement it emits exactly the same text.

# Each row leads with t_sd so that sorting the tuples ranks classes by it.
ci = sorted(
    (
        (class_info[c]["t_sd"], c, class_info[c]["avg"],
         class_info[c]["sd"], class_info[c]["t_order"])
        for c in class_info
    ),
    reverse=True
)
print("-" * 30)
for t_sd, c, avg, sd, t_order in ci:
    print("class: %s" % c)
    for label, value in (("avg", avg), ("sd", sd),
                         ("t_sd", t_sd), ("t_order", t_order)):
        print("* %s: %s" % (label, value))
    print("-" * 30)
print("total:")
for label in ("avg", "sd", "t_sd", "t_order"):
    print("* %s: %s" % (label, total_info[label]))
print("-" * 30)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TWCNB scoring of one document (`data`, a sequence of (word, count) pairs)
# against every class, using the precomputed per-class normaliser stored in
# class_weights[s]["log_complement"].
results = []
for targetc in classes:
    # The complement of the candidate class is every other class.
    complement = [ec for ec in classes if targetc != ec]
    denominator = sum(classWeights[ec] for ec in complement)
    weights = []
    for word, count in data:
        numerator = sum(classWordWeightCache[ec][word] for ec in complement)
        weight = math.log((numerator + s) / (denominator + s_all))
        weights.append((weight, word, count))
    # Count-weighted log complement weights, each scaled by the class
    # normaliser for smoothing parameter s.
    result = sum(
        weight * count / class_weights[s]["log_complement"][targetc]
        for weight, word, count in weights
    )
    # The class-prior term is disabled:
    #result -= math.log(classProbs[targetc])
    # Sign flipped so the processing that follows can be shared with the
    # other variants.
    results.append((-result, targetc))
results.sort(reverse=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment