# pizzacat83/wakachi.py

## wakachi.py
import MeCab
import math

# Segment corpus/wiki.txt into base forms with MeCab and write three files:
#   corpus/wiki_wakachi.txt  space-separated base forms of every token
#   frequency.txt            "<word> <-log10(occurrences / counted lines)>"
#   ad.txt                   "<word> <-log10(occurrences)>" for modifiers

# Top-level part-of-speech tags (feature field 0) that are left out of the
# frequency table: prefix, particle, auxiliary verb, symbol, other, unknown.
EXCLUDED_POS = frozenset({"接頭詞", "助詞", "助動詞", "記号", "その他", "未知語"})
# A token is tallied for ad.txt when field 0 is adverb / adjective, or when
# field 1 is "adjectival-noun stem".
MODIFIER_POS = frozenset({"副詞", "形容詞"})
MODIFIER_SUBPOS = frozenset({"形容動詞語幹"})
# Position of the base form inside the comma-separated feature string.
# NOTE(review): this matches the IPADIC field layout; confirm before using a
# different MeCab dictionary.
BASE_FORM_INDEX = 6


def classify_feature(feature):
    """Split one MeCab feature string and decide how the token is tallied.

    Args:
        feature: The comma-separated ``node.feature`` string.

    Returns:
        ``None`` when the token has no usable base form (the field is
        missing or is ``"*"``); otherwise a tuple
        ``(base, is_content, is_modifier)`` where ``is_content`` says the
        token counts toward the frequency table and ``is_modifier`` says it
        counts toward the adverb/adjective table.
    """
    fields = feature.split(",")
    # Guard the index: a feature string with too few fields is skipped
    # instead of raising IndexError.
    if len(fields) <= BASE_FORM_INDEX:
        return None
    base = fields[BASE_FORM_INDEX]
    if base == "*":
        return None
    is_content = fields[0] not in EXCLUDED_POS
    is_modifier = fields[0] in MODIFIER_POS or fields[1] in MODIFIER_SUBPOS
    return base, is_content, is_modifier


def count_words(lines, tagger, out):
    """Tokenise every non-empty line and tally base forms.

    Args:
        lines: Iterable of text lines (trailing newlines allowed).
        tagger: Object providing ``parseToNode(line)``; the returned node
            must expose ``feature`` and ``next``.
        out: Writable text stream; receives each base form followed by a
            single space (no line breaks are written).

    Returns:
        ``(dic, ad, lcount)``: occurrence counts of content words,
        occurrence counts of modifier words, and the number of non-empty
        lines processed.
    """
    dic = {}
    ad = {}
    lcount = 0
    for line in lines:
        # Lines read from a file keep their newline, so comparing against
        # "" never matched; strip the line ending before testing.
        if not line.rstrip("\r\n"):
            continue
        lcount += 1
        node = tagger.parseToNode(line)
        while node:
            token = classify_feature(node.feature)
            node = node.next
            if token is None:
                continue
            base, is_content, is_modifier = token
            out.write(base + " ")
            if is_content:
                dic[base] = dic.get(base, 0) + 1
            if is_modifier:
                ad[base] = ad.get(base, 0) + 1
    return dic, ad, lcount


def write_scores(out, scores):
    """Write one ``"<word> <-log10(value)>"`` line per entry of *scores*.

    Args:
        out: Writable text stream.
        scores: Mapping of word to a strictly positive number.
    """
    for word, value in scores.items():
        print(word, -math.log10(value), file=out)


def main():
    """Run the full pipeline on corpus/wiki.txt."""
    tagger = MeCab.Tagger("")

    # Every file is opened as UTF-8 (matching the input) and closed by its
    # ``with`` block even if processing fails part-way.
    with open("corpus/wiki.txt", "r", encoding="utf-8") as src:
        with open("corpus/wiki_wakachi.txt", "w", encoding="utf-8") as wakachi:
            dic, ad, lcount = count_words(src, tagger, wakachi)

    # Turn raw counts into per-line frequencies. When no line was counted
    # ``dic`` is empty, so the division never sees a zero ``lcount``.
    frequency = {word: count / lcount for word, count in dic.items()}

    with open("frequency.txt", "w", encoding="utf-8") as g:
        write_scores(g, frequency)

    # NOTE(review): unlike frequency.txt, these scores are computed from raw
    # counts (not divided by lcount), as in the original script - confirm
    # whether that difference is intended.
    with open("ad.txt", "w", encoding="utf-8") as g:
        write_scores(g, ad)


if __name__ == "__main__":
    main()