Last active
January 15, 2018 04:52
-
-
Save ki111/e597f9de1578e258a64bd5f8aff13620 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: UTF-8
import csv

import nltk
# Settings: input/output locations and the marker for rows to be ignored.
INPUT_PATH = 'word-verified.csv'
OUTPUT_PATH = 'word_merged.csv'
NOT_VALID = "*not valid*"


def read_rows(path):
    """Return the non-empty rows of the CSV file at *path* as lists of strings.

    Blank lines (which ``csv.reader`` yields as empty lists) are dropped so
    that a trailing empty line does not abort the run.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(path, 'r', newline='') as source:
        return [fields for fields in csv.reader(source) if fields]


def merge_counts(rows, verbose=False):
    """Merge rows that share the same definition and sum their counts.

    Each row is a sequence whose first three items are
    ``(word, count, definition)``.  Rows whose definition equals
    ``NOT_VALID`` are ignored.  Results keep the order in which each
    definition first appears.

    * A definition that occurs once yields ``(word, count)`` exactly as
      read (the count is not converted).
    * A definition that occurs several times yields
      ``(definition, total)`` where ``total`` is the integer sum of all
      counts in the group.

    Args:
        rows: iterable of row sequences; it is not modified.
        verbose: when true, print the index of every processed row.

    Returns:
        A list of ``(label, count)`` tuples.

    Raises:
        ValueError: if a row has fewer than three columns, or if a count
            that has to be summed is not an integer.
    """
    merged = []            # [label, count] pairs in first-occurrence order
    slot_by_definition = {}  # definition -> index into ``merged``
    for index, fields in enumerate(rows):
        if verbose:
            print(index)
        if len(fields) < 3:
            raise ValueError(
                "row %d has %d column(s); expected word,count,definition"
                % (index, len(fields))
            )
        label, count, definition = fields[0], fields[1], fields[2]
        # Skip rows flagged as not valid.
        if definition == NOT_VALID:
            continue
        slot = slot_by_definition.get(definition)
        if slot is None:
            slot_by_definition[definition] = len(merged)
            merged.append([label, count])
        else:
            # Same definition seen before: use the definition as the word
            # and add this row's count to the running total.
            entry = merged[slot]
            entry[0] = definition
            entry[1] = int(entry[1]) + int(count)
    return [(label, count) for label, count in merged]


def write_rows(path, merged):
    """Write ``(label, count)`` pairs to *path*, one ``label,count`` per line.

    ``csv.writer`` is used so that a label containing a comma or quote is
    quoted instead of silently producing a malformed line.
    """
    with open(path, 'w', newline='') as target:
        csv.writer(target, lineterminator="\n").writerows(merged)


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Read the word list, merge rows with equal definitions, write the result."""
    rows = read_rows(input_path)
    write_rows(output_path, merge_counts(rows, verbose=True))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment