Skip to content

Instantly share code, notes, and snippets.

@ki111
Last active January 15, 2018 04:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ki111/e597f9de1578e258a64bd5f8aff13620 to your computer and use it in GitHub Desktop.
Save ki111/e597f9de1578e258a64bd5f8aff13620 to your computer and use it in GitHub Desktop.
# coding: UTF-8
import nltk
import csv
# Merge the rows of 'word-verified.csv' that share the same definition.
#
# Each input row is expected to hold at least three columns:
#     word, count, definition
# Rows that share a definition are collapsed into a single output row whose
# word is the definition itself and whose count is the sum of the group's
# counts.  Rows whose definition is the NOT_VALID marker are dropped.  The
# result is written to 'word_merged.csv' as "word,count" lines.

# Marker used in the definition column for rows that must be left out.
NOT_VALID = "*not valid*"
INPUT_PATH = 'word-verified.csv'
OUTPUT_PATH = 'word_merged.csv'


def merge_by_definition(rows):
    """Collapse rows that share a definition into one (word, count) record.

    Args:
        rows: iterable of sequences ``[word, count, definition, ...]``, for
            example a ``csv.reader``.  Completely empty rows (blank lines in
            the CSV) are ignored.

    Returns:
        A list of ``(word, count)`` tuples ordered by the first appearance of
        each definition.  For a definition that occurs once, the original
        word and the original (unconverted) count value are kept.  For a
        definition that occurs several times, the word is replaced by the
        definition and the count is the integer sum of the group's counts.

    Raises:
        ValueError: if a non-empty row has fewer than three columns, or if a
            count that has to be summed is not an integer.
    """
    merged = []     # output records in first-seen order, each [word, count]
    position = {}   # definition -> index of its record in ``merged``

    for line_no, row in enumerate(rows, start=1):
        if not row:
            continue
        if len(row) < 3:
            raise ValueError(
                "row %d: expected word, count, definition but got %r"
                % (line_no, row)
            )
        word, count, definition = row[0], row[1], row[2]

        # Rows flagged as invalid in the input never reach the output.
        if definition == NOT_VALID:
            continue

        index = position.get(definition)
        if index is None:
            # First time this definition is seen: keep the row untouched.
            # The count is deliberately not converted here so that a row
            # which is never merged is written back exactly as it was read.
            position[definition] = len(merged)
            merged.append([word, count])
        else:
            # Same definition seen again: the surviving record takes the
            # definition (the base form) as its word and accumulates counts.
            record = merged[index]
            record[0] = definition
            record[1] = int(record[1]) + int(count)

    return [tuple(record) for record in merged]


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Read ``input_path``, merge its rows and write them to ``output_path``."""
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(input_path, 'r', newline='') as source:
        merged = merge_by_definition(csv.reader(source))

    # The output is opened only after the input was read successfully, so a
    # failure while reading does not leave a truncated result file behind.
    # lineterminator='\n' keeps the same line endings the plain
    # ``write(... + "\n")`` calls produced, while csv.writer adds the quoting
    # needed when a word contains a comma or a quote character.
    with open(output_path, 'w') as target:
        csv.writer(target, lineterminator='\n').writerows(merged)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment