Skip to content

Instantly share code, notes, and snippets.

@ki111
Created December 29, 2017 03:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ki111/04287b179b8a662be11d6be7e109b3e2 to your computer and use it in GitHub Desktop.
Save ki111/04287b179b8a662be11d6be7e109b3e2 to your computer and use it in GitHub Desktop.
# coding: UTF-8
from selenium import webdriver
from bs4 import BeautifulSoup
from datetime import datetime
import time
import pandas
import numpy
# Input/output locations, relative to the current working directory.
INPUT_PATH = 'word.csv'
OUTPUT_PATH = 'word-dot-eliminated.csv'


def _has_dot(word):
    """Return True when *word* is a string containing at least one '.'."""
    return isinstance(word, str) and '.' in word


def merge_dotted_words(words, counts):
    """Strip dots from words and fold duplicate spellings together.

    Every word that contains a '.' has all of its dots removed.  If the
    dot-free spelling already belongs to another row, the dotted row's
    count is added to that row and the dotted row is dropped; otherwise
    the dotted row simply keeps its place under the dot-free spelling.

    The surviving row for a spelling is the first row that already held
    it without dots, or, when there is none, the first dotted row that
    reduces to it.  Rows that never contained a dot are passed through
    untouched (two identical dot-free rows are NOT merged), as are
    non-string values.

    Args:
        words: sequence of words (one per row).
        counts: sequence of counts, same length as ``words``.

    Returns:
        A list of ``(word, count)`` tuples in original row order, without
        the rows that were merged into another row.  The inputs are not
        modified.

    Raises:
        ValueError: if ``words`` and ``counts`` differ in length.
    """
    # Work on copies so the caller's data is never mutated.
    words = list(words)
    counts = list(counts)
    if len(words) != len(counts):
        raise ValueError('words and counts must have the same length')

    # Row that currently owns each dot-free spelling.  Seeded with every
    # row (including the last one) so a dotted word can find a dot-free
    # twin anywhere in the input, not only before the final row.
    owner_row = {}
    for row, word in enumerate(words):
        if isinstance(word, str) and '.' not in word:
            owner_row.setdefault(word, row)

    # Rows whose count has been folded into another row.  Tracked out of
    # band so no real word can collide with a marker value.
    merged_rows = set()
    for row, word in enumerate(words):
        if not _has_dot(word):
            continue
        stripped = word.replace('.', '')
        target = owner_row.get(stripped)
        if target is None:
            # No row has this spelling yet: rename in place and let later
            # dotted variants merge into this row.
            words[row] = stripped
            owner_row[stripped] = row
        else:
            # Same word once the dots are gone: add the counts together.
            counts[target] = counts[target] + counts[row]
            merged_rows.add(row)

    return [
        (word, count)
        for row, (word, count) in enumerate(zip(words, counts))
        if row not in merged_rows
    ]


def main():
    """Read ``word.csv``, merge dotted words, write the cleaned CSV."""
    # Imported here so the block does not depend on a file-level import.
    import csv

    table = pandas.read_csv(INPUT_PATH)
    rows = merge_dotted_words(
        table['word'].tolist(),
        table['count'].tolist(),
    )

    # read_csv decodes as UTF-8 by default, so write the same encoding
    # back.  csv.writer quotes words containing commas or quotes, which
    # plain string concatenation would turn into a malformed row.
    with open(OUTPUT_PATH, 'w', newline='', encoding='utf-8') as out:
        writer = csv.writer(out, lineterminator='\n')
        writer.writerow(['word', 'count'])
        writer.writerows(rows)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment