Skip to content

Instantly share code, notes, and snippets.

@agasiev
Created December 21, 2012 16:45
Show Gist options
  • Save agasiev/4353940 to your computer and use it in GitHub Desktop.
Save agasiev/4353940 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import string
import re
import sys
from SqlHelper import SqlHelper
# Database helper instance used for every query and commit in this script.
sql = SqlHelper()
# NOTE(review): not referenced anywhere in the visible code.
allwords = {}
def insertWord(postid, word, sql):
    """Record one occurrence of ``word`` for the post ``postid``.

    First bumps the word's counter in ``imho_words`` (inserting the word
    if the insert does not hit a duplicate key), then bumps the per-post
    counter in ``imho_w2p``, inserting the (word, post) row when no such
    row exists yet.

    Args:
        postid: Integer id of the post the word was found in.
        word: The word, either as UTF-8 encoded bytes or as text.
        sql: Helper object exposing ``query(statement)`` and
            ``rquery(statement)``.
    """
    # Only byte strings need decoding. Calling ``.decode`` on text fails on
    # Python 3 and triggers an implicit ASCII encode on Python 2.
    if isinstance(word, bytes):
        word = word.decode("utf-8")

    # NOTE(review): the statements are assembled with string formatting, so
    # ``word`` must not contain double quotes or backslashes. Switch to
    # parameterized queries if SqlHelper supports them.
    # Presumably query() returns the id of the affected imho_words row --
    # TODO confirm against SqlHelper.
    word_id = sql.query(
        u'insert into imho_words (word, cnt) values (upper("%s"), 1) on duplicate key update cnt = cnt + 1' % word
    )
    count = sql.rquery(
        'SELECT count(*) from imho_w2p where wordid = %d and postid = %d' % (word_id, postid)
    )[0][0]

    if count == 0:
        statement = "insert into imho_w2p (wordid, postid, cnt) values (%d, %d, 1)" % (word_id, postid)
    else:
        statement = "update imho_w2p set cnt = cnt + 1 where wordid = %d and postid = %d" % (word_id, postid)
    sql.query(statement)
# Byte characters removed from every word before it is counted.
_PUNCTUATION = b',.-+!?_;:\'"'

cnt = 0   # total records processed so far
cntr = 0  # records processed since the last commit
for item in sql.rquery('select * from imhonet'):
    cnt += 1
    cntr += 1
    wcnt = 0  # words counted in the current record
    # Column 5 is split on whitespace as UTF-8 bytes and column 0 is used as
    # the post id -- presumably the post text and primary key; confirm
    # against the imhonet schema.
    for word in item[5].encode('utf-8').split():
        # Drop punctuation in one pass, then turn backslashes into slashes.
        # NOTE(review): this looks like it keeps quotes and backslashes out
        # of the SQL that insertWord builds by string formatting -- confirm.
        word = word.translate(None, _PUNCTUATION).replace(b'\\', b'/')
        if word:
            wcnt += 1
            insertWord(int(item[0]), word, sql)
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("Performed %d records with %d words." % (cnt, wcnt))
    # Commit in batches rather than once per record.
    if cntr > 100:
        sql.commit()
        cntr = 0
# Final commit for the records processed since the last periodic commit.
sql.commit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment