Created
December 21, 2012 16:45
-
-
Save agasiev/4353940 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import string | |
import re | |
import sys | |
from SqlHelper import SqlHelper | |
# Database helper instance shared by insertWord() callers and the indexing
# loop further down in this script.
sql = SqlHelper()

# Module-level dictionary; nothing in the visible part of the script reads
# or writes it after creation.
allwords = {}
def insertWord(postid, word, sql):
    """Record one occurrence of ``word`` for the post ``postid``.

    Two tables are touched:

    * ``imho_words`` -- the upper-cased word is inserted with ``cnt = 1``,
      or its ``cnt`` is incremented when the key already exists.
    * ``imho_w2p`` -- the (wordid, postid) pair is inserted with ``cnt = 1``
      or its ``cnt`` is incremented, depending on whether a row for the
      pair already exists.

    Parameters:
        postid: integer id of the post; formatted into the SQL with ``%d``.
        word:   UTF-8 encoded byte string; it is decoded here before use.
        sql:    helper object providing ``query(statement)`` and
                ``rquery(statement)``.

    Returns nothing.

    NOTE(review): the value returned by ``sql.query`` is used as the word id.
    Presumably this is the id of the affected ``imho_words`` row -- confirm
    against SqlHelper, in particular for the "duplicate key" path.
    """
    text = word.decode("utf-8")
    # The word is embedded in a double-quoted SQL string literal, so a raw
    # backslash or double quote would corrupt or terminate the literal.
    # Escape backslashes first so the escapes added for quotes are not
    # doubled again.  Assumes MySQL's default backslash-escape mode.
    text = text.replace(u'\\', u'\\\\').replace(u'"', u'\\"')

    res = sql.query(u'insert into imho_words (word, cnt) values (upper("%s"), 1) on duplicate key update cnt = cnt + 1' % text)
    count = sql.rquery('SELECT count(*) from imho_w2p where wordid = %d and postid = %d' % (res, postid))[0][0]

    # First occurrence of this word in this post creates the link row;
    # later occurrences only bump its counter.
    if count == 0:
        q = "insert into imho_w2p (wordid, postid, cnt) values (%d, %d, 1)" % (res, postid)
    else:
        q = "update imho_w2p set cnt = cnt + 1 where wordid = %d and postid = %d" % (res, postid)
    sql.query(q)
# Index every row of the ``imhonet`` table: split the text column into
# words, strip punctuation, and register each remaining word through
# insertWord().
#
# NOTE(review): the nesting below was reconstructed from a copy of the
# script whose indentation had been lost; the progress message and the
# commit check are assumed to run once per record -- confirm against the
# original file.

# Characters removed from every token before it is counted.  Built once
# here instead of on every word.
_STRIP_CHARS = (',', '.', '-', '+', '!', '?', '_', ';', ':', "'", "\"")

cnt = 0   # total number of records processed so far
cntr = 0  # records processed since the last commit

for item in sql.rquery('select * from imhonet'):
    cnt += 1
    cntr += 1
    wcnt = 0  # words registered for this record

    # item[5] is encoded to UTF-8 bytes and split on whitespace; item[0] is
    # passed on as the post id.  (Column meanings presumed from usage.)
    for word in item[5].encode('utf-8').split():
        for char in _STRIP_CHARS:
            word = word.replace(char, '')
        word = word.replace('\\', '/')

        # Tokens that consisted only of punctuation are skipped.
        if word:
            wcnt += 1
            insertWord(int(item[0]), word, sql)

    # Single parenthesised argument: prints identically under the Python 2
    # print statement and also parses as a Python 3 call.
    print("Performed %d records with %d words." % (cnt, wcnt))

    # Commit in batches; the counter must exceed 100, so a commit happens
    # after every 101st record.
    if cntr > 100:
        sql.commit()
        cntr = 0

# Flush whatever is left from the last partial batch.
sql.commit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment