Created
December 22, 2012 10:05
-
-
Save agasiev/4358323 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from SqlHelper import SqlHelper | |
import string | |
import re | |
import sys | |
from SqlHelper import SqlHelper | |
# Database handle shared by every query issued in this script.
sql = SqlHelper()

# Module-level word table; nothing in the visible code reads or writes it.
allwords = {}
def insertWord(postid, word, sql, neutral, negative, positive):
    """Record one occurrence of ``word`` for the post ``postid``.

    Two statements are sent through ``sql.query``:

    1. An upsert into ``market_words`` that stores the upper-cased word with
       ``cnt = 1`` and the given counters, or, on a duplicate key, bumps
       ``cnt`` by one and adds the counters to the stored values.
    2. An upsert into ``market_w2p`` linking the word to the post, keyed by
       ``md5('<wordid>_<postid>')``; on a duplicate key ``cnt`` is bumped.

    Args:
        postid: Integer id of the post the word came from.
        word: The token, either a UTF-8 encoded byte string or already
            decoded text.
        sql: Object exposing ``query(statement)``.
        neutral: Amount to add to the word's ``neutral`` counter.
        negative: Amount to add to the word's ``negative`` counter.
        positive: Amount to add to the word's ``positive`` counter.
    """
    # Decode only real byte strings. Calling .decode() on text that is
    # already decoded raises for non-ASCII words on Python 2 and for every
    # word on Python 3.
    if isinstance(word, bytes):
        word = word.decode("utf-8")

    # NOTE(review): the word is interpolated straight into the statement.
    # The visible caller strips quotes and backslashes first, but any other
    # caller can break or inject SQL here. Switch to bound parameters if
    # SqlHelper.query supports them -- TODO confirm its signature.
    res = sql.query(u'''insert into market_words (word, cnt, neutral, negative, positive)
values (upper("%s"), 1, %d, %d, %d) on duplicate key update cnt = cnt + 1,
neutral = neutral + %d, negative = negative + %d, positive = positive + %d
''' % (word, neutral, negative, positive, neutral, negative, positive))

    # The return value of the first query is used as the word id.
    # NOTE(review): presumably this is the connection's last insert id;
    # verify it is still the word's id when the upsert takes the UPDATE path.
    q = "insert into market_w2p (wordid, postid, cnt, hash) values (%d, %d, 1, md5('%d_%d')) on duplicate key update cnt = cnt + 1" % (res, postid, res, postid)
    sql.query(q)
# Punctuation removed from every token before it is stored.
_STRIP_CHARS = (',', '.', '-', '+', '!', '?', '_', ';', ':', "'", "\"")

# Row columns that hold text, paired with the (neutral, negative, positive)
# counters passed to insertWord for each word found in that column.
_COLUMN_COUNTERS = (
    (5, (1, 0, 0)),
    (6, (0, 0, 1)),
    (7, (0, 1, 0)),
)

# A commit is issued once more than this many rows have been processed
# since the previous commit.
_COMMIT_THRESHOLD = 100


def _clean_word(word):
    """Return ``word`` without punctuation and with backslashes as slashes."""
    for char in _STRIP_CHARS:
        word = word.replace(char, '')
    return word.replace('\\', '/')


cnt = 0   # rows processed so far
cntr = 0  # rows processed since the last commit
for item in sql.rquery('select * from market'):
    cnt += 1
    cntr += 1
    wcnt = 0  # non-empty words found in the current row

    for column, (neutral, negative, positive) in _COLUMN_COUNTERS:
        # The text is encoded to UTF-8 bytes here because insertWord
        # decodes the word again before building its statement.
        for word in item[column].encode('utf-8').split():
            word = _clean_word(word)
            if len(word) > 0:
                wcnt += 1
                insertWord(int(item[0]), word, sql, neutral, negative, positive)

    # Parenthesized form prints the same text under Python 2's print
    # statement and is also valid as a function call.
    print("Performed %d records with %d words." % (cnt, wcnt))

    # Commit in batches instead of once per row.
    if cntr > _COMMIT_THRESHOLD:
        sql.commit()
        cntr = 0

# Flush whatever is left from the final, partial batch.
sql.commit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment