Skip to content

Instantly share code, notes, and snippets.

@agasiev
Created December 22, 2012 10:05
Show Gist options
  • Save agasiev/4358323 to your computer and use it in GitHub Desktop.
Save agasiev/4358323 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from SqlHelper import SqlHelper
import string
import re
import sys
from SqlHelper import SqlHelper
# Database handle shared by the whole script; the indexing loop passes it to
# insertWord() for every word it stores.
sql = SqlHelper()
# NOTE(review): not read or written anywhere else in the visible code.
allwords = {}
def insertWord(postid, word, sql, neutral, negative, positive):
    """Record one occurrence of ``word`` and link it to post ``postid``.

    Two statements are issued through ``sql.query``:

    1. An upsert into ``market_words`` that stores the upper-cased word with
       ``cnt = 1`` and the given sentiment counters, or, when the key already
       exists, increments ``cnt`` by one and adds the counters to the stored
       ones.
    2. An upsert into ``market_w2p`` keyed by ``md5('<wordid>_<postid>')``
       that creates the word-to-post link with ``cnt = 1`` or increments it.

    Args:
        postid: Integer id of the post; formatted with ``%d``.
        word: UTF-8 encoded byte string (it is decoded with
            ``word.decode("utf-8")`` before being placed in the SQL text).
        sql: Object exposing ``query(statement)``.
        neutral: Integer added to the word's ``neutral`` counter.
        negative: Integer added to the word's ``negative`` counter.
        positive: Integer added to the word's ``positive`` counter.

    Returns:
        None.
    """
    # NOTE(review): the word is interpolated straight into the SQL text.
    # Callers must strip quotes and backslashes first; if SqlHelper offers
    # parameter binding, prefer it here -- TODO confirm.
    sentiment = (neutral, negative, positive)
    word_stmt = (
        u'insert into market_words (word, cnt, neutral, negative, positive)\n'
        u'values (upper("%s"), 1, %d, %d, %d) on duplicate key update cnt = cnt + 1,\n'
        u'neutral = neutral + %d, negative = negative + %d, positive = positive + %d\n'
    )
    # The counters appear twice: once for the insert, once for the update.
    word_args = (word.decode("utf-8"),) + sentiment + sentiment
    # The value returned by the first query is used as the word id below;
    # presumably it is the affected row's id -- verify against SqlHelper.
    word_id = sql.query(word_stmt % word_args)

    link_stmt = "insert into market_w2p (wordid, postid, cnt, hash) values (%d, %d, 1, md5('%d_%d')) on duplicate key update cnt = cnt + 1"
    # (word_id, postid) is needed both for the columns and for the md5 key.
    sql.query(link_stmt % ((word_id, postid) * 2))
# Punctuation characters removed from every token before it is stored.
_STRIP_CHARS = (',', '.', '-', '+', '!', '?', '_', ';', ':', "'", "\"")

# Text columns of a `market` row that get indexed, in processing order, with
# the (neutral, negative, positive) flags passed to insertWord() for each of
# their words: column 5 counts as neutral, 6 as positive, 7 as negative.
_TEXT_COLUMNS = (
    (5, (1, 0, 0)),
    (6, (0, 0, 1)),
    (7, (0, 1, 0)),
)


def _clean_word(word):
    """Return ``word`` without punctuation and with backslashes as slashes."""
    for char in _STRIP_CHARS:
        word = word.replace(char, '')
    return word.replace('\\', '/')


def _index_text(postid, text, flags):
    """Store every non-empty cleaned token of ``text``; return their count.

    ``text`` is UTF-8 encoded and split on whitespace; each surviving token
    is handed to insertWord() together with the sentiment ``flags``.
    """
    stored = 0
    for raw in text.encode('utf-8').split():
        word = _clean_word(raw)
        if len(word) > 0:
            stored += 1
            insertWord(int(postid), word, sql, *flags)
    return stored


cnt = 0   # records processed so far
cntr = 0  # records processed since the last commit
for item in sql.rquery('select * from market'):
    cnt += 1
    cntr += 1
    # Word count is per record: it restarts for every row.
    wcnt = 0
    for column, flags in _TEXT_COLUMNS:
        # item[0] is passed to insertWord() as the post id.
        wcnt += _index_text(item[0], item[column], flags)
    # Single-argument call form prints the same text on Python 2 and 3.
    print("Performed %d records with %d words." % (cnt, wcnt))
    # Commit in batches: fires once more than 100 records are pending.
    if cntr > 100:
        sql.commit()
        cntr = 0
# Flush whatever is left from the last partial batch.
sql.commit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment