Skip to content

Instantly share code, notes, and snippets.

@yfgeek
Created October 28, 2017 14:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yfgeek/2f07921322d4d65fbe05bef0eb897fb9 to your computer and use it in GitHub Desktop.
Save yfgeek/2f07921322d4d65fbe05bef0eb897fb9 to your computer and use it in GitHub Desktop.
# coding=utf-8
# python version:2.7
# Imported explicitly: the reload()/setdefaultencoding() calls below use it
# and should not depend on a star-import happening to expose the name.
import sys

from jpype import *
import pandas as pd

# Python 2 only: site.py removes sys.setdefaultencoding at start-up, so the
# module is reloaded to get it back before switching the implicit str/unicode
# codec to UTF-8. Kept ahead of the remaining imports, as in the original.
reload(sys)
sys.setdefaultencoding('utf-8')
import pymysql
from pybloomfilter import BloomFilter
class Filter(object):
    """Copy rows from the ``list`` table into ``filter``, skipping repeated names.

    A Bloom filter stored in the file ``uob.bloom`` records the names that
    have already been copied. Bloom filters may report false positives, so a
    small share of distinct names can be treated as duplicates and skipped.
    """

    def __init__(self):
        """Open the MySQL connection, a cursor, and the Bloom filter."""
        # Keyword arguments: recent PyMySQL releases no longer accept the
        # connection settings positionally.
        self.db = pymysql.connect(
            host="localhost",
            user="root",
            password="",
            database="uob",
            use_unicode=True,
            charset="utf8",
        )
        self.cursor = self.db.cursor()
        # Create the Bloom filter (capacity 1000, error rate 0.001).
        self.bfilter = BloomFilter(1000, 0.001, 'uob.bloom')

    def boolomFilter(self):
        """Insert each ``list`` row whose name is not yet in the Bloom filter.

        Every insert is committed on its own. When an insert fails it is
        rolled back, the error is printed, and processing continues with the
        next row.
        """
        self.cursor.execute("SELECT id,name,url FROM list;")
        # Placeholders let the driver quote the values, so names or URLs
        # containing quotes or backslashes cannot break (or inject into) the
        # statement.
        insert_sql = "INSERT INTO filter (list_id,name,url) VALUES (%s,%s,%s);"
        for list_id, name, url in self.cursor.fetchall() or ():
            if name in self.bfilter:
                continue
            params = (list_id, name, url)
            # Show the statement exactly as the driver will send it.
            print(self.cursor.mogrify(insert_sql, params))
            try:
                self.cursor.execute(insert_sql, params)
                self.db.commit()
            except pymysql.MySQLError as exc:
                self.db.rollback()
                print("error")
                print(exc)
                continue
            # Record the name only after the row is safely stored; marking it
            # earlier would hide a row whose insert was rolled back.
            self.bfilter.add(name)
# Script entry point: run one de-duplication pass over the `list` table.
if __name__ == "__main__":
    obj_filter = Filter()
    try:
        obj_filter.boolomFilter()
    finally:
        # Release the cursor and the connection even if the pass fails.
        obj_filter.cursor.close()
        obj_filter.db.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment