Skip to content

Instantly share code, notes, and snippets.

@pocketwalker
Created October 13, 2012 17:13
Show Gist options
  • Save pocketwalker/3885405 to your computer and use it in GitHub Desktop.
Save pocketwalker/3885405 to your computer and use it in GitHub Desktop.
使用BloomFilter过滤用户long型IDs
import java.io.Serializable;
import java.util.BitSet;
import java.util.Random;
//MD5算法最为基础来构造哈希函数
/*
*for (int i = 0; i < funNum; i++){
* //输入URL地址拼接上Hash函数的编号
* String input = url+i.toString();
* //散列值取MD5摘要的后64位与比特向量大小的的余数
* hash =(long)Md5(input).getLast64bit() % (long)bitSetSize;
*}
*/
/**
* Long类型元素的布隆过滤器
*/
public class BloomFilter implements Serializable {
private static final long serialVersionUID = 1L;
public static final int ELEM_NUM = 1000; // 欲容纳的元素个数
public static final double PERCENTAGE = 0.001; // 希望的误差率
private int hashNum; // hash函数的数量
private int size; // 位向量的長度
private BitSet bitVecter; // 位向量
public BloomFilter() {
size = (int) Math.abs(ELEM_NUM * Math.log(PERCENTAGE)
/ (Math.log(2) * Math.log(2))) + 1;
hashNum = (int) (Math.log(2) * ((double) size / ELEM_NUM));
bitVecter = new BitSet(size);
}
/**
* 查找元素是否在集合中
*/
public boolean search(Long elem) {
boolean flag = true;
int temp;
Random random = new Random(elem);
for (int i = 0; i < hashNum; i++) {
temp = random.nextInt(size);
if (!bitVecter.get(temp)) {// 元素不在集合中
bitVecter.set(temp);
flag = false;
}
}
return flag;
}
/**
* 获取位向量的长度
*/
public int size() {
return bitVecter.size();
}
public int getHashNum() {
return hashNum;
}
public void setHashNum(int hashNum) {
this.hashNum = hashNum;
}
public int getSize() {
return size;
}
public void setSize(int size) {
this.size = size;
}
public BitSet getBitVecter() {
return bitVecter;
}
public void setBitVecter(BitSet bitVecter) {
this.bitVecter = bitVecter;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment