Skip to content

Instantly share code, notes, and snippets.

@evanlh
Created May 7, 2011 03:14
Show Gist options
  • Save evanlh/960165 to your computer and use it in GitHub Desktop.
Save evanlh/960165 to your computer and use it in GitHub Desktop.
package katas.kata5;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.BitSet;
public class BloomFilter {
private static final int BITS = 19;
private static final int NBITS = 1 << BITS;
private static final int MASK = NBITS - 1;
private final BitSet bits;
private int wordsTrained = 0;
public BloomFilter() {
bits = new BitSet(NBITS);
}
public void train(Reader in) throws IOException {
BufferedReader r = new BufferedReader(in);
String line;
while (null != (line = r.readLine())) {
trainWord(line.trim());
++wordsTrained;
}
System.out.println(bits.cardinality());
System.out.println((bits.cardinality() * 100.0) / NBITS);
}
public int getWordsTrained() {
return wordsTrained;
}
public boolean has(String word) {
return check(hashes(word));
}
public void trainWord(String word) {
insert(hashes(word));
}
private boolean check(int[] hashes) {
for (int hash : hashes) {
if (!bits.get(hash)) {
return false;
}
}
return true;
}
private int[] hashes(String word) {
byte[] digest = digest(word);
int[] hashes = new int[digest.length / ((BITS + 7) / 8)];
int i = 0;
int currentHash = 0, currentBits = 0;
for (int j = 0; j < digest.length; j++) {
if (currentBits >= BITS) {
hashes[i++] = currentHash & MASK;
currentHash = 0;
currentBits = 0;
}
int b = digest[j];
currentHash = currentHash << 8 | (b & 0xFF);
currentBits += 8;
}
return hashes;
}
private void insert(int[] hashes) {
for (int hash : hashes) {
bits.set(hash);
}
}
private byte[] digest(String word) {
try {
return MessageDigest.getInstance("MD5").digest(word.getBytes());
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment