Skip to content

Instantly share code, notes, and snippets.

@jkschneider
Last active August 29, 2015 14:11
Show Gist options
  • Save jkschneider/18846c88a1f96063cf0f to your computer and use it in GitHub Desktop.
Save jkschneider/18846c88a1f96063cf0f to your computer and use it in GitHub Desktop.
Opposite of a bloom filter... reports "not in set" whenever multiset reports 0 or >1.
/**
* The opposite of a bloom filter
*/
class GloomFilter<T> {
Multiset<Long> coordinateFilter
Funnel<T> funnel
long expectedSize
static create(int expectedSize, Funnel<T> funnel) {
return new GloomFilter<T>(expectedSize: expectedSize,
coordinateFilter: HashMultiset.create(expectedSize),
funnel: funnel)
}
GloomFilter<T> put(T t) {
coordinateFilter.add(hash(t))
return this
}
boolean mightNotContain(T t) {
coordinateFilter.count(hash(t)) != 1
}
private Long hash(T t) {
return Hashing.murmur3_128().hashObject(t, funnel).asLong() % expectedSize
}
}
class GloomFilterSpec extends Specification {
def "gloom filter reports 'might not contain' iff the underlying multiset's cardinality != 1"() {
when:
def filter = GloomFilter.create(10, new Funnel<Integer>() {
@Override
void funnel(Integer integer, PrimitiveSink into) {
into.putInt(integer % 10)
}
})
then:
filter.mightNotContain(9) // cardinality of 9 == 0
!filter.put(9).mightNotContain(9) // cardinality of 9 == 1
filter.put(19).mightNotContain(9) // cardinality of 9 == 2
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment