Skip to content

Instantly share code, notes, and snippets.

@srgoogleguy
Created June 22, 2016 03:25
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save srgoogleguy/d45dd5ed6b32c66b8e81211ba5c9a4a8 to your computer and use it in GitHub Desktop.
Save srgoogleguy/d45dd5ed6b32c66b8e81211ba5c9a4a8 to your computer and use it in GitHub Desktop.
A Bloom Filter Written in PHP
<?php
/**
* This class relies on the GMP extension to be loaded in PHP
*
*/
class Bloomfilter
{
protected $maxElements, $probability, $space, $hashes, $filter;
public function __construct($numElements, $probability = 0.01)
{
$this->maxElements = $numElements;
$this->probability = $probability;
}
protected function calculateSpace($numElements, $probability)
{
return (int) ceil(($numElements * (log($probability)) / (log(2) ** 2)) * -1);
}
protected function calculateHashFunctions($numElements, $space)
{
return (int) ceil($this->space / $this->maxElements * log(2));
}
protected function numHashFunctionsAvailable()
{
$num = 0;
foreach(hash_algos() as $algo) {
$num += count(unpack('J*', hash($algo, 'bloom', true)));
}
return $num;
}
protected function hash($element)
{
$hashes = [];
foreach(hash_algos() as $algo) {
foreach(unpack('P*', hash($algo, $element, true)) as $hash) {
$hash = gmp_init(sprintf("%u", $hash));
$hashes[] = ($hash % $this->space);
if (count($hashes) >= $this->hashes) {
break 2;
}
}
}
return $hashes;
}
protected function isInit()
{
if (!strlen($this->filter)) {
throw new Exception("Filter has not been initialized!");
}
}
public function initialize()
{
$this->space = $this->calculateSpace($this->maxElements, $this->probability);
$this->hashes = $this->calculateHashFunctions($this->maxElements, $this->space);
if ($this->hashes > $this->numHashFunctionsAvailable()) {
throw new Exception("Cannot initialize filter with available system hash functions!");
}
$this->filter = str_repeat("\0", ceil($this->space / 8));
}
public function set($element)
{
$this->isInit();
if (!is_scalar($element)) {
$element = serialize($element);
}
$hashes = $this->hash($element);
foreach($hashes as $hash) {
$offset = (int) floor($hash / 8);
$bit = (int) ($hash % 8);
$this->filter[$offset] = chr(ord($this->filter[$offset]) | (2 ** $bit));
}
}
public function check($element)
{
$this->isInit();
if (!is_scalar($element)) {
$element = serialize($element);
}
$hashes = $this->hash($element);
foreach($hashes as $hash) {
$offset = (int) floor($hash / 8);
$bit = (int) ($hash % 8);
if (!(ord($this->filter[$offset]) & (2 ** $bit))) {
return false;
}
}
return true;
}
public function getFilter()
{
return $this->filter;
}
}
<?php
try {
$bloomfilter = new BloomFilter(100);
$bloomfilter->initialize();
$bloomfilter->set("Hello PHP!");
var_dump($bloomfilter->check("Hello PHP!"));
} catch(Exception $e) {
var_dump($e);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment