Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
<?php
class BloomFilter
{
const BYTE = 8;
private $filter;
private $filter_length;
private $bit_num;
private $hash_num;
public function __construct($byte_size, $hash_num, $filter = null)
{
$this->filter_length = $byte_size;
$this->bit_num = $this->filter_length * self::BYTE;
$this->hash_num = $hash_num;
if ($filter === null) {
$this->filter = str_repeat("\0", $this->filter_length);
} else {
$this->filter = $filter;
}
}
public function add($val)
{
for ($k = 0; $k < $this->hash_num; $k++) {
$hash64 = $this->hash($val . $k);
$flag_up_block = (int) ($hash64 / self::BYTE);
$flag_up_bit = $hash64 % self::BYTE;
$this->filter[$flag_up_block] = chr(ord($this->filter[$flag_up_block]) | (1 << $flag_up_bit));
}
}
public function query($val)
{
for ($k = 0; $k < $this->hash_num; $k++) {
$hash64 = $this->hash($val . $k);
$flag_up_block = (int) ($hash64 / self::BYTE);
$flag_up_bit = $hash64 % self::BYTE;
if ((ord($this->filter[$flag_up_block]) & (1 << $flag_up_bit)) === 0) {
return false;
}
}
return true;
}
private function hash($val)
{
$hash = hexdec(substr(md5($val), 0, 15)); // 16文字までカットすると、負の値になることがあって以後の処理がうまく動かない
return $hash % ($this->bit_num);
}
public function toArray()
{
return array(
'bit_num' => $this->filter_length * self::BYTE,
'hash_num' => $this->hash_num,
'filter' => $this->filter,
);
}
public static function retrieveFromArray(array $filter_status)
{
return new BloomFilter($filter_status['bit_num'] / self::BYTE, $filter_status['hash_num'], $filter_status['filter']);
}
public function getPositiveProbability()
{
$bitcount = 0;
for ($i = 0; $i < $this->filter_length; $i++) {
$byte = ord($this->filter[$i]);
for ($k = 0; $k < self::BYTE; $k++) {
if ((($byte >> $k) & 1) === 1) $bitcount++;
}
}
$bitup_probability = $bitcount / ($this->filter_length * self::BYTE);
return pow($bitup_probability, $this->hash_num);
}
}
//use Example
/*
$records = 500;
$Q = 1000;
$f = new BloomFilter(1024, 4);
for($i=0;$i<$records;$i++) $f->add(md5($i));
$corrects = 0;
$errors = 0;
for($i=0;$i<$Q;$i++) {
if ($f->query(md5($i))) {
if ($i < $records)$corrects++;
else $errors++;
//existence !!
}
}
echo "CORRECTS: $corrects ERRORS: $errors" . PHP_EOL;
$array = $f->toArray();
$f = BloomFilter::retrieveFromArray($array);
echo "RETRIEVED!" . PHP_EOL;
$corrects = 0;
$errors = 0;
for($i=0;$i<$Q;$i++) {
if ($f->query(md5($i))) {
if ($i < $records)$corrects++;
else $errors++;
//existence !!
}
}
echo "CORRECTS: $corrects ERRORS: $errors" . PHP_EOL;
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.