Skip to content

Instantly share code, notes, and snippets.

@jiacai2050
Created May 31, 2023 07:32
Show Gist options
  • Save jiacai2050/88dabe0e288db61dc0be2835528cd277 to your computer and use it in GitHub Desktop.
Save jiacai2050/88dabe0e288db61dc0be2835528cd277 to your computer and use it in GitHub Desktop.
[dependencies]
rand = "0.8.5"
xorfilter-rs = { git = "https://github.com/datafuse-extras/xorfilter", features = [
"cbordata",
], tag = "databend-alpha.4" }
use std::collections::HashSet;
use rand::Rng;
use xorfilter::{BuildHasherDefault, Xor8Builder};
/// Generates one random key: a vector of 100 bytes drawn from the
/// thread-local random number generator.
fn random() -> Vec<u8> {
    const KEY_LEN: usize = 100;

    let mut rng = rand::thread_rng();
    // One `gen` call per byte, collected straight into the returned vector.
    std::iter::repeat_with(|| rng.gen::<u8>())
        .take(KEY_LEN)
        .collect()
}
/// Builds an `Xor8` filter from `value_num` randomly generated keys and prints
/// the number of distinct keys, the serialized filter size in bytes, and the
/// serialized size divided by `value_num`.
///
/// # Panics
///
/// Panics if the builder fails to construct the filter.
fn run(value_num: usize) {
    let mut builder = Xor8Builder::<BuildHasherDefault>::default();
    let mut keys = HashSet::with_capacity(value_num);

    for _ in 0..value_num {
        let key = random();
        // Feed the builder by reference first, then move the key into the
        // set: this avoids cloning every 100-byte key.
        builder.insert(&key);
        keys.insert(key);
    }

    let filter = builder.build().expect("failed to build xor filter");
    let serialized = filter.to_bytes();

    // `max(1)` keeps the ratio finite when no keys were inserted, so the
    // empty case reports the raw serialized size.
    let bytes_per_key = serialized.len() as f64 / value_num.max(1) as f64;

    println!(
        "key_num:{}, len:{}, byte_per_key:{}",
        keys.len(),
        serialized.len(),
        bytes_per_key
    );
}
pub fn main() {
run(0);
run(1);
run(5);
run(10);
run(100);
run(1000);
run(2000);
run(3000);
run(4000);
run(8000);
}
@jiacai2050
Copy link
Author

key_num:0, len:54, byte_per_key:54
key_num:1, len:57, byte_per_key:57
key_num:5, len:63, byte_per_key:12.6
key_num:10, len:69, byte_per_key:6.9
key_num:100, len:177, byte_per_key:1.77
key_num:1000, len:1284, byte_per_key:1.284
key_num:2000, len:2514, byte_per_key:1.257
key_num:3000, len:3744, byte_per_key:1.248
key_num:4000, len:4974, byte_per_key:1.2435
key_num:8000, len:9894, byte_per_key:1.23675

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment