Skip to content

Instantly share code, notes, and snippets.

@mocobeta
Last active December 13, 2020 06:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mocobeta/e8cc2aa6348465c6dcc9ac99e3e87095 to your computer and use it in GitHub Desktop.
Save mocobeta/e8cc2aa6348465c6dcc9ac99e3e87095 to your computer and use it in GitHub Desktop.
compression algorithms performance comparison
use rand::prelude::*;
fn main() {
let mut rng = thread_rng();
let p: f32 = 0.00001;
let max_doc: usize = 1_000_000;
let mut postings: Vec<usize> = vec![rng.gen_range(1, 1000) as usize];
loop {
let next = postings.last().unwrap() + geo_random(p);
if next > max_doc {
break;
} else {
postings.push(next);
}
}
//println!("dummy postings: {:?}", postings);
let postings_delta = delta_values(&postings);
let gamma_coded = encode_gamma(&postings_delta);
let delta_coded = encode_delta(&postings_delta);
let m_opt: u32 = ((2.0 - (postings.len() as f64 / max_doc as f64)).ln()
/ -(1.0 - (postings.len() as f64 / max_doc as f64)).ln())
.ceil() as u32;
let golomb_coded = encode_golomb(&postings_delta, m_opt);
let m_opt = 2u32.pow((m_opt as f64).log2().ceil() as u32);
let rice_coded = encode_rice(&postings_delta, m_opt);
let vbyte_coded = encode_vbyte(&postings_delta);
println!("Length (gamma) = {} bytes", gamma_coded.to_bytes().len());
println!("Length (delta) = {} bytes", delta_coded.to_bytes().len());
println!("Length (golomb) = {} bytes", golomb_coded.to_bytes().len());
println!("Length (rice) = {} bytes", rice_coded.to_bytes().len());
println!("Length (vbyte) = {} bytes", vbyte_coded.len());
}
fn delta_values(li: &[usize]) -> Vec<usize> {
if li.len() == 0 {
return vec![];
}
let mut res = Vec::new();
res.push(li[0]);
for i in 1..li.len() {
let delta = li[i] - li[i - 1];
assert!(delta > 0);
res.push(delta);
}
res
}
fn geo_random(p: f32) -> usize {
let mut rng = thread_rng();
let rand: f32 = rng.gen();
(rand.ln() / (1.0 - p).ln()).ceil() as usize
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment