Skip to content

Instantly share code, notes, and snippets.

@feymartynov
Last active October 3, 2022 06:37
Show Gist options
  • Save feymartynov/d27de97deaef4107fe4778462fe2a3c3 to your computer and use it in GitHub Desktop.
Save feymartynov/d27de97deaef4107fe4778462fe2a3c3 to your computer and use it in GitHub Desktop.
vector2 rust
[package]
name = "vector2"
version = "0.1.0"
edition = "2021"
[dev-dependencies]
rand = "0.8"
criterion = "0.4"
[[bench]]
name = "vector2"
harness = false
use std::cmp::Ordering;
static BITS_TABLE: [u64; 256] = build_bits_table();
const fn build_bits_table() -> [u64; 256] {
let mut bits_table = [0; 256];
let mut i = 0;
while i < 256 {
bits_table[i] = 1 << (i & 0x3f);
i += 1;
}
bits_table
}
fn bytes_to_bits(bytes: &[u8]) -> [u64; 4] {
let mut bits = [0; 4];
for i in 0..(bytes.len()) {
bits[(bytes[i] >> 6) as usize] |= BITS_TABLE[bytes[i] as usize];
}
bits
}
pub fn intersect_array<T: Ord>(a: &[T], b: &[T]) {
let an = a.len();
let bn = b.len();
let mut i = 0;
let mut j = 0;
while i < an && j < bn {
match a[i].cmp(&b[i]) {
Ordering::Less => i += 1,
Ordering::Greater => j += 1,
Ordering::Equal => {
i += 1;
j += 1;
}
}
}
}
#[derive(Debug)]
struct Pack {
a: u8,
b: u8,
c: u8,
d: [u8; 256],
}
impl Default for Pack {
fn default() -> Self {
Self {
a: 0,
b: 0,
c: 0,
d: [0; 256],
}
}
}
pub fn compress_array(array: Vec<u32>) -> Vec<u8> {
let mut r = Vec::new();
let mut p = Pack::default();
for v in array {
let a = (v >> 16) as u8;
let b = (v >> 8) as u8;
let d = v as u8;
if a != p.a || b != p.b {
if p.c > 0 {
r.push(p.a);
r.push(p.b);
r.push(p.c);
r.extend_from_slice(&p.d[..(p.c as usize)]);
}
p = Pack::default();
p.a = a;
p.b = b;
}
p.d[p.c as usize] = d;
p.c += 1;
}
if p.c > 0 {
r.push(p.a);
r.push(p.b);
r.push(p.c);
r.extend_from_slice(&p.d[..(p.c as usize)]);
}
r
}
#[derive(Debug, Default)]
struct Head {
base: u16,
size: u8,
}
pub fn intersect_vector_old(a: &[u8], b: &[u8]) -> u64 {
let mut r = 0;
let an = a.len();
let bn = b.len();
let mut i = 0;
let mut j = 0;
while i < an && j < bn {
let x = unsafe { &*(a.as_ptr().add(i) as *const Head) };
let y = unsafe { &*(b.as_ptr().add(j) as *const Head) };
match x.base.cmp(&y.base) {
Ordering::Less => i += 3 + (x.size as usize),
Ordering::Greater => j += 3 + (y.size as usize),
Ordering::Equal => {
i += 3;
let mut bits0 = bytes_to_bits(&a[i..(i + (x.size as usize))]);
j += 3;
let bits1 = bytes_to_bits(&b[j..(j + (y.size as usize))]);
bits0[0] &= bits1[0];
bits0[1] &= bits1[1];
bits0[2] &= bits1[2];
bits0[3] &= bits1[3];
r += bits0[0] + bits0[1] + bits0[2] + bits0[3];
i += x.size as usize;
j += y.size as usize;
}
}
}
r
}
pub fn intersect_vector_new(a: &[u8], b: &[u8]) -> u64 {
let mut r = 0_u64;
let mut i = 0;
let mut j = 0;
while i < a.len() && j < b.len() {
let x_base = (a[i] as u16) << 8 | (a[i + 1] as u16);
let y_base = (b[j] as u16) << 8 | (b[j + 1] as u16);
match x_base.cmp(&y_base) {
Ordering::Less => i += 3 + (a[i + 2] as usize),
Ordering::Greater => j += 3 + (b[j + 2] as usize),
Ordering::Equal => {
let x_size = a[i + 2] as usize;
let y_size = b[j + 2] as usize;
let mut h = [0; 256];
r = 0;
i += 3;
for v in a[i..(i + x_size)].iter().copied() {
h[v as usize] = 1;
}
j += 3;
for v in b[j..(j + y_size)].iter().copied() {
let ok = h[v as usize] as u64;
h[r as usize] = v;
r += ok;
}
i += x_size;
j += y_size;
}
}
}
r
}
////////////////////////////////////////////////////////////////////////////////
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn vector_intersect_old() {
let v0 = compress_array(vec![0, 1, 4, 63, 64, 67, 78, 240, 512]);
let v1 = compress_array(vec![0, 1, 3, 63, 64, 77, 89, 205, 240, 512, 1024, 2047]);
let r = intersect_vector_old(&v0, &v1);
assert_eq!(r, 9223653511831486469);
}
#[test]
fn vector_intersect_new() {
let v0 = compress_array(vec![0, 1, 4, 63, 64, 67, 78, 240, 512]);
let v1 = compress_array(vec![0, 1, 3, 63, 64, 77, 89, 205, 240, 512, 1024, 2047]);
let r = intersect_vector_new(&v0, &v1);
assert_eq!(r, 1);
}
}
extern crate criterion;
extern crate rand;
use criterion::*;
use rand::prelude::*;
////////////////////////////////////////////////////////////////////////////////
const FIRST_ARRAY_SIZE: usize = 128 * 1024;
const SECOND_ARRAY_SIZE: usize = 1024 * 1024;
fn build_compressed_arrays() -> (Vec<u8>, Vec<u8>) {
let (a0, a1) = build_arrays();
let a0 = vector2::compress_array(a0);
let a1 = vector2::compress_array(a1);
(a0, a1)
}
fn build_arrays() -> (Vec<u32>, Vec<u32>) {
let a0 = rand_array(FIRST_ARRAY_SIZE, FIRST_ARRAY_SIZE as u32);
let a1 = rand_array(SECOND_ARRAY_SIZE, SECOND_ARRAY_SIZE as u32);
(a0, a1)
}
fn rand_array(n: usize, max: u32) -> Vec<u32> {
let mut rng = rand::thread_rng();
let mut a = (0..n).map(|_| rng.gen_range(0..max)).collect::<Vec<_>>();
a.sort_unstable();
a.dedup();
a
}
////////////////////////////////////////////////////////////////////////////////
fn intersect_array(c: &mut Criterion) {
c.bench_function("intersect_array", |bencher| {
bencher.iter_batched(
build_arrays,
|(ref a0, ref a1)| vector2::intersect_array(a0, a1),
BatchSize::SmallInput,
);
});
}
fn intersect_vector_old(c: &mut Criterion) {
c.bench_function("intersect_vector_old", |bencher| {
bencher.iter_batched(
build_compressed_arrays,
|(ref a0, ref a1)| vector2::intersect_vector_old(a0, a1),
BatchSize::SmallInput,
)
});
}
fn intersect_vector_new(c: &mut Criterion) {
c.bench_function("intersect_vector_new", |bencher| {
bencher.iter_batched(
build_compressed_arrays,
|(ref a0, ref a1)| vector2::intersect_vector_new(a0, a1),
BatchSize::SmallInput,
)
});
}
////////////////////////////////////////////////////////////////////////////////
criterion_group!(
benches,
intersect_array,
intersect_vector_old,
intersect_vector_new
);
criterion_main!(benches);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment