Skip to content

Instantly share code, notes, and snippets.

@rob-p
Last active June 8, 2024 13:42
Show Gist options
  • Save rob-p/cadc9bd2055a3464aca48cdea7e4ab47 to your computer and use it in GitHub Desktop.
Save rob-p/cadc9bd2055a3464aca48cdea7e4ab47 to your computer and use it in GitHub Desktop.
use anyhow;
use ndarray::Array2;
use std::fs;
use std::cmp::Ordering;
fn naive_table(text: &str) -> Vec<u32> {
let text = text.as_bytes();
assert!(text.len() <= u32::MAX as usize);
let mut table = vec![0u32; text.len()];
for (i, element) in table.iter_mut().enumerate() {
*element = i as u32;
}
table.sort_by(|&a, &b| text[a as usize..].cmp(&text[b as usize..]));
table
}
struct BTreeState<'a> {
nblocks: usize,
B: usize,
t: usize,
n: usize,
a: &'a [u32],
}
impl<'a> BTreeState<'a> {
fn new(n: usize, b: usize, a: &'a [u32]) -> Self {
let B = b;
let nblocks = (n + B - 1) / B;
BTreeState {
nblocks,
B,
t: 0,
n,
a: &a,
}
}
fn search(&self, x: u32, btree: &Array2<u32>) -> u32 {
let mut k = 0;
let mut res = PLACEHOLDER;
loop {
'start: {
while k < self.nblocks {
for i in 0..self.B {
if btree[[k, i]] >= x {
res = btree[[k, i]];
let oldk = k;
k = go(&self, k, i);
println!(r"res = {res}, k = {oldk}, i = {i}, newk = {k}");
break 'start;
}
}
}
k = go(&self, k, self.B);
}
if k >= self.nblocks { break; }
}
res
}
fn search_fast(&self, x: &str, btree: &Array2<u32>, s: &str) -> u32 {
let mut k = 0;
let mut res = PLACEHOLDER;
while k < self.nblocks {
let mut mask: usize = 1 << self.B;
for i in 0..self.B {
mask |= match s[btree[[k, i]] as usize..].cmp(x) {
Ordering::Greater | Ordering::Equal => {
1
},
Ordering::Less => {
0
},
} << i;
}
let i = mask.trailing_zeros() as usize;
if i < self.B {
res = btree[[k, i]]
}
k = self.get_offset(k, i);
}
res
}
fn get_offset(&self, k: usize, i: usize) -> usize {
k * (self.B + 1) + i + 1
}
}
const PLACEHOLDER: u32 = u32::MAX;
fn go(state: &BTreeState, k: usize, i: usize) -> usize {
k * (state.B + 1) + i + 1
}
fn build(state: &mut BTreeState, k: usize, btree: &mut Array2<u32>) {
if k < state.nblocks {
for i in 0..state.B {
build(state, state.get_offset(k, i), btree);
btree[[k, i]] = if state.t < state.n {
let r = state.a[state.t];
state.t += 1;
r
} else {
PLACEHOLDER
};
}
let idx = state.get_offset(k, state.B);
build(state, idx, btree);
}
}
fn main() {
let s = "the quick brown fox was quick.$";
let a = naive_table(&s);
const B: usize = 4;
let mut state = BTreeState::new(a.len(), B, &a);
let mut btree = Array2::zeros((state.nblocks, B));
build(&mut state, 0, &mut btree);
println!("btree =\n{:?}", btree);
let p = "quick#";
let r2 = state.search_fast(&p, &btree, &s);
let r = r2;
println!("search for {:?} yields {}, fast search yields {}", p, r, r2);
println!("{}", &s[r2 as usize..]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment