|
extern crate byteorder; |
|
extern crate fst; |
|
|
|
use byteorder::{ByteOrder, BigEndian}; |
|
use fst::{IntoStreamer, Streamer}; |
|
|
|
fn main() { |
|
let primes = sieve(100_000_000); |
|
println!("# primes: {}, size: {} bytes", primes.len(), primes.len() * 4); |
|
|
|
let fst = FstIntSet::from_vec(primes); |
|
println!("FST size: {} bytes", fst.size()); |
|
|
|
// Very quickly query for whether an integer is prime. |
|
println!("prime? {} => {}", 1, fst.contains(1)); |
|
println!("prime? {} => {}", 2, fst.contains(2)); |
|
println!("prime? {} => {}", 999613, fst.contains(999613)); |
|
println!("prime? {} => {}", 999614, fst.contains(999614)); |
|
println!("prime? {} => {}", 9999973, fst.contains(9999973)); |
|
|
|
// Range queries are totally cool because big-endian preserves |
|
// integer ordering. If we encoded integers as little-endian, this would |
|
// not work! |
|
let (start, end) = (999_800, 1_000_000); |
|
println!("primes in range {}-{}", start, end); |
|
let mut stream = fst.range(start, end); |
|
while let Some(key) = stream.next() { |
|
let n = BigEndian::read_u32(&key); |
|
println!("{}", n); |
|
} |
|
} |
|
|
|
struct FstIntSet { |
|
set: fst::Set, |
|
} |
|
|
|
impl FstIntSet { |
|
fn from_vec(mut nums: Vec<u32>) -> FstIntSet { |
|
nums.sort(); |
|
nums.dedup(); |
|
|
|
let mut build = fst::SetBuilder::memory(); |
|
for n in nums { |
|
let mut buf = [0; 4]; |
|
BigEndian::write_u32(&mut buf, n); |
|
build.insert(buf).unwrap(); |
|
} |
|
let bytes = build.into_inner().unwrap(); |
|
FstIntSet { |
|
set: fst::Set::from_bytes(bytes).unwrap(), |
|
} |
|
} |
|
|
|
fn contains(&self, n: u32) -> bool { |
|
let mut buf = [0; 4]; |
|
BigEndian::write_u32(&mut buf, n); |
|
self.set.contains(buf) |
|
} |
|
|
|
fn range(&self, start: u32, end: u32) -> fst::set::Stream { |
|
let (mut bstart, mut bend) = ([0; 4], [0; 4]); |
|
BigEndian::write_u32(&mut bstart, start); |
|
BigEndian::write_u32(&mut bend, end); |
|
self.set.range().ge(bstart).lt(bend).into_stream() |
|
} |
|
|
|
fn size(&self) -> usize { |
|
self.set.as_fst().size() |
|
} |
|
} |
|
|
|
fn sieve(n: usize) -> Vec<u32> { |
|
if n <= 1 { |
|
return vec![]; |
|
} |
|
|
|
// Taken from: |
|
// https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes#Algorithm_and_variants |
|
let mut marked = vec![true; n+1]; |
|
marked[0] = false; |
|
marked[1] = false; |
|
for i in 2..((n as f64).sqrt().ceil() as usize) { |
|
if marked[i] { |
|
let mut j = i * i; |
|
let mut k = 0; |
|
while j <= n { |
|
marked[j] = false; |
|
k += 1; |
|
j = (i * i) + k * i; |
|
} |
|
} |
|
} |
|
marked.iter() |
|
.enumerate() |
|
.filter_map(|(i, &m)| if !m { None } else { Some(i as u32) }) |
|
.collect() |
|
} |