Skip to content

Instantly share code, notes, and snippets.

@ExpHP
Created August 18, 2016 15:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ExpHP/c081842d1daf947bba51ce465716a734 to your computer and use it in GitHub Desktop.
Save ExpHP/c081842d1daf947bba51ce465716a734 to your computer and use it in GitHub Desktop.
segmented sieve
#![feature(step_by)]
#![feature(test)]
pub extern crate test;
use test::Bencher;
// Using a segmented sieve method for factorization.
// This has a couple of speed advantages that are not possible to offer
// currently through the abstractions present in my `factor` crate.
//
// * A more efficient method of generating a factorization sieve (which
// in `factor` is known as a `ListFactorizer`) that builds it in segments.
// * Linear optimizations in the construction of Factorization objects
// from the list, which avoid requirements like `HashMap` by taking advantage
// of the fact that the factorization sieve we generate always produces the
// smallest factor. (hence factors are produced in order, making it easy
// to count duplicates)
pub type Int = i32;
pub const INT_TYPE_BYTES: usize = 4; // ::std::mem::size_of is not a const fn, because lolidunno
// The goal is to work on the sieve in segments that fit in L1 cache.
// Unfortunately I do not know how best to make this configurable.
const L1D_CACHE_SIZE: usize = 32768;
pub const SEGMENT_LEN: usize = L1D_CACHE_SIZE / INT_TYPE_BYTES;
/// An object for efficiently factorizing numbers.
///
/// A `FactorSieve` is simply a precomputed array which stores the smallest
/// prime factor `p` for each integer `x`, up to some limit. The remaining
/// factors are found recursively by using the sieve to factorize `x / p`
/// until `x == 1`.
///
/// For any non-composite `x` or zero, `sieve[x] = x`.
#[derive(Clone)]
pub struct FactorSieve {
sieve: Vec<Int>,
}
impl FactorSieve {
/// Compute a factor sieve.
pub fn new(limit: Int) -> Self {
assert!(limit >= 0);
FactorSieve { sieve: factor_sieve_segmented(limit - 1, SEGMENT_LEN) }
}
/// Factorize a number, one prime factor at a time (with duplicates)
///
/// Produces prime factors of `x` one by one in order of increasing magnitude.
/// If `p` appears to the power `exp` in the factorization of `x`, then the
/// iterator will yield it `exp` times.
pub fn prime_iter(&self, x: Int) -> SieveIter {
assert!(x >= 0);
assert!(x < self.sieve.len() as Int);
SieveIter { x: x, sieve: &self.sieve }
}
/// Factorize a number into `(prime, exp)` pairs.
pub fn factorize(&self, x: Int) -> Vec<(Int, usize)> {
assert!(x >= 0);
// special cases to make life easier
if (x == 0) { return vec![(0, 1)]; }
if (x == 1) { return vec![]; }
let mut iter = self.prime_iter(x);
// the rest of this is basically trying to implement something like the
// following in iterator-speak (but there's no grouping iterator in std):
// iter.group_by(|p| p)
// .map(|(p, group)| (p, group.count()))
// initialize with first item
let mut prev = iter.next().unwrap(); // safe for x != 1
let mut count = 1;
// count occurrences
let mut out = vec![];
for p in iter {
if p != prev {
out.push((prev, count));
prev = p;
count = 0;
}
count += 1;
}
out.push((prev, count)); // final group
out
}
/// Get the smallest prime factor of a number
#[inline]
pub fn get_prime(&self, x: Int) -> Int { self[x] }
pub fn into_vec(self) -> Vec<Int> { self.sieve }
pub fn as_vec(&self) -> &Vec<Int> { &self.sieve }
pub fn as_vec_mut(&mut self) -> &mut Vec<Int> { &mut self.sieve }
}
impl ::std::ops::Index<Int> for FactorSieve {
type Output = Int;
#[inline]
fn index(&self, index: Int) -> &Int { &self.sieve[index as usize] }
}
/// Get prime factors of a number one by one using a sieve.
pub struct SieveIter<'a> {
x: Int,
sieve: &'a Vec<Int>,
}
impl<'a> Iterator for SieveIter<'a> {
type Item = Int;
#[inline]
fn next(&mut self) -> Option<Int> {
match self.x {
1 => None,
x => {
let p = self.sieve[x as usize];
self.x /= p;
Some(p)
}
}
}
}
//------------------------------------------------------
// The implementation is adapted from a segmented Sieve of Eratosthenes
// found here: http://primesieve.org/segmented_sieve.html
//
// The idea in that code is to perform incremental work, using all prime
// factors to sieve one region at a time that fits in L1 cache.
// However, we do not gain all benefits of the original code due to
// fundamental changes in design (we're factorizing all numbers, not just
// finding primes), and probably due to other things I failed to consider
// properly when adapting the code.
// FIXME: make this more idiomatic, and less of a `mut`-fest
// FIXME: make this use exclusive limits
pub fn factor_sieve_segmented(limit: Int, segment_size: usize) -> Vec<Int>
{
// Notice limit is inclusive in this function.
// Small cases where no primes have multiples to sieve.
debug_assert!(limit >= -1);
if limit < 4 {
return (0..limit+1).collect();
}
// Consider that the smallest `x > p` for which `out[x] == p` is `p*p`.
// As a result, any primes for which `p*p > limit` would be pointless to use in
// trial division, and do not need to be tracked beyond their first encounter.
//
// The primes which DO matter are only those up to sqrt(limit), which we can
// collect in comparatively no time with a standard sieve of eratosthenes.
let primes: Vec<_> = {
let mut s_limit = (limit as f64).powf(0.5) as usize;
while s_limit * s_limit < limit as usize { s_limit += 1 }; // ensure large enough
prime_sieve_simple(s_limit).into_iter()
.enumerate().skip(2).filter(|&(_,x)| x)
.map(|(p,_)| p)
.collect()
};
assert_eq!(primes.first(), Some(&2)); // `None` is prevented by special case at top
// Workspace containing one segment of the output at a time.
// The fill value of 0 is not significant and may as well be uninitialized.
let mut workspace = filled_vec(segment_size, 0);
let mut sieve = filled_vec((limit + 1) as usize, 0);
// Vector which tracks locations of the first multiple of each prime in the next segment.
let mut offsets = vec![];
// Iterator for small primes that are not yet tracked
let mut new_primes = primes.iter().cloned().peekable();
// Iterate over segments
let lows = (0..).step_by(segment_size);
for (low, segment) in lows.zip(sieve.chunks_mut(segment_size)) {
// Fill default values for this segment, which are equal to the sieve index.
// These values will be kept by 0, 1, and any primes.
for j in 0..workspace.len() {
workspace[j] = (low + j) as Int;
}
let high = low + segment.len();
// Begin tracking the primes whose first composite is in this segment
while let Some(p) = new_primes.peek().cloned() {
if p*p >= high { break; } // outside segment
new_primes.next(); // consume
offsets.push(p*p - low);
}
// Collect the offsets
let mut it = offsets.iter_mut().zip(primes.iter());
// A small wheel optimization: Handle p = 2 separately so we can skip
// even multiples of odd primes.
if let Some((start2, _)) = it.next() {
for i in (*start2..workspace.len()).step_by(2) {
workspace[i] = 2;
}
// FIXME is this right? I wrote this a while ago, and looking
// at it now it would seem that I also intended to reduce start2
// modulo something.
// Currently segment size is never odd so there is no impact
*start2 += (segment_size % 2);
}
// Sieve the current segment with primes >= 3.
// Do them in reverse order so that the smallest ones overwrite larger ones.
for (start, &p) in make_double_ended(it).rev() {
// The initial value of `start` is `x - low`, where `x` is the first
// odd multiple of `p` inside this segment.
let mut j = *start;
let step = (2*p) as usize;
while j < segment_size {
workspace[j] = p as Int;
j += step;
}
// j is now equal to the offset of the first odd multiple outside this segment.
// Subtract segment_size to account for the change in offset between main loop iterations.
// (note: this correctly accounts for the case where p > segment_size)
*start = j - segment_size;
}
copy(&workspace[..], segment);
}
sieve
}
// helper to identify small primes
// limit is INCLUSIVE
fn prime_sieve_simple(limit: usize) -> Vec<bool> {
let mut sieve = filled_vec(limit + 1, true);
for i in 2.. {
if i * i > limit { break }
if (sieve[i]) {
for j in (i*i..limit+1).step_by(i) {
sieve[j] = false;
}
}
}
sieve
}
//----------------------------------------------------
// for benchmarking against the segmented method
// limit is EXCLUSIVE
fn factor_sieve_simple(limit: Int) -> Vec<Int> {
let mut sieve: Vec<_> = (0..limit).collect();
for x in (2..limit).step_by(2) { sieve[x as usize] = 2; }
for x in (3..).step_by(2) {
let first = x*x;
if first > limit { break }
if sieve[x as usize] == x {
for m in (first..limit).step_by(2*x) {
if sieve[m as usize] == m {
sieve[m as usize] = x;
}
}
}
}
sieve
}
fn make_double_ended<T,I:Iterator<Item=T>>(iter: I) -> ::std::vec::IntoIter<T> {
iter.collect::<Vec<_>>().into_iter()
}
//---------------------------------------
fn filled_vec<T:Clone>(size: usize, value: T) -> Vec<T> {
let mut vec = vec![];
vec.resize(size, value);
vec
}
fn fill<T:Clone>(slice: &mut [T], value: T) {
for x in slice.iter_mut() { *x = value.clone() }
}
fn copy<T:Copy>(source: &[T], dest: &mut [T]) {
assert!(source.len() >= dest.len());
for i in 0..dest.len() {
dest[i] = source[i];
}
}
#[test]
fn test() {
// a "definitely correct" sieve formed by trial division.
// It is large enough to include values for which various certain
// incorrectly-implemented optimizations could give the wrong prime
// factor (e.g. a certain easy mistake results in giving 3 for 18,
// 5 for 45, and 7 for 175; and the first incorrect value may be pushed
// even further back if a large wheel optimization is used)
let mut correct = vec![0,1];
for i in 2..2000 {
for k in 2..(i+1) {
if i % k == 0 {
correct.push(k);
break;
}
}
}
for (i,x) in correct.clone().into_iter().enumerate() {
if x == 3 {
println!("{}", i);
}
}
// a "definitely definitely correct" sieve to verify that one against,
// up to a point
assert_eq!(correct.len(), 2000);
assert_eq!(&correct[..12], &vec![0,1,2,3,2,5,2,7,2,3,2,11][..]);
// try various sizes, in particular:
for len in vec![
0, 1, // Obvious edge cases (length 0, 1)
2, 3, // More subtle edge case (no multiples to cross off)
9, 10, 11, // End in a prime square; a composite; and a prime
correct.len(), // Catch erroneous values for larger x
] {
let actual = FactorSieve::new(len as Int).into_vec();
assert_eq!(&actual[..], &correct[..len]);
}
}
#[bench]
fn bench_simple(b: &mut Bencher) {
b.iter(|| {
let n = 10*SEGMENT_LEN + 20;
let n = 100_000_000;
factor_sieve_simple(n as Int).last().cloned()
});
}
#[bench]
fn bench_segmented(b: &mut Bencher) {
b.iter(|| {
let n = 10*SEGMENT_LEN + 20; // extra to force an incomplete segment
let n = 100_000_000;
FactorSieve::new(n as Int).into_vec().last().cloned()
});
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment