ExpHP/factor_sieve.rs

## factor_sieve.rs
#![feature(step_by)]
#![feature(test)]
pub extern crate test;

use test::Bencher;

// Using a segmented sieve method for factorization.

// This has a couple of speed advantages that are not possible to offer
// currently through the abstractions present in my `factor` crate.
//
// * A more efficient method of generating a factorization sieve (which
//   in `factor` is known as a `ListFactorizer`) that builds it in segments.
// * Linear optimizations in the construction of Factorization objects
//   from the list, which avoid requirements like `HashMap` by taking advantage
//   of the fact that the factorization sieve we generate always produces the
//   smallest factor. (hence factors are produced in order, making it easy
//   to count duplicates)

pub type Int = i32;
pub const INT_TYPE_BYTES: usize = 4; // ::std::mem::size_of is not a const fn, because lolidunno

// The goal is to work on the sieve in segments that fit in L1 cache.
// Unfortunately I do not know how best to make this configurable.
const L1D_CACHE_SIZE: usize = 32768;
pub const SEGMENT_LEN: usize = L1D_CACHE_SIZE / INT_TYPE_BYTES;

/// An object for efficiently factorizing numbers.
///
/// A `FactorSieve` is simply a precomputed array which stores the smallest
/// prime factor `p` for each integer `x`, up to some limit.  The remaining
/// factors are found recursively by using the sieve to factorize `x / p`
/// until `x == 1`.
///
/// For any non-composite `x` or zero, `sieve[x] = x`.
#[derive(Clone)]
pub struct FactorSieve {
	sieve: Vec<Int>,
}

impl FactorSieve {
	/// Compute a factor sieve.
	pub fn new(limit: Int) -> Self {
		assert!(limit >= 0);
		FactorSieve { sieve: factor_sieve_segmented(limit - 1, SEGMENT_LEN) }
	}

	/// Factorize a number, one prime factor at a time (with duplicates)
	///
	/// Produces prime factors of `x` one by one in order of increasing magnitude.
	/// If `p` appears to the power `exp` in the factorization of `x`, then the
	/// iterator will yield it `exp` times.
	pub fn prime_iter(&self, x: Int) -> SieveIter {
		assert!(x >= 0);
		assert!(x < self.sieve.len() as Int);
		SieveIter { x: x, sieve: &self.sieve }
	}

	/// Factorize a number into `(prime, exp)` pairs.
	pub fn factorize(&self, x: Int) -> Vec<(Int, usize)> {
		assert!(x >= 0);
		// special cases to make life easier
		if (x == 0) { return vec![(0, 1)]; }
		if (x == 1) { return vec![]; }

		let mut iter = self.prime_iter(x);

		// the rest of this is basically trying to implement something like the
		// following in iterator-speak (but there's no grouping iterator in std):
		//    iter.group_by(|p| p)
		//        .map(|(p, group)| (p, group.count()))

		// initialize with first item
		let mut prev = iter.next().unwrap(); // safe for x != 1
		let mut count = 1;

		// count occurrences
		let mut out = vec![];
		for p in iter {
			if p != prev {
				out.push((prev, count));
				prev = p;
				count = 0;
			}
			count += 1;
		}
		out.push((prev, count)); // final group
		out
	}

	/// Get the smallest prime factor of a number
	#[inline]
	pub fn get_prime(&self, x: Int) -> Int { self[x] }

	pub fn into_vec(self) -> Vec<Int> { self.sieve }
	pub fn as_vec(&self) -> &Vec<Int> { &self.sieve }
	pub fn as_vec_mut(&mut self) -> &mut Vec<Int> { &mut self.sieve }
}

impl ::std::ops::Index<Int> for FactorSieve {
	type Output = Int;
	#[inline]
	fn index(&self, index: Int) -> &Int { &self.sieve[index as usize] }
}

/// Get prime factors of a number one by one using a sieve.
pub struct SieveIter<'a> {
	x: Int,
	sieve: &'a Vec<Int>,
}
impl<'a> Iterator for SieveIter<'a> {
	type Item = Int;
	#[inline]
	fn next(&mut self) -> Option<Int> {
		match self.x {
			1 => None,
			x => {
				let p = self.sieve[x as usize];
				self.x /= p;
				Some(p)
			}
		}
	}
}


//------------------------------------------------------

// The implementation is adapted from a segmented Sieve of Eratosthenes
// found here: http://primesieve.org/segmented_sieve.html
//
// The idea in that code is to perform incremental work, using all prime
// factors to sieve one region at a time that fits in L1 cache.
// However, we do not gain all benefits of the original code due to
// fundamental changes in design (we're factorizing all numbers, not just
// finding primes), and probably due to other things I failed to consider
// properly when adapting the code.

// FIXME: make this more idiomatic, and less of a `mut`-fest
// FIXME: make this use exclusive limits
pub fn factor_sieve_segmented(limit: Int, segment_size: usize) -> Vec<Int>
{
	// Notice limit is inclusive in this function.

	// Small cases where no primes have multiples to sieve.
	debug_assert!(limit >= -1);
	if limit < 4 {
		return (0..limit+1).collect();
	}

	// Consider that the smallest `x > p` for which `out[x] == p` is `p*p`.
	// As a result, any primes for which `p*p > limit` would be pointless to use in
	// trial division, and do not need to be tracked beyond their first encounter.
	//
	// The primes which DO matter are only those up to sqrt(limit), which we can
	// collect in comparatively no time with a standard sieve of eratosthenes.
	let primes: Vec<_> = {
		let mut s_limit = (limit as f64).powf(0.5) as usize;
		while s_limit * s_limit < limit as usize { s_limit += 1 }; // ensure large enough
		prime_sieve_simple(s_limit).into_iter()
			.enumerate().skip(2).filter(|&(_,x)| x)
			.map(|(p,_)| p)
			.collect()
	};
	assert_eq!(primes.first(), Some(&2)); // `None` is prevented by special case at top

	// Workspace containing one segment of the output at a time.
	// The fill value of 0 is not significant and may as well be uninitialized.
	let mut workspace = filled_vec(segment_size, 0);
	let mut sieve = filled_vec((limit + 1) as usize, 0);

	// Vector which tracks locations of the first multiple of each prime in the next segment.
	let mut offsets = vec![];
	// Iterator for small primes that are not yet tracked
	let mut new_primes = primes.iter().cloned().peekable();

	// Iterate over segments
	let lows = (0..).step_by(segment_size);
	for (low, segment) in lows.zip(sieve.chunks_mut(segment_size)) {

		// Fill default values for this segment, which are equal to the sieve index.
		// These values will be kept by 0, 1, and any primes.
		for j in 0..workspace.len() {
			workspace[j] = (low + j) as Int;
		}
		let high = low + segment.len();

		// Begin tracking the primes whose first composite is in this segment
		while let Some(p) = new_primes.peek().cloned() {
			if p*p >= high { break; } // outside segment

			new_primes.next(); // consume
			offsets.push(p*p - low);
		}

		// Collect the offsets
		let mut it = offsets.iter_mut().zip(primes.iter());


		// A small wheel optimization: Handle p = 2 separately so we can skip
		//  even multiples of odd primes.
		if let Some((start2, _)) = it.next() {
			for i in (*start2..workspace.len()).step_by(2) {
				workspace[i] = 2;
			}
			// FIXME is this right? I wrote this a while ago, and looking
			// at it now it would seem that I also intended to reduce start2
			// modulo something.
			// Currently segment size is never odd so there is no impact
			*start2 += (segment_size % 2);
		}

		// Sieve the current segment with primes >= 3.
		// Do them in reverse order so that the smallest ones overwrite larger ones.
		for (start, &p) in make_double_ended(it).rev() {

			// The initial value of `start` is `x - low`, where `x` is the first
			// odd multiple of `p` inside this segment.
			let mut j = *start;
			let step = (2*p) as usize;
			while j < segment_size {
				workspace[j] = p as Int;
				j += step;
			}

			// j is now equal to the offset of the first odd multiple outside this segment.
			// Subtract segment_size to account for the change in offset between main loop iterations.
			// (note: this correctly accounts for the case where p > segment_size)
			*start = j - segment_size;
		}
		copy(&workspace[..], segment);
	}
	sieve
}

// helper to identify small primes
// limit is INCLUSIVE
fn prime_sieve_simple(limit: usize) -> Vec<bool> {
  let mut sieve = filled_vec(limit + 1, true);
  for i in 2.. {
    if i * i > limit { break }
    if (sieve[i]) {
      for j in (i*i..limit+1).step_by(i) {
        sieve[j] = false;
      }
    }
  }
  sieve
}

//----------------------------------------------------

// for benchmarking against the segmented method
// limit is EXCLUSIVE
fn factor_sieve_simple(limit: Int) -> Vec<Int> {
	let mut sieve: Vec<_> = (0..limit).collect();
	for x in (2..limit).step_by(2) { sieve[x as usize] = 2; }
	for x in (3..).step_by(2) {
		let first = x*x;
		if first > limit { break }
		if sieve[x as usize] == x {
			for m in (first..limit).step_by(2*x) {
				if sieve[m as usize] == m {
					sieve[m as usize] = x;
				}
			}
		}
	}
	sieve
}

fn make_double_ended<T,I:Iterator<Item=T>>(iter: I) -> ::std::vec::IntoIter<T> {
	iter.collect::<Vec<_>>().into_iter()
}

//---------------------------------------

fn filled_vec<T:Clone>(size: usize, value: T) -> Vec<T> {
	let mut vec = vec![];
	vec.resize(size, value);
	vec
}

fn fill<T:Clone>(slice: &mut [T], value: T) {
	for x in slice.iter_mut() { *x = value.clone() }
}

fn copy<T:Copy>(source: &[T], dest: &mut [T]) {
	assert!(source.len() >= dest.len());
	for i in 0..dest.len() {
		dest[i] = source[i];
	}
}
#[test]
fn test() {
	// a "definitely correct" sieve formed by trial division.
	// It is large enough to include values for which various certain
	// incorrectly-implemented optimizations could give the wrong prime
	// factor (e.g. a certain easy mistake results in giving 3 for 18,
	// 5 for 45, and 7 for 175; and the first incorrect value may be pushed
	// even further back if a large wheel optimization is used)
	let mut correct = vec![0,1];
	for i in 2..2000 {
		for k in 2..(i+1) {
			if i % k == 0 {
				correct.push(k);
				break;
			}
		}
	}
	for (i,x) in correct.clone().into_iter().enumerate() {
		if x == 3 {
			println!("{}", i);
		}
	}
	// a "definitely definitely correct" sieve to verify that one against,
	// up to a point
	assert_eq!(correct.len(), 2000);
	assert_eq!(&correct[..12], &vec![0,1,2,3,2,5,2,7,2,3,2,11][..]);

	// try various sizes, in particular:
	for len in vec![
		0, 1,   // Obvious edge cases (length 0, 1)
		2, 3,   // More subtle edge case (no multiples to cross off)
		9, 10, 11, // End in a prime square; a composite; and a prime
		correct.len(), // Catch erroneous values for larger x
	] {
		let actual = FactorSieve::new(len as Int).into_vec();
		assert_eq!(&actual[..], &correct[..len]);
	}
}

#[bench]
fn bench_simple(b: &mut Bencher) {
	b.iter(|| {
		let n = 10*SEGMENT_LEN + 20;
		let n = 100_000_000;
		factor_sieve_simple(n as Int).last().cloned()
	});
}
#[bench]
fn bench_segmented(b: &mut Bencher) {
	b.iter(|| {
		let n = 10*SEGMENT_LEN + 20; // extra to force an incomplete segment
		let n = 100_000_000;
		FactorSieve::new(n as Int).into_vec().last().cloned()
	});
}
	#![feature(step_by)]
	#![feature(test)]
	pub extern crate test;

	use test::Bencher;

	// Using a segmented sieve method for factorization.

	// This has a couple of speed advantages that are not possible to offer
	// currently through the abstractions present in my `factor` crate.
	//
	// * A more efficient method of generating a factorization sieve (which
	// in `factor` is known as a `ListFactorizer`) that builds it in segments.
	// * Linear optimizations in the construction of Factorization objects
	// from the list, which avoid requirements like `HashMap` by taking advantage
	// of the fact that the factorization sieve we generate always produces the
	// smallest factor. (hence factors are produced in order, making it easy
	// to count duplicates)

	pub type Int = i32;
	pub const INT_TYPE_BYTES: usize = 4; // ::std::mem::size_of is not a const fn, because lolidunno

	// The goal is to work on the sieve in segments that fit in L1 cache.
	// Unfortunately I do not know how best to make this configurable.
	const L1D_CACHE_SIZE: usize = 32768;
	pub const SEGMENT_LEN: usize = L1D_CACHE_SIZE / INT_TYPE_BYTES;

	/// An object for efficiently factorizing numbers.
	///
	/// A `FactorSieve` is simply a precomputed array which stores the smallest
	/// prime factor `p` for each integer `x`, up to some limit. The remaining
	/// factors are found recursively by using the sieve to factorize `x / p`
	/// until `x == 1`.
	///
	/// For any non-composite `x` or zero, `sieve[x] = x`.
	#[derive(Clone)]
	pub struct FactorSieve {
	sieve: Vec<Int>,
	}

	impl FactorSieve {
	/// Compute a factor sieve.
	pub fn new(limit: Int) -> Self {
	assert!(limit >= 0);
	FactorSieve { sieve: factor_sieve_segmented(limit - 1, SEGMENT_LEN) }
	}

	/// Factorize a number, one prime factor at a time (with duplicates)
	///
	/// Produces prime factors of `x` one by one in order of increasing magnitude.
	/// If `p` appears to the power `exp` in the factorization of `x`, then the
	/// iterator will yield it `exp` times.
	pub fn prime_iter(&self, x: Int) -> SieveIter {
	assert!(x >= 0);
	assert!(x < self.sieve.len() as Int);
	SieveIter { x: x, sieve: &self.sieve }
	}

	/// Factorize a number into `(prime, exp)` pairs.
	pub fn factorize(&self, x: Int) -> Vec<(Int, usize)> {
	assert!(x >= 0);
	// special cases to make life easier
	if (x == 0) { return vec![(0, 1)]; }
	if (x == 1) { return vec![]; }

	let mut iter = self.prime_iter(x);

	// the rest of this is basically trying to implement something like the
	// following in iterator-speak (but there's no grouping iterator in std):
	// iter.group_by(\|p\| p)
	// .map(\|(p, group)\| (p, group.count()))

	// initialize with first item
	let mut prev = iter.next().unwrap(); // safe for x != 1
	let mut count = 1;

	// count occurrences
	let mut out = vec![];
	for p in iter {
	if p != prev {
	out.push((prev, count));
	prev = p;
	count = 0;
	}
	count += 1;
	}
	out.push((prev, count)); // final group
	out
	}

	/// Get the smallest prime factor of a number
	#[inline]
	pub fn get_prime(&self, x: Int) -> Int { self[x] }

	pub fn into_vec(self) -> Vec<Int> { self.sieve }
	pub fn as_vec(&self) -> &Vec<Int> { &self.sieve }
	pub fn as_vec_mut(&mut self) -> &mut Vec<Int> { &mut self.sieve }
	}

	impl ::std::ops::Index<Int> for FactorSieve {
	type Output = Int;
	#[inline]
	fn index(&self, index: Int) -> &Int { &self.sieve[index as usize] }
	}

	/// Get prime factors of a number one by one using a sieve.
	pub struct SieveIter<'a> {
	x: Int,
	sieve: &'a Vec<Int>,
	}
	impl<'a> Iterator for SieveIter<'a> {
	type Item = Int;
	#[inline]
	fn next(&mut self) -> Option<Int> {
	match self.x {
	1 => None,
	x => {
	let p = self.sieve[x as usize];
	self.x /= p;
	Some(p)
	}
	}
	}
	}


	//------------------------------------------------------

	// The implementation is adapted from a segmented Sieve of Eratosthenes
	// found here: http://primesieve.org/segmented_sieve.html
	//
	// The idea in that code is to perform incremental work, using all prime
	// factors to sieve one region at a time that fits in L1 cache.
	// However, we do not gain all benefits of the original code due to
	// fundamental changes in design (we're factorizing all numbers, not just
	// finding primes), and probably due to other things I failed to consider
	// properly when adapting the code.

	// FIXME: make this more idiomatic, and less of a `mut`-fest
	// FIXME: make this use exclusive limits
	pub fn factor_sieve_segmented(limit: Int, segment_size: usize) -> Vec<Int>
	{
	// Notice limit is inclusive in this function.

	// Small cases where no primes have multiples to sieve.
	debug_assert!(limit >= -1);
	if limit < 4 {
	return (0..limit+1).collect();
	}

	// Consider that the smallest `x > p` for which `out[x] == p` is `p*p`.
	// As a result, any primes for which `p*p > limit` would be pointless to use in
	// trial division, and do not need to be tracked beyond their first encounter.
	//
	// The primes which DO matter are only those up to sqrt(limit), which we can
	// collect in comparatively no time with a standard sieve of eratosthenes.
	let primes: Vec<_> = {
	let mut s_limit = (limit as f64).powf(0.5) as usize;
	while s_limit * s_limit < limit as usize { s_limit += 1 }; // ensure large enough
	prime_sieve_simple(s_limit).into_iter()
	.enumerate().skip(2).filter(\|&(_,x)\| x)
	.map(\|(p,_)\| p)
	.collect()
	};
	assert_eq!(primes.first(), Some(&2)); // `None` is prevented by special case at top

	// Workspace containing one segment of the output at a time.
	// The fill value of 0 is not significant and may as well be uninitialized.
	let mut workspace = filled_vec(segment_size, 0);
	let mut sieve = filled_vec((limit + 1) as usize, 0);

	// Vector which tracks locations of the first multiple of each prime in the next segment.
	let mut offsets = vec![];
	// Iterator for small primes that are not yet tracked
	let mut new_primes = primes.iter().cloned().peekable();

	// Iterate over segments
	let lows = (0..).step_by(segment_size);
	for (low, segment) in lows.zip(sieve.chunks_mut(segment_size)) {

	// Fill default values for this segment, which are equal to the sieve index.
	// These values will be kept by 0, 1, and any primes.
	for j in 0..workspace.len() {
	workspace[j] = (low + j) as Int;
	}
	let high = low + segment.len();

	// Begin tracking the primes whose first composite is in this segment
	while let Some(p) = new_primes.peek().cloned() {
	if p*p >= high { break; } // outside segment

	new_primes.next(); // consume
	offsets.push(p*p - low);
	}

	// Collect the offsets
	let mut it = offsets.iter_mut().zip(primes.iter());


	// A small wheel optimization: Handle p = 2 separately so we can skip
	// even multiples of odd primes.
	if let Some((start2, _)) = it.next() {
	for i in (*start2..workspace.len()).step_by(2) {
	workspace[i] = 2;
	}
	// FIXME is this right? I wrote this a while ago, and looking
	// at it now it would seem that I also intended to reduce start2
	// modulo something.
	// Currently segment size is never odd so there is no impact
	*start2 += (segment_size % 2);
	}

	// Sieve the current segment with primes >= 3.
	// Do them in reverse order so that the smallest ones overwrite larger ones.
	for (start, &p) in make_double_ended(it).rev() {

	// The initial value of `start` is `x - low`, where `x` is the first
	// odd multiple of `p` inside this segment.
	let mut j = *start;
	let step = (2*p) as usize;
	while j < segment_size {
	workspace[j] = p as Int;
	j += step;
	}

	// j is now equal to the offset of the first odd multiple outside this segment.
	// Subtract segment_size to account for the change in offset between main loop iterations.
	// (note: this correctly accounts for the case where p > segment_size)
	*start = j - segment_size;
	}
	copy(&workspace[..], segment);
	}
	sieve
	}

	// helper to identify small primes
	// limit is INCLUSIVE
	fn prime_sieve_simple(limit: usize) -> Vec<bool> {
	let mut sieve = filled_vec(limit + 1, true);
	for i in 2.. {
	if i * i > limit { break }
	if (sieve[i]) {
	for j in (i*i..limit+1).step_by(i) {
	sieve[j] = false;
	}
	}
	}
	sieve
	}

	//----------------------------------------------------

	// for benchmarking against the segmented method
	// limit is EXCLUSIVE
	fn factor_sieve_simple(limit: Int) -> Vec<Int> {
	let mut sieve: Vec<_> = (0..limit).collect();
	for x in (2..limit).step_by(2) { sieve[x as usize] = 2; }
	for x in (3..).step_by(2) {
	let first = x*x;
	if first > limit { break }
	if sieve[x as usize] == x {
	for m in (first..limit).step_by(2*x) {
	if sieve[m as usize] == m {
	sieve[m as usize] = x;
	}
	}
	}
	}
	sieve
	}

	fn make_double_ended<T,I:Iterator<Item=T>>(iter: I) -> ::std::vec::IntoIter<T> {
	iter.collect::<Vec<_>>().into_iter()
	}

	//---------------------------------------

	fn filled_vec<T:Clone>(size: usize, value: T) -> Vec<T> {
	let mut vec = vec![];
	vec.resize(size, value);
	vec
	}

	fn fill<T:Clone>(slice: &mut [T], value: T) {
	for x in slice.iter_mut() { *x = value.clone() }
	}

	fn copy<T:Copy>(source: &[T], dest: &mut [T]) {
	assert!(source.len() >= dest.len());
	for i in 0..dest.len() {
	dest[i] = source[i];
	}
	}
	#[test]
	fn test() {
	// a "definitely correct" sieve formed by trial division.
	// It is large enough to include values for which various certain
	// incorrectly-implemented optimizations could give the wrong prime
	// factor (e.g. a certain easy mistake results in giving 3 for 18,
	// 5 for 45, and 7 for 175; and the first incorrect value may be pushed
	// even further back if a large wheel optimization is used)
	let mut correct = vec![0,1];
	for i in 2..2000 {
	for k in 2..(i+1) {
	if i % k == 0 {
	correct.push(k);
	break;
	}
	}
	}
	for (i,x) in correct.clone().into_iter().enumerate() {
	if x == 3 {
	println!("{}", i);
	}
	}
	// a "definitely definitely correct" sieve to verify that one against,
	// up to a point
	assert_eq!(correct.len(), 2000);
	assert_eq!(&correct[..12], &vec![0,1,2,3,2,5,2,7,2,3,2,11][..]);

	// try various sizes, in particular:
	for len in vec![
	0, 1, // Obvious edge cases (length 0, 1)
	2, 3, // More subtle edge case (no multiples to cross off)
	9, 10, 11, // End in a prime square; a composite; and a prime
	correct.len(), // Catch erroneous values for larger x
	] {
	let actual = FactorSieve::new(len as Int).into_vec();
	assert_eq!(&actual[..], &correct[..len]);
	}
	}

	#[bench]
	fn bench_simple(b: &mut Bencher) {
	b.iter(\|\| {
	let n = 10*SEGMENT_LEN + 20;
	let n = 100_000_000;
	factor_sieve_simple(n as Int).last().cloned()
	});
	}
	#[bench]
	fn bench_segmented(b: &mut Bencher) {
	b.iter(\|\| {
	let n = 10*SEGMENT_LEN + 20; // extra to force an incomplete segment
	let n = 100_000_000;
	FactorSieve::new(n as Int).into_vec().last().cloned()
	});
	}