Skip to content

Instantly share code, notes, and snippets.

@jzakiya
Last active January 8, 2024 01:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jzakiya/8879c0f4dfda543eaf92a3186de554d7 to your computer and use it in GitHub Desktop.
Save jzakiya/8879c0f4dfda543eaf92a3186de554d7 to your computer and use it in GitHub Desktop.
Cousin Primes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in Rust
// This Rust source file is a multiple threaded implementation to perform an
// extremely fast Segmented Sieve of Zakiya (SSoZ) to find Cousin Primes <= N.
// Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1.
// Output is the number of cousin primes <= N, or in range N1 to N2; the last
// cousin prime value for the range; and the total time of execution.
// Code originally developed on a System76 laptop with an Intel I7 6700HQ cpu,
// 2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning
// probably needed to optimize for other hardware systems (ARM, PowerPC, etc).
// Can compile as: $ cargo build --release
// or: $ RUSTFLAGS="-C opt-level=3 -C debuginfo=0 -C target-cpu=native" cargo build --release
// The later compilation creates faster runtime on my system.
// To reduce binary size in target/release/ do: $ strip cousinprimes_ssoz
// Single val: $ echo val1 | ./cousinprimes_ssoz
// Range vals: $ echo val1 val2 | ./cousinprimes_ssoz
// val1 and val2 can now be entered as either: 123456789 or 123_456_789
// Mathematical and technical basis for implementation are explained here:
// https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_
// Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_
// for_the_Riemann_Hypotheses
// https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_
// https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK
// https://www.academia.edu/81206391/Twin_Primes_Segmented_Sieve_of_Zakiya_SSoZ_Explained
// This source code, and updates, can be found here:
// https://gist.github.com/jzakiya/8879c0f4dfda543eaf92a3186de554d7
// Significant contributions provided from https://users.rust-lang.org/
// This code is provided free and subject to copyright and terms of the
// GNU General Public License Version 3, GPLv3, or greater.
// License copy/terms are here: http://www.gnu.org/licenses/
// Copyright (c) 2017-2024; Jabari Zakiya -- jzakiya at gmail dot com
// Last update: 2024/01/07
extern crate integer_sqrt;
extern crate num_cpus;
extern crate rayon;
use integer_sqrt::IntegerSquareRoot;
use rayon::prelude::*;
use std::sync::atomic::{self, AtomicUsize, AtomicU8, Ordering};
use std::time::SystemTime;
// A counter implemented using relaxed (unsynchronized) atomic operations.
struct RelaxedCounter(AtomicUsize);
impl RelaxedCounter {
fn new() -> Self { RelaxedCounter(AtomicUsize::new(0)) }
/// Increment and get the new value.
fn increment(&self) -> usize {
self.0.fetch_add(1, atomic::Ordering::Relaxed) + 1 }
}
fn print_time(title: &str, time: SystemTime) {
print!("{} = ", title);
println!("{} secs", {
match time.elapsed() {
Ok(e) => e.as_secs() as f64 + e.subsec_nanos() as f64 / 1_000_000_000f64,
Err(e) => { panic!("Timer error {:?}", e) },
}
});
}
// Customized gcd to determine coprimality; n > m; m odd
fn coprime(mut m: usize, mut n: usize) -> bool {
while m|1 != 1 { let t = m; m = n % m; n = t }
m > 0
}
// Compute modular inverse a^-1 to base m, e.g. a*(a^-1) mod m = 1
fn modinv(a0: usize, m0: usize) -> usize {
if m0 == 1 { return 1 }
let (mut a, mut m) = (a0 as isize, m0 as isize);
let (mut x0, mut inv) = (0, 1);
while a > 1 {
inv -= (a / m) * x0;
a = a % m;
std::mem::swap(&mut a, &mut m);
std::mem::swap(&mut x0, &mut inv);
}
if inv < 0 { inv += m0 as isize }
inv as usize
}
fn gen_pg_parameters(prime: usize) -> (usize, usize, usize, Vec<usize>, Vec<usize>) {
// Create prime generator parameters for given Pn
println!("using Prime Generator parameters for P{}", prime);
let primes: Vec<usize> = vec![2, 3, 5, 7, 11, 13, 17, 19, 23];
let (mut modpg, mut res_0) = (1, 0); // compute Pn's modulus and res_0 value
for prm in primes { res_0 = prm; if prm > prime { break }; modpg *= prm }
let mut rescousins: Vec<usize> = vec![]; // save upper cousinpair residues here
let mut inverses = vec![0usize; modpg+2]; // save Pn's residues inverses here
let (mut rc, mut inc, mut res) = (5,2,0); // use P3's PGS to generate residue candidates
let midmodpg = modpg >> 1; // mid value for modulus
while rc < midmodpg { // find PG's 1st half residues
if coprime(rc, modpg) { // if rc is a residue
let mc = modpg - rc; // create its modular complement
inverses[rc] = modinv(rc, modpg); // save rc and mc inverses
inverses[mc] = modinv(mc, modpg); // if in cousinprime save both hi residues
if res + 4 == rc { rescousins.push(rc); rescousins.push(mc + 4) }
res = rc; // save current found residue
}
rc += inc; inc ^= 0b110; // create next P3 sequence rc: 5 7 11 13 17 19 ...
}
rescousins.push(midmodpg + 2); rescousins.sort(); // create pivot hi_cp
inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1; // last 2 residues are self inverses
(modpg, res_0, rescousins.len(), rescousins, inverses)
}
fn set_sieve_parameters(start_num: usize, end_num: usize) ->
(usize, usize, usize, usize, usize, usize, usize, Vec<usize>, Vec<usize>) {
// Select at runtime best PG and segment size parameters for input values.
// These are good estimates derived from PG data profiling. Can be improved.
let nrange = end_num - start_num;
let bn: usize; let pg: usize;
if end_num < 49 {
bn = 1; pg = 3;
} else if nrange < 77_000_000 {
bn = 16; pg = 5;
} else if nrange < 1_100_000_000 {
bn = 32; pg = 7;
} else if nrange < 35_500_000_000 {
bn = 64; pg = 11;
} else if nrange < 14_000_000_000_000 {
pg = 13;
if nrange > 7_000_000_000_000 { bn = 384; }
else if nrange > 2_500_000_000_000 { bn = 320; }
else if nrange > 250_000_000_000 { bn = 196; }
else { bn = 128; }
} else {
bn = 384; pg = 17;
}
let (modpg, res_0, pairscnt, rescousins, resinvrs) = gen_pg_parameters(pg);
let kmin = (start_num-2) / modpg + 1; // number of resgroups to start_num
let kmax = (end_num - 2) / modpg + 1; // number of resgroups to end_num
let krange = kmax - kmin + 1; // number of resgroups in range, at least 1
let n = if krange < 37_500_000_000_000 { 4 } else if krange < 975_000_000_000_000 { 6 } else { 8 };
let b = bn * 1024 * n; // set seg size to optimize for selected PG
let ks = if krange < b { krange } else { b }; // segments resgroups size
println!("segment size = {} resgroups; seg array is [1 x {}] 64-bits", ks, ((ks-1) >> 6) + 1);
let maxpairs = krange * pairscnt; // maximum number of cousinprime pcs
println!("cousinprime candidates = {}; resgroups = {}", maxpairs, krange);
(modpg, res_0, ks, kmin, kmax, krange, pairscnt, rescousins, resinvrs)
}
fn atomic_slice(slice: &mut [u8]) -> &[AtomicU8] {
unsafe { &*(slice as *mut [u8] as *const [AtomicU8]) }
}
fn sozp5(val: usize, res_0: usize, start_num : usize, end_num : usize) -> Vec<usize> {
// Return the primes r0..sqrt(end_num) within range (start_num...end_num)
let (md, rescnt) = (30, 8); // P5's modulus and residues count
static RES: [usize; 8] = [7,11,13,17,19,23,29,31];
static BITN: [u8; 30] = [0,0,0,0,0,1,0,0,0,2,0,4,0,0,0,8,0,16,0,0,0,32,0,0,0,0,0,64,0,128];
let range_size = end_num - start_num; // integers size of inputs range
let kmax = (val - 2) / md + 1; // number of resgroups upto input value
let mut prms = vec![0u8; kmax]; // byte array of prime candidates, init '0'
let sqrtn = val.integer_sqrt(); // compute integer sqrt of val
let k = sqrtn / md; // compute its resgroup value
let (resk, mut r) = (sqrtn - md*k, 0); // compute its residue value; set residue start posn
while resk >= RES[r] { r += 1 } // find largest residue <= sqrtn posn in its resgroup
let pcs_to_sqrtn = k*rescnt + r; // number of pcs <= sqrtn
for i in 0..pcs_to_sqrtn { // for r0..sqrtn primes mark their multiples)
let (k, r) = (i/rescnt, i%rescnt); // update resgroup parameters
if (prms[k] & (1 << r)) != 0 { continue } // skip pc if not prime
let prm_r = RES[r]; // if prime save its residue value
let prime = md*k + prm_r; // numerate its value
let rem = start_num % prime; // prime's modular distance to start_num
if !(prime - rem <= range_size || rem == 0) { continue } // skip prime if no multiple in range
let prms_atomic = atomic_slice(&mut prms); // to parallel sieve along bit rows
RES.par_iter().for_each (|ri| { // mark prime's multiples in prms in parallel
let prod = prm_r * ri - 2; // compute cross-product for prm_r|ri pair
let bit_r = BITN[prod % md]; // bit mask value for prod's residue
let mut kpm = k * (prime + ri) + prod / md; // resgroup for prime's 1st multiple
while kpm < kmax { prms_atomic[kpm].fetch_or(bit_r, Ordering::Relaxed); kpm += prime; };
});
}
// prms now contains the prime multiples positions marked for the pcs r0..N
// in parallel along each restrack, identify|numerate the primes in each resgroup
// return only the primes with a multiple within range (start_num...end_num)
let primes = RES.par_iter().enumerate().flat_map_iter( |(i, ri)| {
prms.iter().enumerate().filter_map(move |(k, resgroup)| {
if resgroup & (1 << i) == 0 {
let prime = md * k + ri;
let rem = start_num % prime;
if (prime >= res_0 && prime <= val) && (prime - rem <= range_size || rem == 0) {
return Some(prime);
} } None
}) }).collect();
primes
}
fn nextp_init(rhi: usize, kmin: usize, modpg: usize, primes: &[usize], resinvrs: &[usize]) -> Vec<usize> {
// Initialize 'nextp' array for cousinpair upper residue rhi in 'rescousins'.
// Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and
// store consecutively as lo_tp|hi_tp pairs for their restracks.
let mut nextp = vec![0usize; primes.len() * 2]; // 1st mults array for cousinpair
let (r_hi, r_lo) = (rhi, rhi - 4); // upper|lower cousinpair residue values
for (j, prime) in primes.iter().enumerate() { // for each prime r0..sqrt(N)
let k = (prime - 2) / modpg; // find the resgroup it's in
let r = (prime - 2) % modpg + 2; // and its residue value
let r_inv = resinvrs[r]; // and residue inverse
let rl = (r_lo * r_inv - 2) % modpg + 2; // compute r's rl for r_lo
let rh = (r_hi * r_inv - 2) % modpg + 2; // compute r's rh for r_hi
let mut kl = k * (prime + rl) + (r * rl - 2) / modpg; // kl 1st mult resgroup
let mut kh = k * (prime + rh) + (r * rh - 2) / modpg; // kh 1st mult restroup
if kl < kmin { kl = (kmin - kl) % prime; if kl > 0 { kl = prime - kl } }
else { kl = kl - kmin };
if kh < kmin { kh = (kmin - kh) % prime; if kh > 0 { kh = prime - kh } }
else { kh = kh - kmin };
nextp[j * 2] = kl; // prime's 1st mult lo_cp resgroups in range
nextp[j * 2 | 1] = kh; // prime's 1st mult hi_cp resgroups in range
}
nextp
}
fn cousins_sieve(r_hi: usize, kmin: usize, kmax: usize, ks: usize, start_num: usize,
end_num: usize, modpg: usize, primes: &[usize], resinvrs: &[usize]) -> (usize, usize) {
// Perform in thread the ssoz for given cousinpair residues for kmax resgroups.
// First create|init 'nextp' array of 1st prime mults for given cousinpair,
// stored consequtively in 'nextp', and init seg array for ks resgroups.
// For sieve, mark resgroup bits to '1' if either cousinpair restrack is nonprime
// for primes mults resgroups, and update 'nextp' restrack slices acccordingly.
// Return the last cousinprime|sum for the range for this cousinpair residues.
// For speed, disable runtime seg array bounds checking; using 64-bit elem seg array
unsafe { // allow fast array indexing
type MWord = u64; // mem size for 64-bit cpus
const S: usize = 6; // shift value for 64 bits
const BMASK: usize = (1 << S) - 1; // bitmask val for 64 bits
let (mut sum, mut ki, mut kn) = (0usize, kmin-1, ks); // init these parameters
let (mut hi_cp, mut k_max) = (0usize, kmax); // max cousinprime|resgroup val
let mut seg = vec![0 as MWord; ((ks - 1) >> S) + 1]; // seg array for ks resgroups
if r_hi - 4 < (start_num - 2) % modpg +2 { ki += 1; } // ensure lo cp in range
if r_hi > (end_num - 2) % modpg + 2 { k_max -= 1; } // ensure hi cp in range
let mut nextp = nextp_init(r_hi, ki, modpg, primes, resinvrs); // init nextp array
while ki < k_max { // for ks size slices upto kmax
if ks > (k_max - ki) { kn = k_max - ki } // adjust kn size for last slice
for (j, prime) in primes.iter().enumerate() { // for each prime r0..sqrt(N)
// for lower cousinpair residue track
let mut k = *nextp.get_unchecked(j * 2); // starting from this resgroup in seg
while k < kn { // mark primenth resgrouup bits prime mults
*seg.get_unchecked_mut(k >> S) |= 1 << (k & BMASK);
k += prime; } // set resgroup for prime's next multiple
*nextp.get_unchecked_mut(j * 2) = k - kn; // save 1st resgroup in next eligible seg
// for upper cousinpair residue track
k = *nextp.get_unchecked(j * 2 | 1); // starting from this resgroup in seg
while k < kn { // mark primenth resgroup bits prime mults
*seg.get_unchecked_mut(k >> S) |= 1 << (k & BMASK);
k += prime; } // set resgroup for prime's next multiple
*nextp.get_unchecked_mut(j * 2 | 1) = k - kn; // save 1st resgroup in next eligible seg
} // set as nonprime unused bits in last seg[n]
// so fast, do for every seg[i]
*seg.get_unchecked_mut((kn - 1) >> S) |= !1u64 << ((kn - 1) & BMASK);
let mut cnt = 0usize; // count the twinprimes in the segment
for &m in &seg[0..=(kn - 1) >> S] { cnt += m.count_zeros() as usize; }
if cnt > 0 { // if segment has cousinprimes
sum += cnt; // add the segment count to total count
let mut upk = kn - 1; // from end of seg count back to largest cp
while *seg.get_unchecked(upk >> S) & (1 << (upk & BMASK)) != 0 { upk -= 1 }
hi_cp = ki + upk; // set resgroup value for largest cp in seg
}
ki += ks; // set 1st resgroup val of next seg slice
if ki < k_max { seg.fill(0); } // set seg to all primes
} // when sieve done, numerate largest cp in range
// for small ranges w/o cousins, set largest to 0
hi_cp = if r_hi > end_num || sum == 0 { 0 } else { hi_cp * modpg + r_hi };
(hi_cp, sum) // return largest cousinprime|cousins count
}
}
fn main() {
let mut val = String::new(); // Inputs are 1|2 values < 2**64; w/wo underscores: 12345|12_345
std::io::stdin().read_line(&mut val).expect("Failed to read line");
let mut substr_iter = val.split_whitespace();
let mut next_or_default = |def| -> usize {
substr_iter.next().unwrap_or(def).replace('_', "").parse().expect("Input is not a number")
};
let mut end_num = std::cmp::max(next_or_default("3"), 3); // min vals 3
let mut start_num = std::cmp::max(next_or_default("3"), 3);
if start_num > end_num { std::mem::swap(&mut end_num, &mut start_num) }
start_num |= 1; // if start_num even increase by 1
end_num = (end_num - 1) | 1; // if end_num even decrease by 1
if end_num - start_num < 4 { end_num = 7; start_num = 7 }
println!("threads = {}", num_cpus::get());
let ts = SystemTime::now(); // start timing sieve setup execution
// select Pn, set sieving params for inputs
let (modpg, res_0, ks, kmin, kmax, krange, pairscnt, rescousins, resinvrs) = set_sieve_parameters(start_num, end_num);
// create sieve primes <= sqrt(end_num), only use those whose multiples within inputs range
let primes: Vec<usize> = if end_num < 49 { vec![5] }
else { sozp5(end_num.integer_sqrt(), res_0, start_num, end_num) };
println!("each of {} threads has nextp[2 x {}] array", pairscnt, primes.len());
print_time("setup time", ts); // sieve setup time
println!("perform cousinprimes ssoz sieve");
let t1 = SystemTime::now(); // start timing ssoz sieve execution
// sieve each cousinpair restracks in parallel
let (lastcousins, cnts): (Vec<_>, Vec<_>) = { // store outputs in these arrays
let counter = RelaxedCounter::new();
rescousins.par_iter().map( |r_hi| {
let out = cousins_sieve(*r_hi, kmin, kmax, ks, start_num, end_num, modpg, &primes, &resinvrs);
print!("\r{} of {} cousinpairs done", counter.increment(), pairscnt);
out
}).unzip()
};
let mut cousinscnt = 0usize; // number of cousinprimes in range
let mut last_cousin = 0usize; // largest hi_cp cousin in range
if end_num < 49 { // check for cousins in low ranges
for c_hi in [11, 17, 23, 41, 47 ].into_iter() {
if start_num <= c_hi - 4 && c_hi <= end_num { last_cousin = c_hi; cousinscnt += 1}
} }
else { // check for cousins from sieve
for (i, cnt_i) in cnts.iter().enumerate() {
cousinscnt += cnt_i;
if last_cousin < lastcousins[i] { last_cousin = lastcousins[i]; }
} }
// account for 1st cousin prime, defined as (3, 7)
if start_num == 3 && end_num > 6 { cousinscnt += 1; if end_num < 11 { last_cousin = 7 }}
let mut kn = krange % ks; // set number of resgroups in last slice
if kn == 0 { kn = ks }; // if multiple of seg size set to seg size
print_time("\nsieve time", t1); // ssoz sieve time
print_time("total time", ts); // setup + sieve time
println!("last segment = {} resgroups; segment slices = {}", kn, (krange - 1)/ks + 1);
println!("total cousins = {}; last cousin = {}|-4", cousinscnt, last_cousin);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment