Twinprimes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in Go.
// This Go source file is a multiple threaded implementation to perform an | |
// extremely fast Segmented Sieve of Zakiya (SSoZ) to find Twin Primes <= N. | |
// Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1. | |
// Output is the number of twin primes <= N, or in range N1 to N2; the last | |
// twin prime value for the range; and the total time of execution. | |
// This code was developed on a System76 laptop with an Intel I7 6700HQ cpu, | |
// 2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning | |
// probably needed to optimize for other hardware systems (ARM, PowerPC, etc). | |
// Compile as: $ go build twinprimes_ssoz.go | |
// To reduce binary size do: $ strip twinprimes_ssoz | |
// Single val: $ echo val | ./twinprimes_ssoz | |
// Range vals: $ echo val1 val2 | ./twinprimes_ssoz | |
// Mathematical and technical basis for implementation are explained here: | |
// https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_ | |
// Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_ | |
// for_the_Riemann_Hypotheses | |
// https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_ | |
// https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK | |
// This source code, and its updates, can be found here: | |
// https://gist.github.com/jzakiya/fbc77b8fdd12b0581a0ff7c2476373d9 | |
// This code is provided free and subject to copyright and terms of the | |
// GNU General Public License Version 3, GPLv3, or greater. | |
// License copy/terms are here: http://www.gnu.org/licenses/ | |
// Copyright (c) 2017-2021; Jabari Zakiya -- jzakiya at gmail dot com | |
// Last update: 2021/3/16 | |
package main | |
import ( | |
"fmt" | |
"time" | |
"sort" | |
"sync" | |
"math" | |
"math/bits" | |
"runtime" | |
) | |
// Customized gcd for prime generators; n > m; m odd | |
func gcd(m, n int) int { | |
for m | 1 != 1 { m, n = n % m, m } | |
return m | |
} | |
// Compute modular inverse a^-1 to base m, e.g. a*(a^-1) mod m = 1 | |
func modinv(a0, m0 int) int { | |
if m0 == 1 { return 1 } | |
a, m := a0, m0 | |
x0, inv := 0, 1 | |
for a > 1 { | |
inv -= (a / m) * x0 | |
a, m = m, a % m | |
x0, inv = inv, x0 | |
} | |
if inv < 0 { inv += m0 } | |
return inv | |
} | |
func gen_pg_parameters(prime int) (uint, uint, int, []int, []int) { | |
// Create prime generator parameters for given Pn | |
fmt.Printf("using Prime Generator parameters for P%d\n", prime) | |
primes := []int{2, 3, 5, 7, 11, 13, 17, 19, 23} | |
modpg, res_0 := 1, 0 // compute Pn's modulus and res_0 value | |
for _, prm := range primes { res_0 = prm; if prm > prime { break }; modpg *= prm } | |
restwins := []int{} // save upper twinpair residues here | |
inverses := make([]int, modpg + 2) // save Pn's residues inverses here | |
pc, inc, res := 5, 2, 0 // use P3's PGS to generate pcs | |
for pc < modpg / 2 { // find a residue, then complement|inverse | |
if gcd(pc, modpg) == 1 { // if pc a residue | |
inverses[pc] = modinv(pc, modpg) // save its inverse | |
var mc = modpg - pc // create its modular complement | |
inverses[mc] = modinv(mc, modpg) // save its inverse | |
if res + 2 == pc { restwins = append(restwins, pc, mc + 2) } // save hi_tp residues | |
res = pc // save current found residue | |
} | |
pc += inc; inc ^= 0b110 // create next P3 sequence pc: 5 7 11 13 17 19 ... | |
} | |
sort.Ints(restwins); restwins = append(restwins, modpg + 1) // last residue is last hi_tp | |
inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1 // last 2 residues are self inverses | |
return uint(modpg), uint(res_0), len(restwins) , restwins, inverses | |
} | |
func set_sieve_parameters(start_num, end_num uint) (uint, uint, uint, uint, uint, uint, int, []int, []int) { | |
// Select at runtime best PG and segment size parameters for input values. | |
// These are good estimates derived from PG data profiling. Can be improved. | |
trange := end_num - start_num | |
var bn, pg = 0, 3 | |
if end_num < 49 { | |
bn, pg = 1, 3 | |
} else if trange < 10_000_000 { | |
bn, pg = 16, 5 | |
} else if trange < 1_100_000_000 { | |
bn, pg = 32, 7 | |
} else if trange < 35_500_000_000 { | |
bn, pg = 64, 11 | |
} else if trange < 15_000_000_000_000 { | |
pg = 13 | |
if trange > 7_000_000_000_000 { bn = 384 | |
} else if trange > 2_500_000_000_000 { bn = 320 | |
} else if trange > 250_000_000_000 { bn = 196 | |
} else { bn = 128 } | |
} else { | |
bn, pg = 384, 17 | |
} | |
modpg, res_0, pairscnt, restwins, resinvrs := gen_pg_parameters(pg) | |
kmin := (start_num-2) / modpg + 1 // number of resgroups to start_num | |
kmax := (end_num - 2) / modpg + 1 // number of resgroups to end_num | |
krange := kmax - kmin + 1 // number of resgroups in range, at least 1 | |
var kb, n = uint(0), 0 | |
if krange < 37_500_000_000_000 { n = 4 } else if krange < 975_000_000_000_000 { n = 6 } else { n = 8} | |
b := uint(bn * 1024 * n) // set seg size to optimize for selected PG | |
if krange < b { kb = krange } else { kb = b } // segments resgroups size | |
fmt.Printf("segment size = %d resgroups; seg array is [1 x %d] 64-bits\n", kb, (((kb-1) >> 6) + 1)) | |
maxpairs := krange * uint(pairscnt) // maximum number of twinprime pcs | |
fmt.Printf("twinprime candidates = %d; resgroups = %d\n", maxpairs, krange) | |
return modpg, res_0, kb, kmin, kmax, krange, pairscnt, restwins, resinvrs | |
} | |
func sozpg(val, res_0 uint) []uint { | |
// Compute the primes r0..sqrt(input_num) and store in 'primes' array. | |
// Any algorithm (fast|small) is usable. Here the SoZ for P5 is used. | |
md, rscnt := 30, 8 // P5's modulus and residues count | |
res := []int{7,11,13,17,19,23,29,31} // P5's residues | |
posn := []int{0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,3,0,4,0,0,0,5,0,0,0,0,0,6,0,7} | |
kmax := (val - 7) / uint(md) + 1 // number of resgroups upto input value | |
prms := make([]uint8, kmax) // byte array of prime candidates, init '0' | |
sqrt_n := int(math.Sqrt(float64(val))) // compute integer sqrt of val | |
modk, r, k := 0, -1, 0 // initialize residue parameters | |
for true { // for r0..sqrtN primes mark their multiples | |
r++; if r == rscnt { r = 0; modk += md; k += 1 } | |
if (prms[k] & (1 << r)) != 0 { continue } // skip pc if not prime | |
var prm_r = res[r] // if prime save its residue value | |
var prime = modk + prm_r // numerate the prime value | |
if prime > sqrt_n { break } // we're finished when it's > sqrtN | |
for _, ri := range res { // mark prime's multiples in prms | |
prod := prm_r * ri - 2 // compute cross-product for prm_r|ri pair | |
bit_r := uint8(1 << posn[prod % md]) // bit mask for prod's residue | |
var kpm = k * (prime + ri) + prod / md // 1st resgroup for prime mult | |
for uint(kpm) < kmax { prms[kpm] |= bit_r; kpm += prime } | |
} } | |
// prms now contains the nonprime positions for the prime candidates r0..N | |
// extract primes into ssoz var 'primes' | |
primes := []uint{} // create empty dynamic array for primes | |
for k, resgroup := range prms { // for each kth residue group | |
for i, r_i := range res { // check for each ith residue in resgroup | |
if resgroup & (1 << i) == 0 { // if bit location a prime | |
prime := uint(md * k + r_i) // numerate its value, store if in range | |
if res_0 <= prime && prime <= val { primes = append(primes, prime) } | |
} } } | |
return primes | |
} | |
func nextp_init(rhi, kmin, modpg uint, primes []uint, resinvrs []int) []uint { | |
// Initialize 'nextp' array for twinpair upper residue rhi in 'restwins'. | |
// Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and | |
// store consecutively as lo_tp|hi_tp pairs for their restracks. | |
nextp := make([]uint,len(primes)*2) // 1st mults array for twinpair | |
r_hi, r_lo := rhi, rhi - 2 // upper|lower twinpair residue values | |
for j, prime := range primes { // for each prime r0..sqrt(N) | |
k := (prime - 2) / modpg // find the resgroup it's in | |
r := (prime - 2) % modpg + 2 // and its residue value | |
r_inv := uint(resinvrs[r]) // and residue inverse | |
var ri = (r_lo * r_inv - 2) % modpg + 2 // compute r's ri for r_lo | |
var ki = k * (prime + ri) + (r * ri - 2) / modpg // and 1st mult | |
if ki < kmin { ki = (kmin - ki) % prime; if ki > 0 { ki = prime } } else { ki -= kmin } | |
nextp[j << 1] = uint(ki) // prime's 1st mult resgroup val in range for lo_tp | |
ri = (r_hi * r_inv - 2) % modpg + 2 // compute r's ri for r_hi | |
ki = k * (prime + ri) + (r * ri - 2) / modpg // and 1st mult resgroup | |
if ki < kmin { ki = (kmin - ki) % prime; if ki > 0 { ki = prime } } else { ki -= kmin } | |
nextp[j << 1 | 1] = uint(ki) // prime's 1st mult resgroup val in range for hi_tp | |
} | |
return nextp | |
} | |
func twins_sieve(rhi int, kmin, kmax, kb, start_num, end_num, modpg uint, | |
primes []uint, resinvrs []int) (uint, uint) { | |
// Perform in thread the ssoz for given twinpair residues for Kmax resgroups. | |
// First create|init 'nextp' array of 1st prime mults for given twinpair, | |
// stored consequtively in 'nextp', and init seg array for kb resgroups. | |
// For sieve, mark resgroup bits to '1' if either twinpair restrack is nonprime | |
// for primes mults resgroups, and update 'nextp' restrack slices acccordingly. | |
// Return the last twinprime|sum for the range for this twinpair residues. | |
s := 6 // shift value for 64 bits | |
bmask := uint((1 << s) - 1) // bitmask val for 64 bits | |
sum, ki, kn, r_hi := 0, kmin-1, kb, uint(rhi) // init these parameters | |
hi_tp, k_max := uint(0), kmax // max twinprime|resgroup | |
seg := make([]uint64, ((kb - 1) >> s) + 1) // seg arry of kb resgroups | |
if (ki * modpg) + r_hi - 2 < start_num {ki += 1} // ensure lo tp in range | |
if (k_max - 1) * modpg + r_hi > end_num {k_max -= 1} // ensure hi tp in range | |
nextp := nextp_init(r_hi, ki, modpg, primes, resinvrs) // init nextp array | |
for ki := ki; ki < k_max; ki += kb { // sieve kb size slices upto kmax | |
if k_max - ki <= kb { // adjust kn size for last seg | |
kn = k_max - ki // set as nonprime unused bits in last | |
seg[(kn - 1) >> s] |= (^uint64(0) << ((kn - 1) & bmask)) << 1 // seg[i] | |
} | |
for j, prime := range primes { // for each prime r0..sqrt(N) | |
// for lower twinpair residue track | |
var k uint = nextp[j << 1] // starting from this resgroup in seg | |
for k < kn { // mark primenth resgroup bits prime mults | |
seg[k >> s] |= uint64(1) << (k & bmask) | |
k += prime } // set resgroup for prime's next multiple | |
nextp[j << 1] = k - kn // save 1st resgroup in next eligible seg | |
// for upper twinpair residue track | |
k = nextp[j << 1 | 1] // starting from this resgroup in seg | |
for k < kn { // mark primenth resgroup bits prime mults | |
seg[k >> s] |= uint64(1) << (k & bmask) | |
k += prime } // set resgroup for prime's next multiple | |
nextp[j << 1 | 1] = k - kn // save 1st resgroup in next eligible seg | |
} | |
cnt := 0 // initialize segment twinprimes count | |
// then count the twinprimes in segment | |
for i := 0; i <= int(kn - 1) >> s; i++ { cnt += (1 << s) - bits.OnesCount64(seg[i]) } | |
if cnt > 0 { // if segment has twinprimes | |
sum += cnt // add segment count to total range count | |
var upk = kn - 1 // from end of seg, count back to largest tp | |
for seg[upk >> s] & (uint64(1) << (upk & bmask)) != 0 { upk -= 1 } | |
hi_tp = upk + ki // set its full range resgroup value | |
} | |
// set 1st resgroup val of next seg slice; if not last | |
if ki + kb < k_max { for i := range seg { seg[i] = 0 } } // set seg bits to all primes | |
} // when sieve done, numerate largest twin in range | |
// for small ranges w/o twins set largest to 1 | |
if r_hi > end_num || sum == 0 { hi_tp = 1 } else { hi_tp = hi_tp * modpg + r_hi } | |
return hi_tp, uint(sum) // return largest twinprime|twins count in range | |
} | |
func main() { | |
var start_num, end_num = uint(3), uint(3) | |
_, err := fmt.Scan(&end_num, &start_num) | |
if end_num < 3 { end_num = 3 } | |
if err != nil { start_num = 3 } else { if start_num < 3 { start_num = 3 } } | |
if start_num > end_num { start_num, end_num = end_num, start_num } | |
fmt.Printf("threads = %d\n", runtime.NumCPU()) | |
ts := time.Now() // start timing sieve setup execution | |
start_num |= 1 // if start_num even increase by 1 | |
end_num = (end_num - 1) | 1 // if end_num even decrease by 1 | |
// select Pn, set sieving params for inputs | |
modpg, res_0, kb, kmin, kmax, trange, | |
pairscnt, restwins, resinvrs := set_sieve_parameters(start_num, end_num) | |
var primes = []uint{} // select sieve primes | |
if end_num < 49 { primes = []uint{5} | |
} else { primes = sozpg(uint(math.Sqrt(float64(end_num))), res_0) } | |
fmt.Printf("each of %d threads has nextp[2 x %d] array\n", pairscnt, len(primes)) | |
twinscnt := uint(0) // init twinprimes range count | |
lo_range := uint(restwins[0] - 3)// lo_range = lo_tp - 1 | |
for _, tp := range []uint{3, 5, 11, 17} { // excluded low tp values PGs used | |
if end_num == 3 { break } // if 3 end of range, no twinprimes | |
if start_num <= tp && tp < lo_range { twinscnt += 1 } // cnt any small tps | |
} | |
te := time.Now() | |
fmt.Printf("setup time = %v \n", te.Sub(ts)) // display sieve setup time | |
fmt.Println("perform twinprimes ssoz sieve") | |
t1 := time.Now() // start timing ssoz sieve execution | |
cnts := make([]uint, pairscnt) // the number of twinprimes found per thread | |
lastwins := make([]uint, pairscnt) // the largest twinprime val for each thread | |
var wg sync.WaitGroup | |
for i, r_hi := range restwins { // sieve twinpair restracks | |
wg.Add(1) | |
go func(i, r_hi int) { | |
defer wg.Done() | |
l, c := twins_sieve(r_hi, kmin, kmax, kb, start_num, end_num, modpg, primes, resinvrs) | |
lastwins[i] = l; cnts[i] = c | |
fmt.Printf("\r%d of %d twinpairs done", (i + 1), pairscnt) | |
}(i, r_hi) | |
} | |
wg.Wait() | |
fmt.Printf("\r%d of %d twinpairs done", pairscnt, pairscnt) | |
last_twin := uint(0) // init val for largest twinprime | |
for i := 0; i < pairscnt; i++ { // find largest twinprime|sum in range | |
twinscnt += cnts[i] | |
if last_twin < lastwins[i] { last_twin = lastwins[i] } | |
} | |
if end_num == 5 && twinscnt == 1 { last_twin = 5 } | |
kn := trange % kb // set number of resgroups in last slice | |
if kn == 0 { kn = kb } // if multiple of seg size set to seg size | |
t2 := time.Now() // sieve execution time | |
fmt.Printf("\nsieve time = %v", t2.Sub(t1)) // ssoz sieve time | |
fmt.Printf("\ntotal time = %v", t2.Sub(ts)) // setup + sieve time | |
fmt.Printf("\nlast segment = %d resgroups; segment slices = %d\n", kn, (trange - 1)/kb + 1) | |
fmt.Printf("total twins = %d; last twin = %d+/-1", twinscnt, last_twin - 1) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment