Skip to content

Instantly share code, notes, and snippets.

@jzakiya

jzakiya/twinprimes_ssoz.go

Last active Mar 16, 2021
Embed
What would you like to do?
Twinprimes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in Go.
// This Go source file is a multiple threaded implementation to perform an
// extremely fast Segmented Sieve of Zakiya (SSoZ) to find Twin Primes <= N.
// Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1.
// Output is the number of twin primes <= N, or in range N1 to N2; the last
// twin prime value for the range; and the total time of execution.
// This code was developed on a System76 laptop with an Intel I7 6700HQ cpu,
// 2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning
// probably needed to optimize for other hardware systems (ARM, PowerPC, etc).
// Compile as: $ go build twinprimes_ssoz.go
// To reduce binary size do: $ strip twinprimes_ssoz
// Single val: $ echo val | ./twinprimes_ssoz
// Range vals: $ echo val1 val2 | ./twinprimes_ssoz
// Mathematical and technical basis for implementation are explained here:
// https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_
// Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_
// for_the_Riemann_Hypotheses
// https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_
// https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK
// This source code, and its updates, can be found here:
// https://gist.github.com/jzakiya/fbc77b8fdd12b0581a0ff7c2476373d9
// This code is provided free and subject to copyright and terms of the
// GNU General Public License Version 3, GPLv3, or greater.
// License copy/terms are here: http://www.gnu.org/licenses/
// Copyright (c) 2017-2021; Jabari Zakiya -- jzakiya at gmail dot com
// Last update: 2021/3/16
package main
import (
"fmt"
"time"
"sort"
"sync"
"math"
"math/bits"
"runtime"
)
// Customized gcd for prime generators; n > m; m odd
func gcd(m, n int) int {
for m | 1 != 1 { m, n = n % m, m }
return m
}
// Compute modular inverse a^-1 to base m, e.g. a*(a^-1) mod m = 1
func modinv(a0, m0 int) int {
if m0 == 1 { return 1 }
a, m := a0, m0
x0, inv := 0, 1
for a > 1 {
inv -= (a / m) * x0
a, m = m, a % m
x0, inv = inv, x0
}
if inv < 0 { inv += m0 }
return inv
}
func gen_pg_parameters(prime int) (uint, uint, int, []int, []int) {
// Create prime generator parameters for given Pn
fmt.Printf("using Prime Generator parameters for P%d\n", prime)
primes := []int{2, 3, 5, 7, 11, 13, 17, 19, 23}
modpg, res_0 := 1, 0 // compute Pn's modulus and res_0 value
for _, prm := range primes { res_0 = prm; if prm > prime { break }; modpg *= prm }
restwins := []int{} // save upper twinpair residues here
inverses := make([]int, modpg + 2) // save Pn's residues inverses here
pc, inc, res := 5, 2, 0 // use P3's PGS to generate pcs
for pc < modpg / 2 { // find a residue, then complement|inverse
if gcd(pc, modpg) == 1 { // if pc a residue
inverses[pc] = modinv(pc, modpg) // save its inverse
var mc = modpg - pc // create its modular complement
inverses[mc] = modinv(mc, modpg) // save its inverse
if res + 2 == pc { restwins = append(restwins, pc, mc + 2) } // save hi_tp residues
res = pc // save current found residue
}
pc += inc; inc ^= 0b110 // create next P3 sequence pc: 5 7 11 13 17 19 ...
}
sort.Ints(restwins); restwins = append(restwins, modpg + 1) // last residue is last hi_tp
inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1 // last 2 residues are self inverses
return uint(modpg), uint(res_0), len(restwins) , restwins, inverses
}
func set_sieve_parameters(start_num, end_num uint) (uint, uint, uint, uint, uint, uint, int, []int, []int) {
// Select at runtime best PG and segment size parameters for input values.
// These are good estimates derived from PG data profiling. Can be improved.
trange := end_num - start_num
var bn, pg = 0, 3
if end_num < 49 {
bn, pg = 1, 3
} else if trange < 10_000_000 {
bn, pg = 16, 5
} else if trange < 1_100_000_000 {
bn, pg = 32, 7
} else if trange < 35_500_000_000 {
bn, pg = 64, 11
} else if trange < 15_000_000_000_000 {
pg = 13
if trange > 7_000_000_000_000 { bn = 384
} else if trange > 2_500_000_000_000 { bn = 320
} else if trange > 250_000_000_000 { bn = 196
} else { bn = 128 }
} else {
bn, pg = 384, 17
}
modpg, res_0, pairscnt, restwins, resinvrs := gen_pg_parameters(pg)
kmin := (start_num-2) / modpg + 1 // number of resgroups to start_num
kmax := (end_num - 2) / modpg + 1 // number of resgroups to end_num
krange := kmax - kmin + 1 // number of resgroups in range, at least 1
var kb, n = uint(0), 0
if krange < 37_500_000_000_000 { n = 4 } else if krange < 975_000_000_000_000 { n = 6 } else { n = 8}
b := uint(bn * 1024 * n) // set seg size to optimize for selected PG
if krange < b { kb = krange } else { kb = b } // segments resgroups size
fmt.Printf("segment size = %d resgroups; seg array is [1 x %d] 64-bits\n", kb, (((kb-1) >> 6) + 1))
maxpairs := krange * uint(pairscnt) // maximum number of twinprime pcs
fmt.Printf("twinprime candidates = %d; resgroups = %d\n", maxpairs, krange)
return modpg, res_0, kb, kmin, kmax, krange, pairscnt, restwins, resinvrs
}
func sozpg(val, res_0 uint) []uint {
// Compute the primes r0..sqrt(input_num) and store in 'primes' array.
// Any algorithm (fast|small) is usable. Here the SoZ for P5 is used.
md, rscnt := 30, 8 // P5's modulus and residues count
res := []int{7,11,13,17,19,23,29,31} // P5's residues
posn := []int{0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,3,0,4,0,0,0,5,0,0,0,0,0,6,0,7}
kmax := (val - 7) / uint(md) + 1 // number of resgroups upto input value
prms := make([]uint8, kmax) // byte array of prime candidates, init '0'
sqrt_n := int(math.Sqrt(float64(val))) // compute integer sqrt of val
modk, r, k := 0, -1, 0 // initialize residue parameters
for true { // for r0..sqrtN primes mark their multiples
r++; if r == rscnt { r = 0; modk += md; k += 1 }
if (prms[k] & (1 << r)) != 0 { continue } // skip pc if not prime
var prm_r = res[r] // if prime save its residue value
var prime = modk + prm_r // numerate the prime value
if prime > sqrt_n { break } // we're finished when it's > sqrtN
for _, ri := range res { // mark prime's multiples in prms
prod := prm_r * ri - 2 // compute cross-product for prm_r|ri pair
bit_r := uint8(1 << posn[prod % md]) // bit mask for prod's residue
var kpm = k * (prime + ri) + prod / md // 1st resgroup for prime mult
for uint(kpm) < kmax { prms[kpm] |= bit_r; kpm += prime }
} }
// prms now contains the nonprime positions for the prime candidates r0..N
// extract primes into ssoz var 'primes'
primes := []uint{} // create empty dynamic array for primes
for k, resgroup := range prms { // for each kth residue group
for i, r_i := range res { // check for each ith residue in resgroup
if resgroup & (1 << i) == 0 { // if bit location a prime
prime := uint(md * k + r_i) // numerate its value, store if in range
if res_0 <= prime && prime <= val { primes = append(primes, prime) }
} } }
return primes
}
func nextp_init(rhi, kmin, modpg uint, primes []uint, resinvrs []int) []uint {
// Initialize 'nextp' array for twinpair upper residue rhi in 'restwins'.
// Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and
// store consecutively as lo_tp|hi_tp pairs for their restracks.
nextp := make([]uint,len(primes)*2) // 1st mults array for twinpair
r_hi, r_lo := rhi, rhi - 2 // upper|lower twinpair residue values
for j, prime := range primes { // for each prime r0..sqrt(N)
k := (prime - 2) / modpg // find the resgroup it's in
r := (prime - 2) % modpg + 2 // and its residue value
r_inv := uint(resinvrs[r]) // and residue inverse
var ri = (r_lo * r_inv - 2) % modpg + 2 // compute r's ri for r_lo
var ki = k * (prime + ri) + (r * ri - 2) / modpg // and 1st mult
if ki < kmin { ki = (kmin - ki) % prime; if ki > 0 { ki = prime } } else { ki -= kmin }
nextp[j << 1] = uint(ki) // prime's 1st mult resgroup val in range for lo_tp
ri = (r_hi * r_inv - 2) % modpg + 2 // compute r's ri for r_hi
ki = k * (prime + ri) + (r * ri - 2) / modpg // and 1st mult resgroup
if ki < kmin { ki = (kmin - ki) % prime; if ki > 0 { ki = prime } } else { ki -= kmin }
nextp[j << 1 | 1] = uint(ki) // prime's 1st mult resgroup val in range for hi_tp
}
return nextp
}
func twins_sieve(rhi int, kmin, kmax, kb, start_num, end_num, modpg uint,
primes []uint, resinvrs []int) (uint, uint) {
// Perform in thread the ssoz for given twinpair residues for Kmax resgroups.
// First create|init 'nextp' array of 1st prime mults for given twinpair,
// stored consequtively in 'nextp', and init seg array for kb resgroups.
// For sieve, mark resgroup bits to '1' if either twinpair restrack is nonprime
// for primes mults resgroups, and update 'nextp' restrack slices acccordingly.
// Return the last twinprime|sum for the range for this twinpair residues.
s := 6 // shift value for 64 bits
bmask := uint((1 << s) - 1) // bitmask val for 64 bits
sum, ki, kn, r_hi := 0, kmin-1, kb, uint(rhi) // init these parameters
hi_tp, k_max := uint(0), kmax // max twinprime|resgroup
seg := make([]uint64, ((kb - 1) >> s) + 1) // seg arry of kb resgroups
if (ki * modpg) + r_hi - 2 < start_num {ki += 1} // ensure lo tp in range
if (k_max - 1) * modpg + r_hi > end_num {k_max -= 1} // ensure hi tp in range
nextp := nextp_init(r_hi, ki, modpg, primes, resinvrs) // init nextp array
for ki := ki; ki < k_max; ki += kb { // sieve kb size slices upto kmax
if k_max - ki <= kb { // adjust kn size for last seg
kn = k_max - ki // set as nonprime unused bits in last
seg[(kn - 1) >> s] |= (^uint64(0) << ((kn - 1) & bmask)) << 1 // seg[i]
}
for j, prime := range primes { // for each prime r0..sqrt(N)
// for lower twinpair residue track
var k uint = nextp[j << 1] // starting from this resgroup in seg
for k < kn { // mark primenth resgroup bits prime mults
seg[k >> s] |= uint64(1) << (k & bmask)
k += prime } // set resgroup for prime's next multiple
nextp[j << 1] = k - kn // save 1st resgroup in next eligible seg
// for upper twinpair residue track
k = nextp[j << 1 | 1] // starting from this resgroup in seg
for k < kn { // mark primenth resgroup bits prime mults
seg[k >> s] |= uint64(1) << (k & bmask)
k += prime } // set resgroup for prime's next multiple
nextp[j << 1 | 1] = k - kn // save 1st resgroup in next eligible seg
}
cnt := 0 // initialize segment twinprimes count
// then count the twinprimes in segment
for i := 0; i <= int(kn - 1) >> s; i++ { cnt += (1 << s) - bits.OnesCount64(seg[i]) }
if cnt > 0 { // if segment has twinprimes
sum += cnt // add segment count to total range count
var upk = kn - 1 // from end of seg, count back to largest tp
for seg[upk >> s] & (uint64(1) << (upk & bmask)) != 0 { upk -= 1 }
hi_tp = upk + ki // set its full range resgroup value
}
// set 1st resgroup val of next seg slice; if not last
if ki + kb < k_max { for i := range seg { seg[i] = 0 } } // set seg bits to all primes
} // when sieve done, numerate largest twin in range
// for small ranges w/o twins set largest to 1
if r_hi > end_num || sum == 0 { hi_tp = 1 } else { hi_tp = hi_tp * modpg + r_hi }
return hi_tp, uint(sum) // return largest twinprime|twins count in range
}
func main() {
var start_num, end_num = uint(3), uint(3)
_, err := fmt.Scan(&end_num, &start_num)
if end_num < 3 { end_num = 3 }
if err != nil { start_num = 3 } else { if start_num < 3 { start_num = 3 } }
if start_num > end_num { start_num, end_num = end_num, start_num }
fmt.Printf("threads = %d\n", runtime.NumCPU())
ts := time.Now() // start timing sieve setup execution
start_num |= 1 // if start_num even increase by 1
end_num = (end_num - 1) | 1 // if end_num even decrease by 1
// select Pn, set sieving params for inputs
modpg, res_0, kb, kmin, kmax, trange,
pairscnt, restwins, resinvrs := set_sieve_parameters(start_num, end_num)
var primes = []uint{} // select sieve primes
if end_num < 49 { primes = []uint{5}
} else { primes = sozpg(uint(math.Sqrt(float64(end_num))), res_0) }
fmt.Printf("each of %d threads has nextp[2 x %d] array\n", pairscnt, len(primes))
twinscnt := uint(0) // init twinprimes range count
lo_range := uint(restwins[0] - 3)// lo_range = lo_tp - 1
for _, tp := range []uint{3, 5, 11, 17} { // excluded low tp values PGs used
if end_num == 3 { break } // if 3 end of range, no twinprimes
if start_num <= tp && tp < lo_range { twinscnt += 1 } // cnt any small tps
}
te := time.Now()
fmt.Printf("setup time = %v \n", te.Sub(ts)) // display sieve setup time
fmt.Println("perform twinprimes ssoz sieve")
t1 := time.Now() // start timing ssoz sieve execution
cnts := make([]uint, pairscnt) // the number of twinprimes found per thread
lastwins := make([]uint, pairscnt) // the largest twinprime val for each thread
var wg sync.WaitGroup
for i, r_hi := range restwins { // sieve twinpair restracks
wg.Add(1)
go func(i, r_hi int) {
defer wg.Done()
l, c := twins_sieve(r_hi, kmin, kmax, kb, start_num, end_num, modpg, primes, resinvrs)
lastwins[i] = l; cnts[i] = c
fmt.Printf("\r%d of %d twinpairs done", (i + 1), pairscnt)
}(i, r_hi)
}
wg.Wait()
fmt.Printf("\r%d of %d twinpairs done", pairscnt, pairscnt)
last_twin := uint(0) // init val for largest twinprime
for i := 0; i < pairscnt; i++ { // find largest twinprime|sum in range
twinscnt += cnts[i]
if last_twin < lastwins[i] { last_twin = lastwins[i] }
}
if end_num == 5 && twinscnt == 1 { last_twin = 5 }
kn := trange % kb // set number of resgroups in last slice
if kn == 0 { kn = kb } // if multiple of seg size set to seg size
t2 := time.Now() // sieve execution time
fmt.Printf("\nsieve time = %v", t2.Sub(t1)) // ssoz sieve time
fmt.Printf("\ntotal time = %v", t2.Sub(ts)) // setup + sieve time
fmt.Printf("\nlast segment = %d resgroups; segment slices = %d\n", kn, (trange - 1)/kb + 1)
fmt.Printf("total twins = %d; last twin = %d+/-1", twinscnt, last_twin - 1)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment