Last active
August 12, 2024 16:49
-
-
Save jzakiya/e2fa7211b52a4aa34a4de932010eac69 to your computer and use it in GitHub Desktop.
Cousin Primes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in Nim
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[ | |
This Nim source file is a multiple threaded implementation to perform an | |
extremely fast Segmented Sieve of Zakiya (SSoZ) to find Cousin Primes <= N. | |
Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1. | |
Output is the number of cousin primes <= N, or in range N1 to N2; the last | |
cousin prime value for the range; and the total time of execution. | |
Code originally developed on a System76 laptop with an Intel I7 6700HQ cpu, | |
2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning | |
would be needed to optimize for other hardware systems (ARM, PowerPC, etc). | |
For Nim >= 2.0.0 compile as: | |
$ nim c --cc:gcc --mm:arc --d:danger --d:release --threads:on --d:useMalloc cousinprimes_ssoz.nim | |
Then run executable: $ ./cousinprimes_ssoz <cr>, and enter range values. | |
Or alternatively: $ echo <range1> <range2> | ./cousinprimes_ssoz | |
Range values can be provided in either order (larger or smaller first). | |
This source file, and updates, will be available here: | |
https://gist.github.com/jzakiya/e2fa7211b52a4aa34a4de932010eac69 | |
Mathematical and technical basis for implementation are explained here: | |
https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_ | |
Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_ | |
for_the_Riemann_Hypotheses | |
https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_ | |
https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK | |
https://www.academia.edu/81206391/Twin_Primes_Segmented_Sieve_of_Zakiya_SSoZ_Explained | |
This code is provided free and subject to copyright and terms of the | |
GNU General Public License Version 3, GPLv3, or greater. | |
License copy/terms are here: http://www.gnu.org/licenses/ | |
Copyright (c) 2017-2024 Jabari Zakiya -- jzakiya at gmail dot com | |
Version Date: 2024/08/12 | |
]# | |
import math # for sqrt, gcd, mod functions | |
import strutils, typetraits # for number input | |
import times, os # for timing code execution | |
import osproc # for getting threads count | |
import threadpool # for parallel processing | |
import algorithm # for sort | |
import bitops # for countSetBits | |
{.experimental: "parallel".} # required to use 'parallel' (>= 1.4.x) | |
proc modinv(a0, b0: int): int = | |
## Compute modular inverse a^-1 of a to base b, e.g. a*(a^-1) mod b = 1. | |
var (a, b, x0) = (a0, b0, 0) | |
result = 1 | |
if b == 1: return | |
while a > 1: | |
result -= (a div b) * x0 | |
a = a mod b | |
swap a, b | |
swap x0, result | |
if result < 0: result += b0 | |
proc genPGparameters(prime: int): (int, int, int, seq[int], seq[int]) = | |
## Create prime generator parameters for given Pn | |
echo("using Prime Generator parameters for P", prime) | |
let primes = [2, 3, 5, 7, 11, 13, 17, 19, 23] | |
var (modpg, res_0) = (1, 0) # compute Pn's modulus and res_0 value | |
for prm in primes: (res_0 = prm; if prm > prime: break; modpg *= prm) | |
var rescousins: seq[int] = @[] # save upper cousin pair residues here | |
var inverses = newSeq[int](modpg+2) # save Pn's residues inverses here | |
var (rc, inc, res) = (5, 2, 0) # use P3's PGS to generate residue candidates | |
var midmodpg = modpg shr 1; # mid value of modpg | |
while rc < midmodpg: # find PG's 1st half residues | |
if gcd(modpg, rc) == 1: # if rc is a residue | |
let mc = modpg - rc # create its modular complement | |
inverses[rc] = modinv(rc,modpg) # save rc amd mc inverses | |
inverses[mc] = modinv(mc,modpg) # if in cousinpair save both hi residues | |
if res + 4 == rc: rescousins.add(rc); rescousins.add(mc + 4) | |
res = rc # save current found residue | |
rc += inc; inc = inc xor 0b110 # create next P3 sequence pc: 5 7 11 13 17 19 ... | |
rescousins.add(midmodpg + 2); rescousins.sort(system.cmp[int]) # create pivot hi_cp | |
inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1 # last 2 resdiues are self inverses | |
(modpg, res_0, rescousins.len, rescousins, inverses) | |
# Global parameters | |
var | |
cnts: seq[uint] # cousins count for each cousinpair | |
lastcousins: seq[uint] # largest hi_cp for each cousinpair | |
proc setSieveParameters(start_num, end_num: uint): (uint, int, uint, uint, uint, uint, int, seq[int], seq[int]) = | |
## Select at runtime best PG and segment size factor to use for input value. | |
## These are good estimates derived from PG data profiling. Can be improved. | |
let range = end_num - start_num | |
var (Bn, pg) = (0, 3) | |
if end_num < 49'u: | |
Bn = 1; pg = 3 | |
elif range < 77_000_000: | |
Bn = 16; pg = 5 | |
elif range < 1_100_000_000'u: | |
Bn = 32; pg = 7 | |
elif range < 35_500_000_000'u: | |
Bn = 64; pg = 11 | |
elif range < 14_000_000_000_000'u: | |
pg = 13 | |
if range > 7_000_000_000_000'u: Bn = 384 | |
elif range > 2_500_000_000_000'u: Bn = 320 | |
elif range > 250_000_000_000'u: Bn = 196 | |
else: Bn = 128 | |
else: | |
Bn = 384; pg = 17 | |
let (modpg, res_0, pairscnt, rescousins, resinvrs) = genPGparameters(pg) | |
let Kmin = (start_num - 2) div modpg.uint + 1 # number of resgroups to start_num | |
let Kmax = (end_num - 2) div modpg.uint + 1 # number of resgroups to end_num | |
let krange = Kmax - Kmin + 1 # number of range resgroups, at least 1 | |
let n = if krange < 37_500_000_000_000'u: 10 elif krange < 975_000_000_000_000'u: 16 else: 20 | |
let B = uint(Bn * 1024 * n) # set seg size to optimize for selected PG | |
let Ks = if krange < B: krange else: B # segments resgroups size | |
echo("segment size = ",Ks," resgroups; seg array is [1 x ",((Ks-1) shr 6) + 1,"] 64-bits") | |
let maxpairs = krange.int * pairscnt; # maximum number of cousinprime pcs | |
echo("cousinprime candidates = ", maxpairs, "; resgroups = ", krange); | |
(modpg.uint, res_0, Ks, Kmin, Kmax, krange, pairscnt, rescousins, resinvrs) | |
proc sozp5(val: int | int64, res_0: int, start_num, end_num: uint): seq[int] = | |
## Return the primes r0..sqrt(end_num) within range (start_num...end_num) | |
let (md, rescnt) = (30, 8) # P5's modulus and residues count | |
let res = [7,11,13,17,19,23,29,31] # P5's residues list | |
let bitn = [0,0,0,0,0,1,0,0,0,2,0,4,0,0,0,8,0,16,0,0,0,32,0,0,0,0,0,64,0,128] | |
let range_size = end_num - start_num # integers size for inputs range | |
let kmax = (val - 2) div md + 1 # number of resgroups to input value | |
var prms = newSeq[uint8](kmax) # byte array of prime candidates init '0' | |
let sqrtn = int(sqrt float64(val)) # compute integer sqrt of val | |
var k = sqrtn div md # compute its resgroup value | |
var (resk, r) = (sqrtn - md*k, 0) # compute its residue value; set residue start posn | |
while resk >= res[r]: r += 1 # find largest residue <= sqrtn posn in its resgroup | |
let pcs_to_sqrtn = k*rescnt + r # number of pcs <= sqrtn | |
for i in 0..pcs_to_sqrtn: # for r0..sqrtN primes mark their multiples | |
let (k, r) = (i div rescnt, i mod rescnt) # update resgroup parameters | |
if (prms[k] and uint8(1 shl r)) != 0: continue # skip pc if not prime | |
let prm_r = res[r] # if prime save its residue value | |
let prime = md*k + prm_r # numerate its prime value | |
let rem = start_num mod uint(prime) # prime's modular distance to start_num | |
if not(uint(prime) - rem <= range_size or rem == 0): continue # skip prime if no multiple in range | |
for ri in res: # mark prime's multiples in prms | |
let prod = prm_r * ri - 2 # compute cross-product for prm_r|ri pair | |
let bit_r = uint8(bitn[prod mod md]) # bit mask value for prod's residue | |
var kpm = k * (prime + ri) + prod div md # resgroup for prime's 1st multiple | |
while kpm < kmax: prms[kpm] = prms[kpm] or bit_r; kpm += prime | |
# prms now contains the prime multiples positions marked for the pcs r0..N | |
# now along each restrack, identify|numerate the primes in each resgroup | |
# return only the primes with a multiple within range (start_num...end_num) | |
result = @[] # create empty dynamic array for primes | |
for r, res_r in res: # along each restrack|row til end | |
for k, resgroup in prms: # for each resgroup along restrack | |
if (resgroup and uint8(1 shl r)) == 0: # if bit location a prime | |
let prime = md * k + res_r # numerate its value, store if in range | |
# check if prime has multiple in range, if so keep it, if not don't | |
let rem = start_num mod prime.uint | |
if (prime in res_0..val) and (prime.uint - rem <= range_size or rem == 0): result.add(prime) | |
proc nextp_init(hi_r: int, kmin: uint, modpg: int, primes: seq[int], resinvrs: seq[int]): seq[uint64] = | |
## Initialize 'nextp' array for cousinpair upper residue rhi in 'rescousins'. | |
## Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and | |
## store consecutively as lo_cp|hi_cp pairs for their restracks. | |
var nextp = newSeq[uint64](primes.len * 2) # 1st mults array for cousinpair | |
let (r_hi, r_lo) = (hi_r, hi_r - 4) # upper|lower cousinpair residues vals | |
for j, prime in primes: # for each prime r0..sqrt(N) | |
let k = (prime - 2) div modpg # find the resgroup it's in | |
let r = (prime - 2) mod modpg + 2 # and its residue value | |
let r_inv = resinvrs[r] # and its residue inverse | |
let rl = (r_lo * r_inv - 2) mod modpg + 2 # compute r's ri for r_lo | |
let rh = (r_hi * r_inv - 2) mod modpg + 2 # compute r's ri for r_hi | |
var kl = uint(k * (prime + rl) + (r * rl - 2) div modpg) # and 1st mult | |
var kh = uint(k * (prime + rh) + (r * rh - 2) div modpg) # and 1st mult | |
if kl < kmin: # if 1st mult index < start_num's | |
kl = (kmin - kl) mod prime.uint # how many indices short is it | |
if kl > 0'u: kl = prime.uint - kl # adjust index value into range | |
else: kl = kl - kmin # else here, adjust index if it was > | |
if kh < kmin: # if 1st mult index < start_num's | |
kh = (kmin - kh) mod prime.uint # how many indices short is it | |
if kh > 0'u: kh = prime.uint - kh # adjust index value into range | |
else: kh = kh - kmin # else here, adjust index if it was > | |
nextp[j * 2] = kl # lo_cp index val >= start of range | |
nextp[j * 2 or 1] = kh # hi_cp index val >= start of range | |
result = nextp | |
proc cousins_sieve(r_hi, kmin, kmax, Ks, start_num, end_num, modpg: uint, primes: seq[int], resinvrs: seq[int], indx: int) = | |
## Perform in thread the ssoz for given cousinpair residues for Kmax resgroups. | |
## First create|init 'nextp' array of 1st prime mults for given cousinpair, | |
## stored consequtively in 'nextp', and init seg array for Ks resgroups. | |
## For sieve, mark resgroup bits to '1' if either cousinpair restrack is nonprime | |
## for primes mults resgroups, and update 'nextp' restrack slices acccordingly. | |
## Find last cousin prime|sum for range, store at array[indx] for this cousinpair. | |
## Can optionally compile to print mid cousinprime values generated by cousinpair. | |
{.gcsafe.}: | |
const S = 6 # shift value for 64 bits | |
const BMASK = (1 shl S) - 1 # bitmask val for 64 bits | |
var (sum, Ki, Kn) = (0'u, kmin-1, Ks.int) # init these parameters | |
var (hi_cp, Kmax) = (0'u, kmax) # max cousinprime|resgroup val | |
var seg = newSeq[uint](((Ks-1) shr S) + 1) # seg array for Ks resgroups | |
if r_hi - 4 < (start_num - 2) mod modpg + 2: Ki.inc # ensure lo cp in range | |
if r_hi > (end_num - 2) mod modpg + 2: Kmax.dec # ensure hi cp in range | |
var nextp = nextp_init(r_hi.int, Ki, modpg.int, primes, resinvrs) # init nextp array | |
while Ki < Kmax: # for Ks resgroup size slices upto Kmax | |
if Ks > (Kmax - Ki): Kn = int(Kmax - Ki) # adjust Kn size for last seg | |
for j, prime in primes: # for each prime r0..sqrt(N) | |
# for lower cousinpair residue track | |
var k1 = nextp[j * 2].int # starting from this resgroup in seg | |
while k1 < Kn: # mark primenth resgroup bits prime mults | |
seg[k1 shr S] = seg[k1 shr S] or (1'u shl (k1 and BMASK)).uint | |
k1 += prime # set next prime multiple resgroup | |
nextp[j * 2] = uint(k1 - Kn) # save 1st resgroup in next eligible seg | |
# for upper cousinpair residue track | |
var k2 = nextp[j * 2 or 1].int # starting from this resgroup in seg | |
while k2 < Kn: # mark primenth resgroup bits prime mults | |
seg[k2 shr S] = seg[k2 shr S] or (1'u shl (k2 and BMASK)).uint | |
k2 += prime # set next prime multiple resgroup | |
nextp[j * 2 or 1] = uint(k2 - Kn) # save 1st resgroup in next eligible seg | |
# set as nonprime unused bits in last seg[n] | |
# so fast, do for every seg[i] | |
seg[(Kn-1) shr S] = seg[(Kn-1) shr S] or (not 1'u shl ((Kn-1) and BMASK)).uint | |
var cnt = 0 # count the twinprimes in the segment | |
for m in seg[0..(Kn-1) shr S]: cnt += (1 shl S) - countSetBits(m) | |
if cnt > 0: # if segment has cousin primes | |
sum += cnt.uint # add segment count to total range count | |
var upk = Kn - 1 # from end of seg, count down to largest cp | |
while (seg[upk shr S] and (1'u shl (upk and BMASK)).uint) != 0: upk.dec | |
hi_cp = Ki + upk.uint # numerate its full resgroup value | |
Ki += Ks # set 1st resgroup val of next seg slice | |
if Ki < Kmax: (for b in 0..(Kn - 1) shr S: seg[b] = 0) # set seg to all primes | |
# when sieve done for full range | |
# numerate largest cousinprime in segs | |
hi_cp = if r_hi > end_num: 0'u else: hi_cp * modpg.uint + r_hi | |
lastcousins[indx] = if sum == 0: 0'u else: hi_cp # store final seg cp value | |
cnts[indx] = sum # sum for cousin pair | |
proc cousinprimes_ssoz() = | |
## Main routine to setup, time, and display results for cousin primes sieve. | |
let x = stdin.readline.split # Inputs are 1 or 2 range values < 2**64 | |
var end_num = max(x[0].parseUInt, 3'u) | |
var start_num = if x.len > 1: max(x[1].parseUInt, 3'u) else: 3'u | |
if start_num > end_num: swap start_num, end_num | |
start_num = start_num or 1 # if start_num even add 1 | |
end_num = (end_num - 1) or 1 # if end_num even subtract 1 | |
if end_num - start_num < 4: (start_num, end_num) = (7'u, 7'u) | |
echo("threads = ", countProcessors()) | |
let ts = epochTime() # start timing sieve setup execution | |
# select Pn, set sieving params for inputs | |
let (modpg, res_0, Ks, Kmin, Kmax, Krange, | |
pairscnt, rescousins, resinvrs) = setSieveParameters(start_num, end_num) | |
# create sieve primes <= sqrt(end_num), only use those whose multiples within inputs range | |
let primes: seq[int] = if end_num < 49: @[5] | |
else: sozp5(int(sqrt float64(end_num)), res_0, start_num, end_num) | |
echo("each of ", pairscnt, " threads has nextp[", 2, " x ", primes.len, "] array") | |
let te = epochTime() - ts # sieve setup time | |
echo("setup time = ", te.formatFloat(ffDecimal, 6), " secs") | |
echo("perform cousinprimes ssoz sieve") | |
let t1 = epochTime() # start timing ssoz sieve execution | |
cnts = newSeq[uint](pairscnt) # cousins count for each cousinpair | |
lastcousins = newSeq[uint](pairscnt) # largest hi_cp for each cousinpair | |
#parallel: # perform in parallel | |
for indx, r_hi in rescousins: # for each cousin pair row index | |
spawn cousins_sieve(r_hi.uint, Kmin, Kmax, Ks, start_num, end_num, modpg, primes, resinvrs, indx) | |
stdout.write("\r", (indx + 1), " of ", pairscnt, " cousinpairs done") | |
sync() # when all the threads finish | |
var cousinscnt = 0'u # count of cousinprimes in range | |
var last_cousin = 0'u # largest hi_cp cousin in range | |
if end_num < 49: # check for cousins in low ranges | |
for c_hi in [11, 17, 23, 41, 47]: | |
if start_num <= uint(c_hi - 4) and c_hi.uint <= end_num: last_cousin = c_hi.uint; cousinscnt += 1 | |
else: # check for cousins from sieve | |
for i in 0 ..< pairscnt: # find largest cousinprime|count in range | |
cousinscnt += cnts[i] | |
if last_cousin < lastcousins[i]: last_cousin = lastcousins[i] | |
# account for 1st cousin prime, defined as (3, 7) | |
if start_num == 3 and end_num > 6: cousinscnt += 1; (if end_num < 11: last_cousin = 7) | |
var Kn = Krange mod Ks # set number of resgroups in last slice | |
if Kn == 0: Kn = Ks # if multiple of seg size set to seg size | |
let t2 = epochTime() - t1 # sieve execution time | |
echo("\nsieve time = ", t2.formatFloat(ffDecimal, 3), " secs") | |
echo("total time = ", (t2 + te).formatFloat(ffDecimal, 3), " secs") | |
echo("last segment = ", Kn, " resgroups; segment slices = ", (Krange - 1) div Ks + 1) | |
echo("total cousins = ", cousinscnt, "; last cousin = ", last_cousin, "|-4") | |
cousinprimes_ssoz() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment