Skip to content

Instantly share code, notes, and snippets.

@jzakiya
Last active August 12, 2024 16:49
Show Gist options
  • Save jzakiya/e2fa7211b52a4aa34a4de932010eac69 to your computer and use it in GitHub Desktop.
Save jzakiya/e2fa7211b52a4aa34a4de932010eac69 to your computer and use it in GitHub Desktop.
Cousin Primes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in Nim
#[
This Nim source file is a multiple threaded implementation to perform an
extremely fast Segmented Sieve of Zakiya (SSoZ) to find Cousin Primes <= N.
Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1.
Output is the number of cousin primes <= N, or in range N1 to N2; the last
cousin prime value for the range; and the total time of execution.
Code originally developed on a System76 laptop with an Intel I7 6700HQ cpu,
2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning
would be needed to optimize for other hardware systems (ARM, PowerPC, etc).
For Nim >= 2.0.0 compile as:
$ nim c --cc:gcc --mm:arc --d:danger --d:release --threads:on --d:useMalloc cousinprimes_ssoz.nim
Then run executable: $ ./cousinprimes_ssoz <cr>, and enter range values.
Or alternatively: $ echo <range1> <range2> | ./cousinprimes_ssoz
Range values can be provided in either order (larger or smaller first).
This source file, and updates, will be available here:
https://gist.github.com/jzakiya/e2fa7211b52a4aa34a4de932010eac69
Mathematical and technical basis for implementation are explained here:
https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_
Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_
for_the_Riemann_Hypotheses
https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_
https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK
https://www.academia.edu/81206391/Twin_Primes_Segmented_Sieve_of_Zakiya_SSoZ_Explained
This code is provided free and subject to copyright and terms of the
GNU General Public License Version 3, GPLv3, or greater.
License copy/terms are here: http://www.gnu.org/licenses/
Copyright (c) 2017-2024 Jabari Zakiya -- jzakiya at gmail dot com
Version Date: 2024/08/12
]#
import math # for sqrt, gcd, mod functions
import strutils, typetraits # for number input
import times, os # for timing code execution
import osproc # for getting threads count
import threadpool # for parallel processing
import algorithm # for sort
import bitops # for countSetBits
{.experimental: "parallel".} # required to use 'parallel' (>= 1.4.x)
proc modinv(a0, b0: int): int =
## Compute modular inverse a^-1 of a to base b, e.g. a*(a^-1) mod b = 1.
var (a, b, x0) = (a0, b0, 0)
result = 1
if b == 1: return
while a > 1:
result -= (a div b) * x0
a = a mod b
swap a, b
swap x0, result
if result < 0: result += b0
proc genPGparameters(prime: int): (int, int, int, seq[int], seq[int]) =
## Create prime generator parameters for given Pn
echo("using Prime Generator parameters for P", prime)
let primes = [2, 3, 5, 7, 11, 13, 17, 19, 23]
var (modpg, res_0) = (1, 0) # compute Pn's modulus and res_0 value
for prm in primes: (res_0 = prm; if prm > prime: break; modpg *= prm)
var rescousins: seq[int] = @[] # save upper cousin pair residues here
var inverses = newSeq[int](modpg+2) # save Pn's residues inverses here
var (rc, inc, res) = (5, 2, 0) # use P3's PGS to generate residue candidates
var midmodpg = modpg shr 1; # mid value of modpg
while rc < midmodpg: # find PG's 1st half residues
if gcd(modpg, rc) == 1: # if rc is a residue
let mc = modpg - rc # create its modular complement
inverses[rc] = modinv(rc,modpg) # save rc amd mc inverses
inverses[mc] = modinv(mc,modpg) # if in cousinpair save both hi residues
if res + 4 == rc: rescousins.add(rc); rescousins.add(mc + 4)
res = rc # save current found residue
rc += inc; inc = inc xor 0b110 # create next P3 sequence pc: 5 7 11 13 17 19 ...
rescousins.add(midmodpg + 2); rescousins.sort(system.cmp[int]) # create pivot hi_cp
inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1 # last 2 resdiues are self inverses
(modpg, res_0, rescousins.len, rescousins, inverses)
# Global parameters
var
cnts: seq[uint] # cousins count for each cousinpair
lastcousins: seq[uint] # largest hi_cp for each cousinpair
proc setSieveParameters(start_num, end_num: uint): (uint, int, uint, uint, uint, uint, int, seq[int], seq[int]) =
## Select at runtime best PG and segment size factor to use for input value.
## These are good estimates derived from PG data profiling. Can be improved.
let range = end_num - start_num
var (Bn, pg) = (0, 3)
if end_num < 49'u:
Bn = 1; pg = 3
elif range < 77_000_000:
Bn = 16; pg = 5
elif range < 1_100_000_000'u:
Bn = 32; pg = 7
elif range < 35_500_000_000'u:
Bn = 64; pg = 11
elif range < 14_000_000_000_000'u:
pg = 13
if range > 7_000_000_000_000'u: Bn = 384
elif range > 2_500_000_000_000'u: Bn = 320
elif range > 250_000_000_000'u: Bn = 196
else: Bn = 128
else:
Bn = 384; pg = 17
let (modpg, res_0, pairscnt, rescousins, resinvrs) = genPGparameters(pg)
let Kmin = (start_num - 2) div modpg.uint + 1 # number of resgroups to start_num
let Kmax = (end_num - 2) div modpg.uint + 1 # number of resgroups to end_num
let krange = Kmax - Kmin + 1 # number of range resgroups, at least 1
let n = if krange < 37_500_000_000_000'u: 10 elif krange < 975_000_000_000_000'u: 16 else: 20
let B = uint(Bn * 1024 * n) # set seg size to optimize for selected PG
let Ks = if krange < B: krange else: B # segments resgroups size
echo("segment size = ",Ks," resgroups; seg array is [1 x ",((Ks-1) shr 6) + 1,"] 64-bits")
let maxpairs = krange.int * pairscnt; # maximum number of cousinprime pcs
echo("cousinprime candidates = ", maxpairs, "; resgroups = ", krange);
(modpg.uint, res_0, Ks, Kmin, Kmax, krange, pairscnt, rescousins, resinvrs)
proc sozp5(val: int | int64, res_0: int, start_num, end_num: uint): seq[int] =
## Return the primes r0..sqrt(end_num) within range (start_num...end_num)
let (md, rescnt) = (30, 8) # P5's modulus and residues count
let res = [7,11,13,17,19,23,29,31] # P5's residues list
let bitn = [0,0,0,0,0,1,0,0,0,2,0,4,0,0,0,8,0,16,0,0,0,32,0,0,0,0,0,64,0,128]
let range_size = end_num - start_num # integers size for inputs range
let kmax = (val - 2) div md + 1 # number of resgroups to input value
var prms = newSeq[uint8](kmax) # byte array of prime candidates init '0'
let sqrtn = int(sqrt float64(val)) # compute integer sqrt of val
var k = sqrtn div md # compute its resgroup value
var (resk, r) = (sqrtn - md*k, 0) # compute its residue value; set residue start posn
while resk >= res[r]: r += 1 # find largest residue <= sqrtn posn in its resgroup
let pcs_to_sqrtn = k*rescnt + r # number of pcs <= sqrtn
for i in 0..pcs_to_sqrtn: # for r0..sqrtN primes mark their multiples
let (k, r) = (i div rescnt, i mod rescnt) # update resgroup parameters
if (prms[k] and uint8(1 shl r)) != 0: continue # skip pc if not prime
let prm_r = res[r] # if prime save its residue value
let prime = md*k + prm_r # numerate its prime value
let rem = start_num mod uint(prime) # prime's modular distance to start_num
if not(uint(prime) - rem <= range_size or rem == 0): continue # skip prime if no multiple in range
for ri in res: # mark prime's multiples in prms
let prod = prm_r * ri - 2 # compute cross-product for prm_r|ri pair
let bit_r = uint8(bitn[prod mod md]) # bit mask value for prod's residue
var kpm = k * (prime + ri) + prod div md # resgroup for prime's 1st multiple
while kpm < kmax: prms[kpm] = prms[kpm] or bit_r; kpm += prime
# prms now contains the prime multiples positions marked for the pcs r0..N
# now along each restrack, identify|numerate the primes in each resgroup
# return only the primes with a multiple within range (start_num...end_num)
result = @[] # create empty dynamic array for primes
for r, res_r in res: # along each restrack|row til end
for k, resgroup in prms: # for each resgroup along restrack
if (resgroup and uint8(1 shl r)) == 0: # if bit location a prime
let prime = md * k + res_r # numerate its value, store if in range
# check if prime has multiple in range, if so keep it, if not don't
let rem = start_num mod prime.uint
if (prime in res_0..val) and (prime.uint - rem <= range_size or rem == 0): result.add(prime)
proc nextp_init(hi_r: int, kmin: uint, modpg: int, primes: seq[int], resinvrs: seq[int]): seq[uint64] =
## Initialize 'nextp' array for cousinpair upper residue rhi in 'rescousins'.
## Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and
## store consecutively as lo_cp|hi_cp pairs for their restracks.
var nextp = newSeq[uint64](primes.len * 2) # 1st mults array for cousinpair
let (r_hi, r_lo) = (hi_r, hi_r - 4) # upper|lower cousinpair residues vals
for j, prime in primes: # for each prime r0..sqrt(N)
let k = (prime - 2) div modpg # find the resgroup it's in
let r = (prime - 2) mod modpg + 2 # and its residue value
let r_inv = resinvrs[r] # and its residue inverse
let rl = (r_lo * r_inv - 2) mod modpg + 2 # compute r's ri for r_lo
let rh = (r_hi * r_inv - 2) mod modpg + 2 # compute r's ri for r_hi
var kl = uint(k * (prime + rl) + (r * rl - 2) div modpg) # and 1st mult
var kh = uint(k * (prime + rh) + (r * rh - 2) div modpg) # and 1st mult
if kl < kmin: # if 1st mult index < start_num's
kl = (kmin - kl) mod prime.uint # how many indices short is it
if kl > 0'u: kl = prime.uint - kl # adjust index value into range
else: kl = kl - kmin # else here, adjust index if it was >
if kh < kmin: # if 1st mult index < start_num's
kh = (kmin - kh) mod prime.uint # how many indices short is it
if kh > 0'u: kh = prime.uint - kh # adjust index value into range
else: kh = kh - kmin # else here, adjust index if it was >
nextp[j * 2] = kl # lo_cp index val >= start of range
nextp[j * 2 or 1] = kh # hi_cp index val >= start of range
result = nextp
proc cousins_sieve(r_hi, kmin, kmax, Ks, start_num, end_num, modpg: uint, primes: seq[int], resinvrs: seq[int], indx: int) =
## Perform in thread the ssoz for given cousinpair residues for Kmax resgroups.
## First create|init 'nextp' array of 1st prime mults for given cousinpair,
## stored consequtively in 'nextp', and init seg array for Ks resgroups.
## For sieve, mark resgroup bits to '1' if either cousinpair restrack is nonprime
## for primes mults resgroups, and update 'nextp' restrack slices acccordingly.
## Find last cousin prime|sum for range, store at array[indx] for this cousinpair.
## Can optionally compile to print mid cousinprime values generated by cousinpair.
{.gcsafe.}:
const S = 6 # shift value for 64 bits
const BMASK = (1 shl S) - 1 # bitmask val for 64 bits
var (sum, Ki, Kn) = (0'u, kmin-1, Ks.int) # init these parameters
var (hi_cp, Kmax) = (0'u, kmax) # max cousinprime|resgroup val
var seg = newSeq[uint](((Ks-1) shr S) + 1) # seg array for Ks resgroups
if r_hi - 4 < (start_num - 2) mod modpg + 2: Ki.inc # ensure lo cp in range
if r_hi > (end_num - 2) mod modpg + 2: Kmax.dec # ensure hi cp in range
var nextp = nextp_init(r_hi.int, Ki, modpg.int, primes, resinvrs) # init nextp array
while Ki < Kmax: # for Ks resgroup size slices upto Kmax
if Ks > (Kmax - Ki): Kn = int(Kmax - Ki) # adjust Kn size for last seg
for j, prime in primes: # for each prime r0..sqrt(N)
# for lower cousinpair residue track
var k1 = nextp[j * 2].int # starting from this resgroup in seg
while k1 < Kn: # mark primenth resgroup bits prime mults
seg[k1 shr S] = seg[k1 shr S] or (1'u shl (k1 and BMASK)).uint
k1 += prime # set next prime multiple resgroup
nextp[j * 2] = uint(k1 - Kn) # save 1st resgroup in next eligible seg
# for upper cousinpair residue track
var k2 = nextp[j * 2 or 1].int # starting from this resgroup in seg
while k2 < Kn: # mark primenth resgroup bits prime mults
seg[k2 shr S] = seg[k2 shr S] or (1'u shl (k2 and BMASK)).uint
k2 += prime # set next prime multiple resgroup
nextp[j * 2 or 1] = uint(k2 - Kn) # save 1st resgroup in next eligible seg
# set as nonprime unused bits in last seg[n]
# so fast, do for every seg[i]
seg[(Kn-1) shr S] = seg[(Kn-1) shr S] or (not 1'u shl ((Kn-1) and BMASK)).uint
var cnt = 0 # count the twinprimes in the segment
for m in seg[0..(Kn-1) shr S]: cnt += (1 shl S) - countSetBits(m)
if cnt > 0: # if segment has cousin primes
sum += cnt.uint # add segment count to total range count
var upk = Kn - 1 # from end of seg, count down to largest cp
while (seg[upk shr S] and (1'u shl (upk and BMASK)).uint) != 0: upk.dec
hi_cp = Ki + upk.uint # numerate its full resgroup value
Ki += Ks # set 1st resgroup val of next seg slice
if Ki < Kmax: (for b in 0..(Kn - 1) shr S: seg[b] = 0) # set seg to all primes
# when sieve done for full range
# numerate largest cousinprime in segs
hi_cp = if r_hi > end_num: 0'u else: hi_cp * modpg.uint + r_hi
lastcousins[indx] = if sum == 0: 0'u else: hi_cp # store final seg cp value
cnts[indx] = sum # sum for cousin pair
proc cousinprimes_ssoz() =
## Main routine to setup, time, and display results for cousin primes sieve.
let x = stdin.readline.split # Inputs are 1 or 2 range values < 2**64
var end_num = max(x[0].parseUInt, 3'u)
var start_num = if x.len > 1: max(x[1].parseUInt, 3'u) else: 3'u
if start_num > end_num: swap start_num, end_num
start_num = start_num or 1 # if start_num even add 1
end_num = (end_num - 1) or 1 # if end_num even subtract 1
if end_num - start_num < 4: (start_num, end_num) = (7'u, 7'u)
echo("threads = ", countProcessors())
let ts = epochTime() # start timing sieve setup execution
# select Pn, set sieving params for inputs
let (modpg, res_0, Ks, Kmin, Kmax, Krange,
pairscnt, rescousins, resinvrs) = setSieveParameters(start_num, end_num)
# create sieve primes <= sqrt(end_num), only use those whose multiples within inputs range
let primes: seq[int] = if end_num < 49: @[5]
else: sozp5(int(sqrt float64(end_num)), res_0, start_num, end_num)
echo("each of ", pairscnt, " threads has nextp[", 2, " x ", primes.len, "] array")
let te = epochTime() - ts # sieve setup time
echo("setup time = ", te.formatFloat(ffDecimal, 6), " secs")
echo("perform cousinprimes ssoz sieve")
let t1 = epochTime() # start timing ssoz sieve execution
cnts = newSeq[uint](pairscnt) # cousins count for each cousinpair
lastcousins = newSeq[uint](pairscnt) # largest hi_cp for each cousinpair
#parallel: # perform in parallel
for indx, r_hi in rescousins: # for each cousin pair row index
spawn cousins_sieve(r_hi.uint, Kmin, Kmax, Ks, start_num, end_num, modpg, primes, resinvrs, indx)
stdout.write("\r", (indx + 1), " of ", pairscnt, " cousinpairs done")
sync() # when all the threads finish
var cousinscnt = 0'u # count of cousinprimes in range
var last_cousin = 0'u # largest hi_cp cousin in range
if end_num < 49: # check for cousins in low ranges
for c_hi in [11, 17, 23, 41, 47]:
if start_num <= uint(c_hi - 4) and c_hi.uint <= end_num: last_cousin = c_hi.uint; cousinscnt += 1
else: # check for cousins from sieve
for i in 0 ..< pairscnt: # find largest cousinprime|count in range
cousinscnt += cnts[i]
if last_cousin < lastcousins[i]: last_cousin = lastcousins[i]
# account for 1st cousin prime, defined as (3, 7)
if start_num == 3 and end_num > 6: cousinscnt += 1; (if end_num < 11: last_cousin = 7)
var Kn = Krange mod Ks # set number of resgroups in last slice
if Kn == 0: Kn = Ks # if multiple of seg size set to seg size
let t2 = epochTime() - t1 # sieve execution time
echo("\nsieve time = ", t2.formatFloat(ffDecimal, 3), " secs")
echo("total time = ", (t2 + te).formatFloat(ffDecimal, 3), " secs")
echo("last segment = ", Kn, " resgroups; segment slices = ", (Krange - 1) div Ks + 1)
echo("total cousins = ", cousinscnt, "; last cousin = ", last_cousin, "|-4")
cousinprimes_ssoz()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment