Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Twinprimes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in Nim
#[
This Nim source file is a multiple threaded implementation to perform an
extremely fast Segmented Sieve of Zakiya (SSoZ) to find Twin Primes <= N.
Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1.
Output is the number of twin primes <= N, or in range N1 to N2; the last
twin prime value for the range; and the total time of execution.
This code was developed on a System76 laptop with an Intel I7 6700HQ cpu,
2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning
would be needed to optimize for other hardware systems (ARM, PowerPC, etc).
To compile for nim versions <= 0.19.x (can use --cc:gcc or --cc:clang) do:
$ nim c --cc:gcc --d:release --threads:on twinprimes_ssoz.nim
For Nim >= 0.20.0|1.0-rc1 compile as:
$ nim c --cc:gcc --d:danger --threads:on twinprimes_ssoz.nim
Then run executable: $ ./twinprimes_ssoz <cr>, and enter range values.
Or alternatively: $ echo <range1> <range2> | ./twinprimes_ssoz
Range values can be provided in either order (larger or smaller first).
This source file, and updates, will be available here:
https://gist.github.com/jzakiya/6c7e1868bd749a6b1add62e3e3b2341e
Mathematical and technical basis for implementation are explained here:
https://www.scribd.com/document/395415391/The-Use-of-Prime-Generators-to-
Implement-Fast-Twin-Primes-Sieve-Of-Zakiya-SoZ-Applications-to-Number-Theory-
and-Implications-for-the-Riemann-Hypoth
https://www.scribd.com/doc/228155369/The-Segmented-Sieve-of-Zakiya-SSoZ
https://www.scribd.com/document/266461408/Primes-Utils-Handbook
This code is provided free and subject to copyright and terms of the
GNU General Public License Version 3, GPLv3, or greater.
License copy/terms are here: http://www.gnu.org/licenses/
Copyright (c) 2017-19 Jabari Zakiya -- jzakiya at gmail dot com
Version Date: 2019/9/4
]#
import math # for sqrt, gcd, mod functions
import strutils, typetraits # for number input
import times, os # for timing code execution
import osproc # for getting threads count
import threadpool # for parallel processing
import algorithm # for sort
{.experimental.} # required to use 'parallel' (<= 0.20.x)
# Global array used to count number of primes in each 'seg' byte.
# Each value is number of '0' bits (primes) for values 0..255.
let pbits = [
8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3
,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2
,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2
,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1
,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2
,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1
,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1
,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0]
proc modinv(a0, b0: int): int =
## Compute modular inverse a^-1 of a to base b, e.g. a*(a^-1) mod b = 1.
var (a, b, x0) = (a0, b0, 0)
result = 1
if b == 1: return
while a > 1:
result -= (a div b) * x0
a = a mod b
swap a, b
swap x0, result
if result < 0: result += b0
proc genPGparameters(prime: int): (int, int, seq[int], seq[int]) =
## Create constant parameters for given PG at compile time.
#echo("generating parameters for P", prime)
let primes = [2, 3, 5, 7, 11, 13, 17, 19, 23]
var (modpg, res_0) = (1, 0) # compute PG's modulus and res_0 value
for prm in primes: (res_0 = prm; if prm > prime: break; modpg *= prm)
var restwins: seq[int] = @[] # save upper twin pair residues here
var inverses = newSeq[int](modpg) # save PG's residues inverses here
var (pc, inc, res) = (5, 2, 0) # use P3's PGS to generate pcs
while pc < modpg div 2: # find a residue, then modular complement
if gcd(modpg, pc) == 1: # if pc a residue
let pc_mc = modpg - pc # create its modular complement
var inv_r = modinv(pc, modpg) # compute residues's inverse
inverses[pc - 2] = inv_r # save its inverse
inverses[inv_r - 2] = pc # save its inverse inverse
inv_r = modinv(pc_mc, modpg) # compute residues's complement inverse
inverses[pc_mc - 2] = inv_r # save its inverse
inverses[inv_r - 2] = pc_mc # save its inverse inverse
if res + 2 == pc: restwins.add(pc); restwins.add(pc_mc + 2) # save hi_tp residues
res = pc
pc += inc; inc = inc xor 0b110
restwins.sort(system.cmp[int]); restwins.add(modpg + 1) # last residue is last hi_tp
inverses[modpg - 1] = 1; inverses[modpg - 3] = modpg - 1 # last 2 resdiues are self inverses
result = (modpg, res_0, restwins, inverses)
# Global parameters
var
KB = 0 # segment size for each seg restrack
start_num: uint # lo number for range
end_num: uint # hi number for range
Kmax: uint # number of resgroups to end_num
Kmin: uint # number of resgroups to start_num
primes: seq[int] # list of primes r1..sqrt(N)
cnts: seq[uint] # hold twin primes counts for seg bytes
lastwins: seq[uint] # holds largest twin prime <= num in each thread
modpg: int # PG's modulus value
res_0: int # PG's first residue value
restwins: seq[int] # PG's list of twinpair residues
resinvrs: seq[int] # PG's list of residues inverses
Bn: int # segment size factor for PG and input number
s: int # 0|3 for 1|8 resgroups/byte for 'small|large' ranges
proc selectPG(start_range, end_range: uint) =
## Select at runtime best PG and segment size factor to use for input value.
## These are good estimates derived from PG data profiling. Can be improved.
let range = end_range - start_range
var pg = 3
if end_range < 49'u:
Bn = 1; pg = 3
elif range < 10_000_000:
Bn = 16; pg = 5
elif range < 1_100_000_000'u:
Bn = 32; pg = 7
elif range < 35_500_000_000'u:
Bn = 64; pg = 11
elif range < 15_000_000_000_000'u:
pg = 13
if range > 7_000_000_000_000'u: Bn = 384
elif range > 2_500_000_000_000'u: Bn = 320
elif range > 250_000_000_000'u: Bn = 196
else: Bn = 128 #96
else:
Bn = 384; pg = 17
# Set value for 'small' or 'large' ranges to opt sieving
s = if range < 100_000_000_000'u: 0 else: 3
(modpg, res_0, restwins, resinvrs) = genPGparameters(pg)
proc sozpg(val: int | int64) =
## Compute the primes r1..sqrt(input_num) and store in global 'primes' array.
## Any algorithm (fast|small) is usable. Here the SoZ for P5 is used.
let md = 30 # P5's modulus value
let rscnt = 8 # P5's residue count
let res =[7,11,13,17,19,23,29,31] # P5's residues list
let posn = [0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,3,0,4,0,0,0,5,0,0,0,0,0,6,0,7]
let kmax = (val - 7) div md + 1 # number of resgroups to input value
var prms = newSeq[uint8](kmax) # byte array of prime candidates init '0'
let sqrtN =int(sqrt float64(val)) # integer sqrt of sqrt(input value)
var (modk, r, k) = (0, -1, 0) # init residue parameters
# mark the multiples of the primes r1..sqrtN in 'prms'
while true:
if (r.inc; r) == rscnt: (r = 0; modk += md; k.inc)
if (prms[k] and uint8(1 shl r)) != 0: continue # skip pc if not prime
let prm_r = res[r] # if prime save its residue value
let prime = modk + prm_r # numerate the prime value
if prime > sqrtN: break # we're finished when it's > sqrtN
for ri in res: # mark prime's multiples in prms
let prod = prm_r * ri - 2 # compute cross-product for prm_r|ri pair
let bit_r = uint8(1 shl posn[prod mod md]) # bit mask for prod's residue
var kpm = k * (prime + ri) + prod div md # 1st resgroup for prime mult
while kpm < kmax: prms[kpm] = prms[kpm] or bit_r; kpm += prime
# prms now contains the nonprime positions for the prime candidates r1..N
# extract primes into global var 'primes'
primes = @[] # create empty dynamic array for primes
for k in 0..kmax - 1: # for each resgroup
for r, res_r in res: # extract the primes in numerical order
if (prms[k] and uint8(1 shl r)) == 0: primes.add(md * k + res_r)
while primes[0] < res_0: primes.delete(0)
while primes[^1] > val: discard primes.pop()
#[
proc printprms(Kn: int, Ki: uint64, indx: int, seg: seq[uint8]) =
## Print twinprimes for given twinpair for given segment slice.
## Primes will not be displayed in sorted order, collect|sort later for that.
var modk = Ki * modpg.uint # base value of 1st resgroup in slice
let r_hi = restwins[indx].uint # for upper twinpair residue value
for k in 0..(Kn - 1) shr 3: # for each byte of resgroups in slice
for r in 0..7: # extract the primes for each resgroup
if (seg[k] and uint8(1 shl r)) == 0 and (modk + r_hi) <= end_num:
echo(modk + r_hi - 1) # print twinprime mid val on a line
modk += modpg.uint # set base value for next resgroup
]#
proc nextp_init(hi_r: int): seq[uint64] =
## Initialize 'nextp' array for given twin pair residues for thread.
## Compute 1st prime multiple resgroups for each prime r1..sqrt(N)
## and store consecutively as lo_tp|hi_tp pairs for their restracks.
var nextp = newSeq[uint64](primes.len * 2) # 1st mults array for twin pair
let (r_hi, r_lo) = (hi_r, hi_r - 2) # upper|lower twin pair residues
var kmin = Kmin - 1 # resgroup index of start_num
if kmin * modpg.uint + r_lo.uint < start_num: kmin.inc # ensure r_lo in range
for j, prime in primes: # for each prime r1..sqrt(N)
let k = (prime - 2) div modpg # find the resgroup it's in
let r = (prime - 2) mod modpg + 2 # and its residue value
let r_inv = resinvrs[r - 2] # and its residue inverse
var ri = (r_lo * r_inv - 2) mod modpg + 2 # compute the ri for r for lo_tp
var ki = uint(k * (prime + ri) + (r * ri - 2) div modpg)
if ki < kmin: # if 1st mult index < start_num's
ki = (kmin - ki) mod prime.uint # how many indices short is it
if ki > 0'u: ki = prime.uint - ki # adjust index value into range
else: ki = ki - kmin # else here, adjust index if it was >
nextp[2 * j] = ki # lo_tp index val >= start of range
ri = (r_hi * r_inv - 2) mod modpg + 2 # compute the ri for r for hi_tp
ki = uint(k * (prime + ri) + (r * ri - 2) div modpg)
if ki < kmin: # if 1st mult index < start_num's
ki = (kmin - ki) mod prime.uint # how many indices short is it
if ki > 0'u: ki = prime.uint - ki # adjust index value into range
else: ki = ki - kmin # else here, adjust index if it was >
nextp[2 * j or 1] = ki # hi_tp index val >= start of range
result = nextp
proc twins_sieve(indx: int, r_hi: uint) {.gcsafe.} =
## Perform in a thread, the ssoz for a given twinpair, for Kmax resgroups.
## First create|init 'nextp' array of 1st prime mults for given twin pair,
## (stored consequtively in 'nextp') and init seg byte array for KB resgroups.
## For sieve, mark resgroup bits to '1' if either twinpair restrack is nonprime,
## for primes mults resgroups, and update 'nextp' restrack slices acccordingly.
## Find last twin prime|sum for range, store in their arrays for this twinpair.
## Can optionally compile to print mid twinprime values generated by twinpair.
## Uses optimum segment sieve structure for 'small' and 'large' range values.
{.gcsafe.}:
var (sum, Ki, Kn) = (0'u, Kmin-1, KB)# init twins cnt|1st resgroup for slice
var (hi_tp, Kmax) = (0'u, Kmax) # max tp|resgroup, Kmax for slice
var seg = newSeq[uint8](((KB-1) shr s) + 1) # seg byte array for Kb resgroups
var nextp = nextp_init(r_hi.int) # 1st prime mults for twin pair restracks
if Ki * modpg.uint + (r_hi - 2) < start_num: Ki.inc # ensure lo tps in range
if (Kmax - 1) * modpg.uint + r_hi > end_num: Kmax.dec # and hi tps in range
while Ki < Kmax: # for Kn resgroup size slices upto Kmax
if Kb > int(Kmax - Ki): Kn = int(Kmax - Ki) # set last slice resgroup size
for b in 0..(Kn - 1) shr s: seg[b] = 0 # set all seg byte bits to prime
for j, prime in primes: # for each prime index r1..sqrt(N)
# for lower twin pair residue track
var k = nextp[2 * j].int # starting from this resgroup in seg
while k < Kn: # mark primenth resgroup bits prime mults
if s > 0: seg[k shr 3] = seg[k shr 3] or (1 shl (k and 7)).uint8
else: seg[k] = seg[k] or 1
k += prime # set next prime multiple resgroup
nextp[2 * j] = uint(k - Kn) # save 1st resgroup in next eligible seg
# for upper twin pair residue track
k = nextp[2 * j or 1].int # starting from this resgroup in seg
while k < Kn: # mark primenth resgroup bits prime mults
if s > 0: seg[k shr 3] = seg[k shr 3] or (1 shl (k and 7)).uint8
else: seg[k] = seg[k] or 1
k += prime # set next prime multiple resgroup
nextp[2 * j or 1] = uint(k - Kn) # save 1st resgroup in next eligible seg
# need to set as nonprime unused bits in last
# byte of last seg; so fast, do for every seg
if s > 0: seg[(Kn-1) shr 3] = seg[(Kn-1) shr 3] or (not ((2 shl ((Kn-1) mod 8)) - 1)).uint8
var cnt = 0 # init seg twin primes count then find seg sum
if s > 0: (for k in 0..(Kn - 1) shr 3: cnt += pbits[seg[k]])
else: (for k in 0..Kn - 1: (if seg[k] == 0: cnt.inc))
if cnt > 0: # if segment has twin primes
sum += cnt.uint # add the segment count to total count
var upk = Kn - 1 # from end of seg, count backwards to largest tp
if s > 0: (while (seg[upk shr 3] and (1 shl (upk and 7)).uint8) != 0: upk.dec)
else: (while seg[upk] != 0: upk.dec)
hi_tp = Ki + upk.uint # numerate its full resgroup value
#printprms(Kn, Ki, indx, seg) # optional: display twin primes in seg
Ki += KB.uint # set 1st resgroup val of next seg slice
# numerate largest twin prime in segs
hi_tp = if r_hi > end_num: 0'u else: hi_tp * modpg.uint + r_hi
lastwins[indx] = if sum == 0: 1'u else: hi_tp # store final seg tp value
cnts[indx] = sum # sum for twin pair
proc twinprimes_ssoz() =
## Main routine to setup, time, and display results for twin primes sieve.
let x = stdin.readline.split
end_num = max(x[0].parseUInt, 3'u)
start_num = if x.len > 1: max(x[1].parseUInt, 3'u) else: 3'u
if start_num > end_num: swap start_num, end_num
echo("threads = ", countProcessors())
let ts = epochTime() # start timing sieve setup execution
start_num = start_num or 1 # if start_num even add 1
end_num = (end_num - 1) or 1 # if end_num even subtract 1
selectPG(start_num, end_num) # select PG and seg factor Bn for input range
let pairscnt = restwins.len # number of twin pairs for selected PG
cnts = newSeq[uint](pairscnt) # array to hold count of tps for each thread
lastwins = newSeq[uint](pairscnt)# array to hold largest tp for each thread
Kmin = (start_num-2) div modpg.uint + 1 # number of resgroups to start_num
Kmax = (end_num-2) div modpg.uint + 1 # number of resgroups to end_num
let range = Kmax - Kmin + 1 # number of range resgroups, at least 1
var n = if range < 37_500_000_000_000'u: 4 elif range < 975_000_000_000_000'u: 6 else: 8
if s == 0: n = 1
let B = Bn * 1024 * n # set seg size to optimize for selected PG
KB = if range.int < B: range.int else: B # segments resgroups size
echo("each thread segment is [", 1, " x ", ((KB-1) shr s) + 1, "] bytes array")
# This is not necessary for running the program but provides information
# to determine the 'efficiency' of the used PG: (num of primes)/(num of pcs)
let maxpairs = range * pairscnt.uint # maximum number of twinprime pcs
echo("twinprime candidates = ", maxpairs, "; resgroups = ", range)
# End of non-essential code.
if end_num < 49'u: primes.add(5) # generate sieving primes
else: sozpg(int(sqrt float64(end_num))) # <= sqrt(end_num)
echo("each ", pairscnt, " threads has nextp[", 2, " x ", primes.len, "] array")
var twinscnt = 0'u64 # number of twin primes in range
let lo_range = uint(restwins[0] - 3) # lo_range = lo_tp - 1
for tp in [3, 5, 11, 17]: # excluded low tp values for PGs used
if end_num == 3'u: break # if 3 end of range, no twin primes
if tp.uint in start_num..lo_range: twinscnt += 1 # cnt small tps if any
let te = epochTime() - ts # sieve setup time
echo("setup time = ", te.formatFloat(ffDecimal, 6), " secs")
echo("perform twinprimes ssoz sieve")
let t1 = epochTime() # start timing ssoz sieve execution
parallel: # perform in parallel
for indx, r_hi in restwins: # for each twin pair row index
spawn twins_sieve(indx, r_hi.uint) # sieve selected twin pair restracks
stdout.write("\r", (indx + 1), " of ", pairscnt, " threads done")
sync() # when all the threads finish
var last_twin = 0'u # find largest twin prime in range
for i in 0..pairscnt - 1: # and determine total twin primes count
twinscnt += cnts[i]
if last_twin < lastwins[i]: last_twin = lastwins[i]
if end_num == 5'u and twinscnt == 1: last_twin = 5'u
var Kn = range.int mod KB # set number of resgroups in last slice
if Kn == 0: Kn = KB # if multiple of seg size set to seg size
let t2 = epochTime() - t1 # sieve execution time
echo("\nsieve time = ", t2.formatFloat(ffDecimal, 3), " secs")
echo("last segment = ", Kn, " resgroups; segment slices = ", (range - 1) div KB.uint + 1)
echo("total twins = ", twinscnt, "; last twin = ", last_twin - 1, "+/-1")
echo("total time = ", (t2 + te).formatFloat(ffDecimal, 3), " secs\n")
twinprimes_ssoz()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.