Created
December 18, 2019 17:32
-
-
Save haampie/cbc3427430dca6fe0008f461249dff72 to your computer and use it in GitHub Desktop.
prime-monstrosity.jl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module PrimeMonstrosity | |
const bit_1 = ~(0x01 << 7) | |
const bit_2 = ~(0x01 << 6) | |
const bit_3 = ~(0x01 << 5) | |
const bit_4 = ~(0x01 << 4) | |
const bit_5 = ~(0x01 << 3) | |
const bit_6 = ~(0x01 << 2) | |
const bit_7 = ~(0x01 << 1) | |
const bit_8 = ~(0x01 << 0) | |
function pp(p) | |
dr = [divrem(p * j, 30) for j in (1, 7, 11, 13, 17, 19, 23, 29, 31)] | |
d = first.(dr) | |
r = last.(dr) | |
dd = circshift(d, 1) | |
d, to_idx.(r), d .- dd | |
end | |
function to_idx(x) | |
x == 1 && return 1 | |
x == 7 && return 2 | |
x == 11 && return 3 | |
x == 13 && return 4 | |
x == 17 && return 5 | |
x == 19 && return 6 | |
x == 23 && return 7 | |
return 8 | |
end | |
function to_num(byte, bit) | |
ps = (1, 7, 11, 13, 17, 19, 23, 29) | |
30 * (byte - 1) + ps[bit] | |
end | |
function sieve(n::Int) | |
n_bytes = ceil(Int, n / 30) | |
xs = Vector{UInt8}(undef, n_bytes) | |
fill!(xs, 0xFF) | |
ps = (1, 7, 11, 13, 17, 19, 23, 29) | |
@inbounds for i = 1 : length(xs) | |
x = xs[i] | |
for k = UInt8(1) : UInt8(8) | |
# Make sure our current number is a prime | |
((x >> (0x08 - k)) & 0x01) !== 0x01 && continue | |
# Compute our current sieving prime | |
p = ps[k] + (i - 1) * 30 | |
p * p > to_num(n_bytes, 8) && @goto done_sieving | |
# Don't sieve with 2, 3 and 5 | |
p < 7 && continue | |
# Start sieving with p. | |
byte_idx = p * p ÷ 30 + 1 | |
wheel_idx = to_idx(p % 30) | |
prime_mod = to_idx(p % 30) | |
increment = p ÷ 30 | |
unrolled_max = n_bytes - increment * 28 - 28 | |
if prime_mod === 1 | |
if wheel_idx == 1 | |
@goto x1 | |
elseif wheel_idx == 2 | |
@goto x2 | |
elseif wheel_idx == 3 | |
@goto x3 | |
elseif wheel_idx == 4 | |
@goto x4 | |
elseif wheel_idx == 5 | |
@goto x5 | |
elseif wheel_idx == 6 | |
@goto x6 | |
elseif wheel_idx == 7 | |
@goto x7 | |
elseif wheel_idx == 8 | |
@goto x8 | |
end | |
while true | |
@label x1 | |
while true | |
byte_idx > unrolled_max && break | |
xs[byte_idx + increment * 0 + 0] &= bit_1 | |
xs[byte_idx + increment * 6 + 0] &= bit_2 | |
xs[byte_idx + increment * 10 + 0] &= bit_3 | |
xs[byte_idx + increment * 12 + 0] &= bit_4 | |
xs[byte_idx + increment * 16 + 0] &= bit_5 | |
xs[byte_idx + increment * 18 + 0] &= bit_6 | |
xs[byte_idx + increment * 22 + 0] &= bit_7 | |
xs[byte_idx + increment * 28 + 0] &= bit_8 | |
byte_idx += increment * 30 + 1 | |
end | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_1 | |
byte_idx += increment * 6 + 0 | |
@label x2 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_2 | |
byte_idx += increment * 4 + 0 | |
@label x3 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_3 | |
byte_idx += increment * 2 + 0 | |
@label x4 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_4 | |
byte_idx += increment * 4 + 0 | |
@label x5 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_5 | |
byte_idx += increment * 2 + 0 | |
@label x6 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_6 | |
byte_idx += increment * 4 + 0 | |
@label x7 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_7 | |
byte_idx += increment * 6 + 0 | |
@label x8 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_8 | |
byte_idx += increment * 2 + 1 | |
end | |
elseif prime_mod == 2 | |
if wheel_idx == 1 | |
@goto x9 | |
elseif wheel_idx == 2 | |
@goto x10 | |
elseif wheel_idx == 3 | |
@goto x11 | |
elseif wheel_idx == 4 | |
@goto x12 | |
elseif wheel_idx == 5 | |
@goto x13 | |
elseif wheel_idx == 6 | |
@goto x14 | |
elseif wheel_idx == 7 | |
@goto x15 | |
elseif wheel_idx == 8 | |
@goto x16 | |
end | |
# 7 | |
while true | |
@label x9 | |
while true | |
byte_idx > unrolled_max && break | |
xs[byte_idx + increment * 0 + 0] &= bit_2 | |
xs[byte_idx + increment * 6 + 1] &= bit_6 | |
xs[byte_idx + increment * 10 + 2] &= bit_5 | |
xs[byte_idx + increment * 12 + 3] &= bit_1 | |
xs[byte_idx + increment * 16 + 3] &= bit_8 | |
xs[byte_idx + increment * 18 + 4] &= bit_4 | |
xs[byte_idx + increment * 22 + 5] &= bit_3 | |
xs[byte_idx + increment * 28 + 6] &= bit_7 | |
byte_idx += increment * 30 + 7 | |
end | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_2 | |
byte_idx += increment * 6 + 1 | |
@label x10 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_6 | |
byte_idx += increment * 4 + 1 | |
@label x11 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_5 | |
byte_idx += increment * 2 + 1 | |
@label x12 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_1 | |
byte_idx += increment * 4 + 0 | |
@label x13 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_8 | |
byte_idx += increment * 2 + 1 | |
@label x14 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_4 | |
byte_idx += increment * 4 + 1 | |
@label x15 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_3 | |
byte_idx += increment * 6 + 1 | |
@label x16 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_7 | |
byte_idx += increment * 2 + 1 | |
end | |
elseif prime_mod === 3 | |
if wheel_idx == 1 | |
@goto x17 | |
elseif wheel_idx == 2 | |
@goto x18 | |
elseif wheel_idx == 3 | |
@goto x19 | |
elseif wheel_idx == 4 | |
@goto x20 | |
elseif wheel_idx == 5 | |
@goto x21 | |
elseif wheel_idx == 6 | |
@goto x22 | |
elseif wheel_idx == 7 | |
@goto x23 | |
elseif wheel_idx == 8 | |
@goto x24 | |
end | |
# 11 | |
while true | |
@label x17 | |
while true | |
byte_idx > unrolled_max && break | |
xs[byte_idx + increment * 0 + 0] &= bit_3 | |
xs[byte_idx + increment * 6 + 2] &= bit_5 | |
xs[byte_idx + increment * 10 + 4] &= bit_1 | |
xs[byte_idx + increment * 12 + 4] &= bit_7 | |
xs[byte_idx + increment * 16 + 6] &= bit_2 | |
xs[byte_idx + increment * 18 + 6] &= bit_8 | |
xs[byte_idx + increment * 22 + 8] &= bit_4 | |
xs[byte_idx + increment * 28 + 10] &= bit_6 | |
byte_idx += increment * 30 + 11 | |
end | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_3 | |
byte_idx += increment * 6 + 2 | |
@label x18 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_5 | |
byte_idx += increment * 4 + 2 | |
@label x19 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_1 | |
byte_idx += increment * 2 + 0 | |
@label x20 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_7 | |
byte_idx += increment * 4 + 2 | |
@label x21 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_2 | |
byte_idx += increment * 2 + 0 | |
@label x22 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_8 | |
byte_idx += increment * 4 + 2 | |
@label x23 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_4 | |
byte_idx += increment * 6 + 2 | |
@label x24 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_6 | |
byte_idx += increment * 2 + 1 | |
end | |
elseif prime_mod === 4 | |
if wheel_idx == 1 | |
@goto x25 | |
elseif wheel_idx == 2 | |
@goto x26 | |
elseif wheel_idx == 3 | |
@goto x27 | |
elseif wheel_idx == 4 | |
@goto x28 | |
elseif wheel_idx == 5 | |
@goto x29 | |
elseif wheel_idx == 6 | |
@goto x30 | |
elseif wheel_idx == 7 | |
@goto x31 | |
elseif wheel_idx == 8 | |
@goto x32 | |
end | |
# 13 | |
while true | |
@label x25 | |
while true | |
byte_idx > unrolled_max && break | |
xs[byte_idx + increment * 0 + 0] &= bit_4 | |
xs[byte_idx + increment * 6 + 3] &= bit_1 | |
xs[byte_idx + increment * 10 + 4] &= bit_7 | |
xs[byte_idx + increment * 12 + 5] &= bit_6 | |
xs[byte_idx + increment * 16 + 7] &= bit_3 | |
xs[byte_idx + increment * 18 + 8] &= bit_2 | |
xs[byte_idx + increment * 22 + 9] &= bit_8 | |
xs[byte_idx + increment * 28 + 12] &= bit_5 | |
byte_idx += increment * 30 + 13 | |
end | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_4 | |
byte_idx += increment * 6 + 3 | |
@label x26 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_1 | |
byte_idx += increment * 4 + 1 | |
@label x27 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_7 | |
byte_idx += increment * 2 + 1 | |
@label x28 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_6 | |
byte_idx += increment * 4 + 2 | |
@label x29 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_3 | |
byte_idx += increment * 2 + 1 | |
@label x30 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_2 | |
byte_idx += increment * 4 + 1 | |
@label x31 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_8 | |
byte_idx += increment * 6 + 3 | |
@label x32 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_5 | |
byte_idx += increment * 2 + 1 | |
end | |
elseif prime_mod === 5 | |
if wheel_idx == 1 | |
@goto x33 | |
elseif wheel_idx == 2 | |
@goto x34 | |
elseif wheel_idx == 3 | |
@goto x35 | |
elseif wheel_idx == 4 | |
@goto x36 | |
elseif wheel_idx == 5 | |
@goto x37 | |
elseif wheel_idx == 6 | |
@goto x38 | |
elseif wheel_idx == 7 | |
@goto x39 | |
elseif wheel_idx == 8 | |
@goto x40 | |
end | |
# 17 | |
while true | |
@label x33 | |
while true | |
byte_idx > unrolled_max && break | |
xs[byte_idx + increment * 0 + 0] &= bit_5 | |
xs[byte_idx + increment * 6 + 3] &= bit_8 | |
xs[byte_idx + increment * 10 + 6] &= bit_2 | |
xs[byte_idx + increment * 12 + 7] &= bit_3 | |
xs[byte_idx + increment * 16 + 9] &= bit_6 | |
xs[byte_idx + increment * 18 + 10] &= bit_7 | |
xs[byte_idx + increment * 22 + 13] &= bit_1 | |
xs[byte_idx + increment * 28 + 16] &= bit_4 | |
byte_idx += increment * 30 + 17 | |
end | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_5 | |
byte_idx += increment * 6 + 3 | |
@label x34 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_8 | |
byte_idx += increment * 4 + 3 | |
@label x35 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_2 | |
byte_idx += increment * 2 + 1 | |
@label x36 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_3 | |
byte_idx += increment * 4 + 2 | |
@label x37 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_6 | |
byte_idx += increment * 2 + 1 | |
@label x38 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_7 | |
byte_idx += increment * 4 + 3 | |
@label x39 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_1 | |
byte_idx += increment * 6 + 3 | |
@label x40 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_4 | |
byte_idx += increment * 2 + 1 | |
end | |
elseif prime_mod === 6 | |
if wheel_idx == 1 | |
@goto x41 | |
elseif wheel_idx == 2 | |
@goto x42 | |
elseif wheel_idx == 3 | |
@goto x43 | |
elseif wheel_idx == 4 | |
@goto x44 | |
elseif wheel_idx == 5 | |
@goto x45 | |
elseif wheel_idx == 6 | |
@goto x46 | |
elseif wheel_idx == 7 | |
@goto x47 | |
elseif wheel_idx == 8 | |
@goto x48 | |
end | |
# 19 | |
while true | |
@label x41 | |
while true | |
byte_idx > unrolled_max && break | |
xs[byte_idx + increment * 0 + 0] &= bit_6 | |
xs[byte_idx + increment * 6 + 4] &= bit_4 | |
xs[byte_idx + increment * 10 + 6] &= bit_8 | |
xs[byte_idx + increment * 12 + 8] &= bit_2 | |
xs[byte_idx + increment * 16 + 10] &= bit_7 | |
xs[byte_idx + increment * 18 + 12] &= bit_1 | |
xs[byte_idx + increment * 22 + 14] &= bit_5 | |
xs[byte_idx + increment * 28 + 18] &= bit_3 | |
byte_idx += increment * 30 + 19 | |
end | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_6 | |
byte_idx += increment * 6 + 4 | |
@label x42 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_4 | |
byte_idx += increment * 4 + 2 | |
@label x43 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_8 | |
byte_idx += increment * 2 + 2 | |
@label x44 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_2 | |
byte_idx += increment * 4 + 2 | |
@label x45 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_7 | |
byte_idx += increment * 2 + 2 | |
@label x46 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_1 | |
byte_idx += increment * 4 + 2 | |
@label x47 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_5 | |
byte_idx += increment * 6 + 4 | |
@label x48 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_3 | |
byte_idx += increment * 2 + 1 | |
end | |
elseif prime_mod === 7 | |
if wheel_idx == 1 | |
@goto x49 | |
elseif wheel_idx == 2 | |
@goto x50 | |
elseif wheel_idx == 3 | |
@goto x51 | |
elseif wheel_idx == 4 | |
@goto x52 | |
elseif wheel_idx == 5 | |
@goto x53 | |
elseif wheel_idx == 6 | |
@goto x54 | |
elseif wheel_idx == 7 | |
@goto x55 | |
elseif wheel_idx == 8 | |
@goto x56 | |
end | |
# 23 | |
while true | |
@label x49 | |
while true | |
byte_idx > unrolled_max && break | |
xs[byte_idx + increment * 0 + 0] &= bit_7 | |
xs[byte_idx + increment * 6 + 5] &= bit_3 | |
xs[byte_idx + increment * 10 + 8] &= bit_4 | |
xs[byte_idx + increment * 12 + 9] &= bit_8 | |
xs[byte_idx + increment * 16 + 13] &= bit_1 | |
xs[byte_idx + increment * 18 + 14] &= bit_5 | |
xs[byte_idx + increment * 22 + 17] &= bit_6 | |
xs[byte_idx + increment * 28 + 22] &= bit_2 | |
byte_idx += increment * 30 + 23 | |
end | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_7 | |
byte_idx += increment * 6 + 5 | |
@label x50 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_3 | |
byte_idx += increment * 4 + 3 | |
@label x51 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_4 | |
byte_idx += increment * 2 + 1 | |
@label x52 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_8 | |
byte_idx += increment * 4 + 4 | |
@label x53 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_1 | |
byte_idx += increment * 2 + 1 | |
@label x54 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_5 | |
byte_idx += increment * 4 + 3 | |
@label x55 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_6 | |
byte_idx += increment * 6 + 5 | |
@label x56 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_2 | |
byte_idx += increment * 2 + 1 | |
end | |
elseif prime_mod === 8 | |
if wheel_idx == 1 | |
@goto x57 | |
elseif wheel_idx == 2 | |
@goto x58 | |
elseif wheel_idx == 3 | |
@goto x59 | |
elseif wheel_idx == 4 | |
@goto x60 | |
elseif wheel_idx == 5 | |
@goto x61 | |
elseif wheel_idx == 6 | |
@goto x62 | |
elseif wheel_idx == 7 | |
@goto x63 | |
elseif wheel_idx == 8 | |
@goto x64 | |
end | |
# 29 | |
while true | |
@label x57 | |
while true | |
byte_idx > unrolled_max && break | |
xs[byte_idx + increment * 0 + 0] &= bit_8 | |
xs[byte_idx + increment * 6 + 6] &= bit_7 | |
xs[byte_idx + increment * 10 + 10] &= bit_6 | |
xs[byte_idx + increment * 12 + 12] &= bit_5 | |
xs[byte_idx + increment * 16 + 16] &= bit_4 | |
xs[byte_idx + increment * 18 + 18] &= bit_3 | |
xs[byte_idx + increment * 22 + 22] &= bit_2 | |
xs[byte_idx + increment * 28 + 28] &= bit_1 | |
byte_idx += increment * 30 + 29 | |
end | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_8 | |
byte_idx += increment * 6 + 6 | |
@label x58 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_7 | |
byte_idx += increment * 4 + 4 | |
@label x59 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_6 | |
byte_idx += increment * 2 + 2 | |
@label x60 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_5 | |
byte_idx += increment * 4 + 4 | |
@label x61 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_4 | |
byte_idx += increment * 2 + 2 | |
@label x62 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_3 | |
byte_idx += increment * 4 + 4 | |
@label x63 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_2 | |
byte_idx += increment * 6 + 6 | |
@label x64 | |
byte_idx > n_bytes && @goto out | |
xs[byte_idx] &= bit_1 | |
byte_idx += increment * 2 + 1 | |
end | |
end | |
@label out | |
end | |
end | |
@label done_sieving | |
return xs | |
# result = Int[] | |
# sizehint!(result, 5 + floor(Int, n / (log(n) - 1.12))) | |
# @inbounds for i = 1 : length(xs) | |
# x = xs[i] | |
# for k = UInt8(1) : UInt8(8) | |
# # Make sure our current number is a prime | |
# ((x >> (0x08 - k)) & 0x01) !== 0x01 && continue | |
# # Compute our current sieving prime | |
# push!(result, ps[k] + (i - 1) * 30) | |
# end | |
# end | |
# return result | |
end | |
end # module |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
julia> @benchmark PrimeMonstrosity.sieve(500_000) | |
BenchmarkTools.Trial: | |
memory estimate: 16.39 KiB | |
allocs estimate: 2 | |
-------------- | |
minimum time: 69.349 μs (0.00% GC) | |
median time: 70.561 μs (0.00% GC) | |
mean time: 71.786 μs (1.10% GC) | |
maximum time: 2.793 ms (97.30% GC) | |
-------------- | |
samples: 10000 | |
evals/sample: 1 | |
julia> @benchmark Primes._primesmask(7, 500_000) | |
BenchmarkTools.Trial: | |
memory estimate: 130.33 KiB | |
allocs estimate: 2 | |
-------------- | |
minimum time: 630.627 μs (0.00% GC) | |
median time: 698.513 μs (0.00% GC) | |
mean time: 714.792 μs (1.71% GC) | |
maximum time: 5.569 ms (86.19% GC) | |
-------------- | |
samples: 6974 | |
evals/sample: 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment