Skip to content

Instantly share code, notes, and snippets.

@haampie
Created December 18, 2019 17:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save haampie/cbc3427430dca6fe0008f461249dff72 to your computer and use it in GitHub Desktop.
Save haampie/cbc3427430dca6fe0008f461249dff72 to your computer and use it in GitHub Desktop.
prime-monstrosity.jl
module PrimeMonstrosity
const bit_1 = ~(0x01 << 7)
const bit_2 = ~(0x01 << 6)
const bit_3 = ~(0x01 << 5)
const bit_4 = ~(0x01 << 4)
const bit_5 = ~(0x01 << 3)
const bit_6 = ~(0x01 << 2)
const bit_7 = ~(0x01 << 1)
const bit_8 = ~(0x01 << 0)
function pp(p)
dr = [divrem(p * j, 30) for j in (1, 7, 11, 13, 17, 19, 23, 29, 31)]
d = first.(dr)
r = last.(dr)
dd = circshift(d, 1)
d, to_idx.(r), d .- dd
end
function to_idx(x)
x == 1 && return 1
x == 7 && return 2
x == 11 && return 3
x == 13 && return 4
x == 17 && return 5
x == 19 && return 6
x == 23 && return 7
return 8
end
function to_num(byte, bit)
ps = (1, 7, 11, 13, 17, 19, 23, 29)
30 * (byte - 1) + ps[bit]
end
function sieve(n::Int)
n_bytes = ceil(Int, n / 30)
xs = Vector{UInt8}(undef, n_bytes)
fill!(xs, 0xFF)
ps = (1, 7, 11, 13, 17, 19, 23, 29)
@inbounds for i = 1 : length(xs)
x = xs[i]
for k = UInt8(1) : UInt8(8)
# Make sure our current number is a prime
((x >> (0x08 - k)) & 0x01) !== 0x01 && continue
# Compute our current sieving prime
p = ps[k] + (i - 1) * 30
p * p > to_num(n_bytes, 8) && @goto done_sieving
# Don't sieve with 2, 3 and 5
p < 7 && continue
# Start sieving with p.
byte_idx = p * p ÷ 30 + 1
wheel_idx = to_idx(p % 30)
prime_mod = to_idx(p % 30)
increment = p ÷ 30
unrolled_max = n_bytes - increment * 28 - 28
if prime_mod === 1
if wheel_idx == 1
@goto x1
elseif wheel_idx == 2
@goto x2
elseif wheel_idx == 3
@goto x3
elseif wheel_idx == 4
@goto x4
elseif wheel_idx == 5
@goto x5
elseif wheel_idx == 6
@goto x6
elseif wheel_idx == 7
@goto x7
elseif wheel_idx == 8
@goto x8
end
while true
@label x1
while true
byte_idx > unrolled_max && break
xs[byte_idx + increment * 0 + 0] &= bit_1
xs[byte_idx + increment * 6 + 0] &= bit_2
xs[byte_idx + increment * 10 + 0] &= bit_3
xs[byte_idx + increment * 12 + 0] &= bit_4
xs[byte_idx + increment * 16 + 0] &= bit_5
xs[byte_idx + increment * 18 + 0] &= bit_6
xs[byte_idx + increment * 22 + 0] &= bit_7
xs[byte_idx + increment * 28 + 0] &= bit_8
byte_idx += increment * 30 + 1
end
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_1
byte_idx += increment * 6 + 0
@label x2
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_2
byte_idx += increment * 4 + 0
@label x3
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_3
byte_idx += increment * 2 + 0
@label x4
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_4
byte_idx += increment * 4 + 0
@label x5
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_5
byte_idx += increment * 2 + 0
@label x6
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_6
byte_idx += increment * 4 + 0
@label x7
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_7
byte_idx += increment * 6 + 0
@label x8
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_8
byte_idx += increment * 2 + 1
end
elseif prime_mod == 2
if wheel_idx == 1
@goto x9
elseif wheel_idx == 2
@goto x10
elseif wheel_idx == 3
@goto x11
elseif wheel_idx == 4
@goto x12
elseif wheel_idx == 5
@goto x13
elseif wheel_idx == 6
@goto x14
elseif wheel_idx == 7
@goto x15
elseif wheel_idx == 8
@goto x16
end
# 7
while true
@label x9
while true
byte_idx > unrolled_max && break
xs[byte_idx + increment * 0 + 0] &= bit_2
xs[byte_idx + increment * 6 + 1] &= bit_6
xs[byte_idx + increment * 10 + 2] &= bit_5
xs[byte_idx + increment * 12 + 3] &= bit_1
xs[byte_idx + increment * 16 + 3] &= bit_8
xs[byte_idx + increment * 18 + 4] &= bit_4
xs[byte_idx + increment * 22 + 5] &= bit_3
xs[byte_idx + increment * 28 + 6] &= bit_7
byte_idx += increment * 30 + 7
end
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_2
byte_idx += increment * 6 + 1
@label x10
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_6
byte_idx += increment * 4 + 1
@label x11
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_5
byte_idx += increment * 2 + 1
@label x12
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_1
byte_idx += increment * 4 + 0
@label x13
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_8
byte_idx += increment * 2 + 1
@label x14
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_4
byte_idx += increment * 4 + 1
@label x15
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_3
byte_idx += increment * 6 + 1
@label x16
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_7
byte_idx += increment * 2 + 1
end
elseif prime_mod === 3
if wheel_idx == 1
@goto x17
elseif wheel_idx == 2
@goto x18
elseif wheel_idx == 3
@goto x19
elseif wheel_idx == 4
@goto x20
elseif wheel_idx == 5
@goto x21
elseif wheel_idx == 6
@goto x22
elseif wheel_idx == 7
@goto x23
elseif wheel_idx == 8
@goto x24
end
# 11
while true
@label x17
while true
byte_idx > unrolled_max && break
xs[byte_idx + increment * 0 + 0] &= bit_3
xs[byte_idx + increment * 6 + 2] &= bit_5
xs[byte_idx + increment * 10 + 4] &= bit_1
xs[byte_idx + increment * 12 + 4] &= bit_7
xs[byte_idx + increment * 16 + 6] &= bit_2
xs[byte_idx + increment * 18 + 6] &= bit_8
xs[byte_idx + increment * 22 + 8] &= bit_4
xs[byte_idx + increment * 28 + 10] &= bit_6
byte_idx += increment * 30 + 11
end
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_3
byte_idx += increment * 6 + 2
@label x18
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_5
byte_idx += increment * 4 + 2
@label x19
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_1
byte_idx += increment * 2 + 0
@label x20
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_7
byte_idx += increment * 4 + 2
@label x21
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_2
byte_idx += increment * 2 + 0
@label x22
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_8
byte_idx += increment * 4 + 2
@label x23
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_4
byte_idx += increment * 6 + 2
@label x24
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_6
byte_idx += increment * 2 + 1
end
elseif prime_mod === 4
if wheel_idx == 1
@goto x25
elseif wheel_idx == 2
@goto x26
elseif wheel_idx == 3
@goto x27
elseif wheel_idx == 4
@goto x28
elseif wheel_idx == 5
@goto x29
elseif wheel_idx == 6
@goto x30
elseif wheel_idx == 7
@goto x31
elseif wheel_idx == 8
@goto x32
end
# 13
while true
@label x25
while true
byte_idx > unrolled_max && break
xs[byte_idx + increment * 0 + 0] &= bit_4
xs[byte_idx + increment * 6 + 3] &= bit_1
xs[byte_idx + increment * 10 + 4] &= bit_7
xs[byte_idx + increment * 12 + 5] &= bit_6
xs[byte_idx + increment * 16 + 7] &= bit_3
xs[byte_idx + increment * 18 + 8] &= bit_2
xs[byte_idx + increment * 22 + 9] &= bit_8
xs[byte_idx + increment * 28 + 12] &= bit_5
byte_idx += increment * 30 + 13
end
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_4
byte_idx += increment * 6 + 3
@label x26
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_1
byte_idx += increment * 4 + 1
@label x27
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_7
byte_idx += increment * 2 + 1
@label x28
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_6
byte_idx += increment * 4 + 2
@label x29
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_3
byte_idx += increment * 2 + 1
@label x30
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_2
byte_idx += increment * 4 + 1
@label x31
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_8
byte_idx += increment * 6 + 3
@label x32
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_5
byte_idx += increment * 2 + 1
end
elseif prime_mod === 5
if wheel_idx == 1
@goto x33
elseif wheel_idx == 2
@goto x34
elseif wheel_idx == 3
@goto x35
elseif wheel_idx == 4
@goto x36
elseif wheel_idx == 5
@goto x37
elseif wheel_idx == 6
@goto x38
elseif wheel_idx == 7
@goto x39
elseif wheel_idx == 8
@goto x40
end
# 17
while true
@label x33
while true
byte_idx > unrolled_max && break
xs[byte_idx + increment * 0 + 0] &= bit_5
xs[byte_idx + increment * 6 + 3] &= bit_8
xs[byte_idx + increment * 10 + 6] &= bit_2
xs[byte_idx + increment * 12 + 7] &= bit_3
xs[byte_idx + increment * 16 + 9] &= bit_6
xs[byte_idx + increment * 18 + 10] &= bit_7
xs[byte_idx + increment * 22 + 13] &= bit_1
xs[byte_idx + increment * 28 + 16] &= bit_4
byte_idx += increment * 30 + 17
end
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_5
byte_idx += increment * 6 + 3
@label x34
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_8
byte_idx += increment * 4 + 3
@label x35
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_2
byte_idx += increment * 2 + 1
@label x36
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_3
byte_idx += increment * 4 + 2
@label x37
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_6
byte_idx += increment * 2 + 1
@label x38
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_7
byte_idx += increment * 4 + 3
@label x39
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_1
byte_idx += increment * 6 + 3
@label x40
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_4
byte_idx += increment * 2 + 1
end
elseif prime_mod === 6
if wheel_idx == 1
@goto x41
elseif wheel_idx == 2
@goto x42
elseif wheel_idx == 3
@goto x43
elseif wheel_idx == 4
@goto x44
elseif wheel_idx == 5
@goto x45
elseif wheel_idx == 6
@goto x46
elseif wheel_idx == 7
@goto x47
elseif wheel_idx == 8
@goto x48
end
# 19
while true
@label x41
while true
byte_idx > unrolled_max && break
xs[byte_idx + increment * 0 + 0] &= bit_6
xs[byte_idx + increment * 6 + 4] &= bit_4
xs[byte_idx + increment * 10 + 6] &= bit_8
xs[byte_idx + increment * 12 + 8] &= bit_2
xs[byte_idx + increment * 16 + 10] &= bit_7
xs[byte_idx + increment * 18 + 12] &= bit_1
xs[byte_idx + increment * 22 + 14] &= bit_5
xs[byte_idx + increment * 28 + 18] &= bit_3
byte_idx += increment * 30 + 19
end
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_6
byte_idx += increment * 6 + 4
@label x42
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_4
byte_idx += increment * 4 + 2
@label x43
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_8
byte_idx += increment * 2 + 2
@label x44
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_2
byte_idx += increment * 4 + 2
@label x45
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_7
byte_idx += increment * 2 + 2
@label x46
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_1
byte_idx += increment * 4 + 2
@label x47
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_5
byte_idx += increment * 6 + 4
@label x48
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_3
byte_idx += increment * 2 + 1
end
elseif prime_mod === 7
if wheel_idx == 1
@goto x49
elseif wheel_idx == 2
@goto x50
elseif wheel_idx == 3
@goto x51
elseif wheel_idx == 4
@goto x52
elseif wheel_idx == 5
@goto x53
elseif wheel_idx == 6
@goto x54
elseif wheel_idx == 7
@goto x55
elseif wheel_idx == 8
@goto x56
end
# 23
while true
@label x49
while true
byte_idx > unrolled_max && break
xs[byte_idx + increment * 0 + 0] &= bit_7
xs[byte_idx + increment * 6 + 5] &= bit_3
xs[byte_idx + increment * 10 + 8] &= bit_4
xs[byte_idx + increment * 12 + 9] &= bit_8
xs[byte_idx + increment * 16 + 13] &= bit_1
xs[byte_idx + increment * 18 + 14] &= bit_5
xs[byte_idx + increment * 22 + 17] &= bit_6
xs[byte_idx + increment * 28 + 22] &= bit_2
byte_idx += increment * 30 + 23
end
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_7
byte_idx += increment * 6 + 5
@label x50
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_3
byte_idx += increment * 4 + 3
@label x51
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_4
byte_idx += increment * 2 + 1
@label x52
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_8
byte_idx += increment * 4 + 4
@label x53
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_1
byte_idx += increment * 2 + 1
@label x54
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_5
byte_idx += increment * 4 + 3
@label x55
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_6
byte_idx += increment * 6 + 5
@label x56
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_2
byte_idx += increment * 2 + 1
end
elseif prime_mod === 8
if wheel_idx == 1
@goto x57
elseif wheel_idx == 2
@goto x58
elseif wheel_idx == 3
@goto x59
elseif wheel_idx == 4
@goto x60
elseif wheel_idx == 5
@goto x61
elseif wheel_idx == 6
@goto x62
elseif wheel_idx == 7
@goto x63
elseif wheel_idx == 8
@goto x64
end
# 29
while true
@label x57
while true
byte_idx > unrolled_max && break
xs[byte_idx + increment * 0 + 0] &= bit_8
xs[byte_idx + increment * 6 + 6] &= bit_7
xs[byte_idx + increment * 10 + 10] &= bit_6
xs[byte_idx + increment * 12 + 12] &= bit_5
xs[byte_idx + increment * 16 + 16] &= bit_4
xs[byte_idx + increment * 18 + 18] &= bit_3
xs[byte_idx + increment * 22 + 22] &= bit_2
xs[byte_idx + increment * 28 + 28] &= bit_1
byte_idx += increment * 30 + 29
end
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_8
byte_idx += increment * 6 + 6
@label x58
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_7
byte_idx += increment * 4 + 4
@label x59
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_6
byte_idx += increment * 2 + 2
@label x60
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_5
byte_idx += increment * 4 + 4
@label x61
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_4
byte_idx += increment * 2 + 2
@label x62
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_3
byte_idx += increment * 4 + 4
@label x63
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_2
byte_idx += increment * 6 + 6
@label x64
byte_idx > n_bytes && @goto out
xs[byte_idx] &= bit_1
byte_idx += increment * 2 + 1
end
end
@label out
end
end
@label done_sieving
return xs
# result = Int[]
# sizehint!(result, 5 + floor(Int, n / (log(n) - 1.12)))
# @inbounds for i = 1 : length(xs)
# x = xs[i]
# for k = UInt8(1) : UInt8(8)
# # Make sure our current number is a prime
# ((x >> (0x08 - k)) & 0x01) !== 0x01 && continue
# # Compute our current sieving prime
# push!(result, ps[k] + (i - 1) * 30)
# end
# end
# return result
end
end # module
julia> @benchmark PrimeMonstrosity.sieve(500_000)
BenchmarkTools.Trial:
memory estimate: 16.39 KiB
allocs estimate: 2
--------------
minimum time: 69.349 μs (0.00% GC)
median time: 70.561 μs (0.00% GC)
mean time: 71.786 μs (1.10% GC)
maximum time: 2.793 ms (97.30% GC)
--------------
samples: 10000
evals/sample: 1
julia> @benchmark Primes._primesmask(7, 500_000)
BenchmarkTools.Trial:
memory estimate: 130.33 KiB
allocs estimate: 2
--------------
minimum time: 630.627 μs (0.00% GC)
median time: 698.513 μs (0.00% GC)
mean time: 714.792 μs (1.71% GC)
maximum time: 5.569 ms (86.19% GC)
--------------
samples: 6974
evals/sample: 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment