Skip to content

Instantly share code, notes, and snippets.

@homuler
Last active December 29, 2022 12:57
Show Gist options
  • Save homuler/c1eafcd7cadf254b02feb83b62171747 to your computer and use it in GitHub Desktop.
Save homuler/c1eafcd7cadf254b02feb83b62171747 to your computer and use it in GitHub Desktop.
Sieving Prime Numbers in SQL (PostgreSQL 9.6)
-- Enumerate prime numbers below N using only SQL
-- DDL
create table primes(
value integer primary key,
sieved boolean not null default false
);
create index prime_sieve_idx on
primes(value, sieved);
-- Table for wheel factorization
create table wheel_mods(
r integer primary key -- Integer in the inner-most circle. This and n = \prod_{p \in P} are coprime.
);
-- Table for atkin sieve. This is used to save particular solutions of ax^2 + by^2 = d
--
-- Case 1. 4x^2 + y^2 = d
-- enumerate (x, y) pairs such that 0 <= x <= 15, 0 <= y <= 30, (d, 60) = 1, d ≡ 1 (mod 4)
-- a = 4, b = 1, x_mod = 15, y_mod = 30
--
-- Case 2. 3x^2 + y^2 = d
-- enumerate (x, y) pairs such that 0 <= x <= 10, 0 <= y <= 30, (d, 60) = 1, d ≡ 7 (mod 12)
-- a = 3, b = 1, x_mod = 10, y_mod = 30
--
-- Case 3. 3x^2 - y^2 = d
-- enumerate (x, y) pairs such that 0 <= x <= 10, 0 <= y <= 30, (d, 60) = 1, d ≡ 11 (mod 12)
-- a = 3, b = -1, x_mod = 10, y_mod = 30
create table atkin_ps(
x integer,
y integer,
d integer,
a integer,
b integer,
x_mod integer,
y_mod integer,
primary key(d, x, y)
);
create index atkin_ps_b_idx on
atkin_ps(b);
-- Prime Sieve
--
-- All the queries are tested on Mac Book Pro (2015), 2.2 GHz Intel Core i7, 16 GB 1600 MHz DDR3
-- In each case, the order of operations is written, but they are just for reference and might be inaccurate.
-- A) Insert simply (O(n (\log n)^2)
create or replace function enum_primes(integer) returns int as $$
declare cnt int;
begin
delete from primes;
insert into primes (value)
select X.id from generate_series(2, $1) as X(id)
except(
select (A.id * B.id) as composite
from
generate_series(2, sqrt($1)::integer) as A(id),
generate_series(A.id, $1 / A.id) as B(id));
select count(*) into STRICT cnt from primes;
return cnt;
end;
$$ LANGUAGE plpgsql;
-- select * from enum_primes(1000000);
-- Execution Time:
-- | n | ms |
-- | ---------- | ------ |
-- | 100,000 | 458 |
-- | 1,000,000 | 5,861 |
-- | 10,000,000 | 79,000 |
-- B) Sieve of Eratosthenes (O(n (\log n + \log n \log \log n)))
create or replace function eratosthenes(integer) returns int as $$
declare i int;
declare cnt int;
begin
delete from primes;
insert into primes (value)
select X.id from generate_series(2, $1) as X(id);
i := 2;
LOOP
update primes
set sieved = true
where value in (
select * from generate_series(i * i, $1, i)
);
select value into STRICT i
from primes
where value > i and sieved = false
order by value asc limit 1;
if i > sqrt( $1 ) then EXIT; end if;
end LOOP;
delete from primes where sieved = true;
select count(*) into STRICT cnt from primes;
return cnt;
end;
$$ LANGUAGE plpgsql;
-- select * from eratosthenes(1000000);
-- Execution Time:
-- | n | ms |
-- | ---------- | ------ |
-- | 100,000 | 2,483 |
-- | 1,000,000 | 48,958 |
-- | 10,000,000 | |
-- C) Wheel Factorization (O(48/210 * n (\log n)^2))
create or replace function wheel_factorize(integer) returns int as $$
declare modulo int;
declare cnt int;
begin
modulo := 210;
delete from wheel_mods;
delete from primes;
insert into wheel_mods
select S.id from generate_series(1, modulo) as S(id)
where 0 not in (MOD(S.id, 2), MOD(S.id, 3), MOD(S.id, 5), MOD(S.id, 7));
insert into primes (value)
values (2), (3), (5), (7)
union
select modulo * V.id + wheel_mods.r
from generate_series(0, $1 / modulo) as V(id)
cross join wheel_mods
where modulo * V.id + wheel_mods.r between 2 and $1;
delete from primes
using primes p1, primes p2
where p1.value <= sqrt($1)
and p2.value <= $1 / p1.value
and primes.value = p1.value * p2.value;
select count(*) into STRICT cnt from primes;
return cnt;
end;
$$ LANGUAGE plpgsql;
-- select * from wheel_factorize(1000000);
-- Execution Time:
-- | n | ms |
-- | ---------- | ------ |
-- | 100,000 | 128 |
-- | 1,000,000 | 1,883 |
-- | 10,000,000 | 27,105 |
-- D) Sieve of Atkin (O(n \log n / \log \log n + (n /\log \log n)^3/2))
-- [Prime Sieves using Binary Quadratic Forms](http://www.ams.org/journals/mcom/2004-73-246/S0025-5718-03-01501-1/S0025-5718-03-01501-1.pdf)
create or replace function atkin_sieve(integer) returns int as $$
declare cnt int;
begin
delete from primes;
delete from atkin_ps;
insert into atkin_ps (x, y, d, a, b, x_mod, y_mod)
select X.id, Y.id, MOD(4 * X.id * X.id + Y.id * Y.id, 60), 4, 1, 15, 30
from
generate_series(0, 14) as X(id),
generate_series(0, 29) as Y(id)
where MOD(4 * X.id * X.id + Y.id * Y.id, 60) in (1, 13, 17, 29, 37, 41, 49, 53)
union
select X.id, Y.id, MOD(3 * X.id * X.id + Y.id * Y.id, 60), 3, 1, 10, 30
from
generate_series(0, 9) as X(id),
generate_series(0, 29) as Y(id)
where MOD(3 * X.id * X.id + Y.id * Y.id, 60) in (7, 19, 31, 43)
union
select X.id, Y.id, MOD(MOD(3 * X.id * X.id - Y.id * Y.id, 60) + 60, 60), 3, -1, 10, 30
from
generate_series(0, 9) as X(id),
generate_series(0, 29) as Y(id)
where MOD(MOD(3 * X.id * X.id - Y.id * Y.id, 60) + 60, 60) in (11, 23, 47, 59);
insert into primes(value)
select
a * X.id * X.id + b * Y.id * Y.id
from atkin_ps
cross join
generate_series(x, sqrt($1::float/a)::integer, x_mod) as X(id),
generate_series(y, sqrt(greatest($1 - a*X.id*X.id, 0))::integer, y_mod) as Y(id)
where (a * X.id * X.id + b * Y.id * Y.id) <= $1 and b > 0 and X.id > 0 and Y.id > 0
group by (a * X.id * X.id + b * Y.id * Y.id)
having MOD(count(*), 2) = 1
union
select
a * X.id * X.id + b * Y.id * Y.id
from atkin_ps
cross join
generate_series(x, sqrt($1::float/(a + b))::integer, x_mod) as X(id),
generate_series(y, X.id - 1, y_mod) as Y(id)
where (a * X.id * X.id + b * Y.id * Y.id) <= $1 and b < 0 and X.id > 0
group by (a * X.id * X.id + b * Y.id * Y.id)
having MOD(count(*), 2) = 1;
delete from primes
using primes p1
where p1.value <= sqrt(primes.value)
and p1.value <= sqrt($1)
and MOD(primes.value, p1.value) = 0;
insert into primes (value)
values (2), (3), (5);
select count(*) into STRICT cnt from primes;
return cnt;
end;
$$ LANGUAGE plpgsql;
-- select * from atkin_sieve(1000000);
-- Execution Time:
-- | n | ms |
-- | ---------- | ------ |
-- | 100,000 | 348 |
-- | 1,000,000 | 2,180 |
-- | 10,000,000 | 37,434 |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment