Skip to content

Instantly share code, notes, and snippets.

@macdice
Created November 21, 2019 02:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save macdice/b7eb7015846b8384972c2c7cfd6d2c22 to your computer and use it in GitHub Desktop.
Save macdice/b7eb7015846b8384972c2c7cfd6d2c22 to your computer and use it in GitHub Desktop.
Experimenting with batch and bucket functions
postgres=# create or replace function hash_combine(seed bigint, hash bigint)
returns bigint as
$$
select (seed # (hash + 2654435769 + (seed << 6) + (seed >> 2)))
& 4294967295;
$$ language sql;
CREATE FUNCTION
postgres=# create or replace function batch_by_rehash(nbatch int, nbucket int, hash bigint)
returns int as
$$
select (hash_combine(0, hash) % nbatch)::int;
$$ language sql;
CREATE FUNCTION
postgres=# create or replace function make_unsigned(i int)
returns bigint as
$$
select case when i < 0 then 4294967296::bigint + i else i end;
$$ language sql;
CREATE FUNCTION
postgres=# select count(*) as non_empty_buckets,
min(collisions) as min_chain,
max(collisions) as max_chain,
avg(collisions) as avg_chain,
stddev(collisions) as stddev_chain
from (
with hashes as (select make_unsigned(hashint8(generate_series(1::bigint, 7000000000::bigint))) hash)
select hash % 1048576,
count(*) as collisions
from hashes
where batch_by_rehash(4096, 1048576, hash) = 0
group by 1
) ss;
non_empty_buckets | min_chain | max_chain | avg_chain | stddev_chain
-------------------+-----------+-----------+-----------------------+------------------
256 | 6212 | 7001 | 6674.5625000000000000 | 110.581298280549
(1 row)
postgres=# create or replace function batch_by_high_bits(hash bigint)
returns int as
$$
select (hash >> 19)::int;
$$ language sql;
CREATE FUNCTION
postgres=# select count(*) as non_empty_buckets,
min(collisions) as min_chain,
max(collisions) as max_chain,
avg(collisions) as avg_chain,
stddev(collisions) as stddev_chain
from (
with hashes as (select make_unsigned(hashint8(generate_series(1::bigint, 7000000000::bigint))) hash)
select hash % 4194304,
count(*) as collisions
from hashes
where batch_by_high_bits(hash) = 0
group by 1
) ss;
non_empty_buckets | min_chain | max_chain | avg_chain | stddev_chain
-------------------+-----------+-----------+--------------------+--------------------
329685 | 1 | 17 | 2.5945645085460364 | 1.4777061278208620
(1 row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment