Last active
August 1, 2017 19:19
-
-
Save mble/ed1d8c20865b880f4ca4735d52947eb7 to your computer and use it in GitHub Desktop.
Some join benchmarking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- create some data | |
create table users ( | |
id serial primary key, | |
shadowbanned boolean default false | |
); | |
create table posts ( | |
id serial primary key, | |
user_id integer references users(id) | |
); | |
insert into users (shadowbanned) select (random() > 0.8) from generate_series(1, 1000000); | |
insert into posts (user_id) select (id) from users; | |
-- create some useful indicies | |
create index posts_user_id_idx on posts(user_id); | |
create index users_shadowbanned_idx on users(shadowbanned); | |
-- freshen up | |
vacuum analyze; | |
-- values join | |
explain analyze | |
select posts.* | |
from (select unnest(array_agg(users.id)) as user_id from users where users.shadowbanned = false or users.id = 67) as user_id | |
join posts using(user_id); | |
-- QUERY PLAN | |
-- ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── | |
-- Nested Loop (cost=18925.59..19764.18 rows=100 width=8) (actual time=185.363..1547.569 rows=800348 loops=1) | |
-- -> Aggregate (cost=18925.17..18925.68 rows=100 width=4) (actual time=185.348..258.586 rows=800348 loops=1) | |
-- -> Seq Scan on users (cost=0.00..16925.00 rows=800067 width=4) (actual time=0.010..106.919 rows=800348 loops=1) | |
-- Filter: ((NOT shadowbanned) OR (id = 67)) | |
-- Rows Removed by Filter: 199652 | |
-- -> Index Scan using posts_user_id_idx on posts (cost=0.42..8.37 rows=1 width=8) (actual time=0.001..0.001 rows=1 loops=800348) | |
-- Index Cond: (user_id = (unnest(array_agg(users.id)))) | |
-- Planning time: 0.185 ms | |
-- Execution time: 1590.360 ms | |
-- values join 2 | |
explain analyze | |
select posts.* | |
from posts | |
inner join ( | |
select id from users where users.shadowbanned = false or users.id = 67 | |
) ex(user_id) on (posts.user_id = ex.user_id); | |
-- QUERY PLAN | |
-- ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── | |
-- Hash Join (cost=30051.84..73417.51 rows=800067 width=8) (actual time=257.991..1347.789 rows=800348 loops=1) | |
-- Hash Cond: (posts.user_id = users.id) | |
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.005..77.740 rows=1000000 loops=1) | |
-- -> Hash (cost=16925.00..16925.00 rows=800067 width=4) (actual time=257.374..257.374 rows=800348 loops=1) | |
-- Buckets: 65536 Batches: 2 Memory Usage: 14072kB | |
-- -> Seq Scan on users (cost=0.00..16925.00 rows=800067 width=4) (actual time=0.006..132.818 rows=800348 loops=1) | |
-- Filter: ((NOT shadowbanned) OR (id = 67)) | |
-- Rows Removed by Filter: 199652 | |
-- Planning time: 0.292 ms | |
-- Execution time: 1390.529 ms | |
-- anti-join 1 | |
explain analyze | |
select posts.* | |
from posts | |
left outer join users | |
on users.id = posts.user_id | |
and (users.shadowbanned = false or users.id = 67) | |
where users.id is null; | |
-- QUERY PLAN | |
-- ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── | |
-- Hash Anti Join (cost=30051.84..66066.62 rows=199933 width=8) (actual time=255.048..1034.968 rows=199652 loops=1) | |
-- Hash Cond: (posts.user_id = users.id) | |
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.004..76.532 rows=1000000 loops=1) | |
-- -> Hash (cost=16925.00..16925.00 rows=800067 width=4) (actual time=254.702..254.702 rows=800348 loops=1) | |
-- Buckets: 65536 Batches: 2 Memory Usage: 14072kB | |
-- -> Seq Scan on users (cost=0.00..16925.00 rows=800067 width=4) (actual time=0.004..132.280 rows=800348 loops=1) | |
-- Filter: ((NOT shadowbanned) OR (id = 67)) | |
-- Rows Removed by Filter: 199652 | |
-- Planning time: 0.193 ms | |
-- Execution time: 1047.990 ms | |
-- anti-join 2 | |
explain analyze | |
select posts.* | |
from posts | |
where not exists ( | |
select 1 | |
from users | |
where users.id = posts.user_id | |
and (users.shadowbanned = false or users.id = 67) | |
); | |
-- QUERY PLAN | |
-- ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── | |
-- Hash Anti Join (cost=30051.84..66066.62 rows=199933 width=8) (actual time=261.219..1024.009 rows=199652 loops=1) | |
-- Hash Cond: (posts.user_id = users.id) | |
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.003..72.475 rows=1000000 loops=1) | |
-- -> Hash (cost=16925.00..16925.00 rows=800067 width=4) (actual time=260.934..260.934 rows=800348 loops=1) | |
-- Buckets: 65536 Batches: 2 Memory Usage: 14072kB | |
-- -> Seq Scan on users (cost=0.00..16925.00 rows=800067 width=4) (actual time=0.003..133.428 rows=800348 loops=1) | |
-- Filter: ((NOT shadowbanned) OR (id = 67)) | |
-- Rows Removed by Filter: 199652 | |
-- Planning time: 0.198 ms | |
-- Execution time: 1035.989 ms | |
-- limited work set | |
explain analyze | |
select limited_posts.* | |
from ( | |
select posts.* | |
from posts | |
limit 10000 | |
) as limited_posts | |
where not exists ( | |
select 1 | |
from users | |
where users.id = limited_posts.user_id | |
and (users.shadowbanned = false or users.id = 67) | |
); | |
-- QUERY PLAN | |
-- ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── | |
-- Nested Loop Anti Join (cost=0.42..33474.25 rows=1 width=8) (actual time=0.030..18.624 rows=2050 loops=1) | |
-- -> Limit (cost=0.00..144.25 rows=10000 width=8) (actual time=0.006..1.842 rows=10000 loops=1) | |
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.006..0.861 rows=10000 loops=1) | |
-- -> Index Scan using users_pkey on users (cost=0.42..3.31 rows=1 width=4) (actual time=0.001..0.001 rows=1 loops=10000) | |
-- Index Cond: (id = posts.user_id) | |
-- Filter: ((NOT shadowbanned) OR (id = 67)) | |
-- Rows Removed by Filter: 0 | |
-- Planning time: 0.260 ms | |
-- Execution time: 18.796 ms | |
-- probably a better query plan | |
explain analyze | |
select limited_posts.* | |
from ( | |
select posts.* | |
from posts | |
limit 20000 | |
) as limited_posts | |
left outer join users | |
on users.id = limited_posts.user_id | |
and (users.shadowbanned = false) | |
where users.id is null | |
union | |
select limited_posts.* | |
from ( | |
select posts.* | |
from posts | |
limit 20000 | |
) as limited_posts | |
where limited_posts.user_id = 67; | |
-- QUERY PLAN | |
-- ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── | |
-- Unique (cost=21107.03..21107.04 rows=2 width=8) (actual time=41.130..41.833 rows=4072 loops=1) | |
-- -> Sort (cost=21107.03..21107.03 rows=2 width=8) (actual time=41.129..41.318 rows=4073 loops=1) | |
-- Sort Key: posts.id, posts.user_id | |
-- Sort Method: quicksort Memory: 287kB | |
-- -> Append (cost=0.42..21107.02 rows=2 width=8) (actual time=0.034..39.837 rows=4073 loops=1) | |
-- -> Nested Loop Anti Join (cost=0.42..20568.50 rows=1 width=8) (actual time=0.034..35.172 rows=4072 loops=1) | |
-- -> Limit (cost=0.00..288.50 rows=20000 width=8) (actual time=0.006..3.623 rows=20000 loops=1) | |
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.005..1.636 rows=20000 loops=1) | |
-- -> Index Only Scan using composite_idx on users (cost=0.42..0.99 rows=1 width=4) (actual time=0.001..0.001 rows=1 loops=20000) | |
-- Index Cond: ((shadowbanned = false) AND (id = posts.user_id)) | |
-- Filter: (NOT shadowbanned) | |
-- Heap Fetches: 0 | |
-- -> Subquery Scan on limited_posts (cost=0.00..538.50 rows=1 width=8) (actual time=0.021..4.271 rows=1 loops=1) | |
-- Filter: (limited_posts.user_id = 67) | |
-- Rows Removed by Filter: 19999 | |
-- -> Limit (cost=0.00..288.50 rows=20000 width=8) (actual time=0.003..2.814 rows=20000 loops=1) | |
-- -> Seq Scan on posts posts_1 (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.002..1.210 rows=20000 loops=1) | |
-- Planning time: 0.310 ms | |
-- Execution time: 42.064 ms | |
-- lets normalise | |
-- create some data | |
create table users ( | |
id serial primary key | |
); | |
create table shadowbans ( | |
id serial primary key, | |
user_id integer references users(id) | |
); | |
create table posts ( | |
id serial primary key, | |
user_id integer references users(id) | |
); | |
insert into users select * from generate_series(1, 1000000); | |
insert into shadowbans(user_id) select * from generate_series(1, 100000); | |
insert into posts (user_id) select (id) from users; | |
-- create some useful indicies | |
create index posts_user_id_idx on posts(user_id); | |
create index shadowbans_user_id_idx on shadowbans(user_id); | |
-- freshen up | |
vacuum analyze; | |
-- anti join | |
explain analyze | |
select posts.* | |
from ( | |
select posts.* | |
from posts | |
limit 20000 | |
) as posts | |
left outer join shadowbans | |
on shadowbans.user_id = posts.user_id | |
where posts.user_id = 67 or shadowbans.user_id is null; | |
-- QUERY PLAN | |
-- ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── | |
-- Hash Left Join (cost=2693.00..3631.50 rows=1 width=8) (actual time=24.940..40.187 rows=1 loops=1) | |
-- Hash Cond: (posts.user_id = shadowbans.user_id) | |
-- Filter: ((posts.user_id = 67) OR (shadowbans.user_id IS NULL)) | |
-- Rows Removed by Filter: 19999 | |
-- -> Limit (cost=0.00..288.50 rows=20000 width=8) (actual time=0.006..2.802 rows=20000 loops=1) | |
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.006..1.283 rows=20000 loops=1) | |
-- -> Hash (cost=1443.00..1443.00 rows=100000 width=4) (actual time=24.845..24.845 rows=100000 loops=1) | |
-- Buckets: 16384 Batches: 1 Memory Usage: 3516kB | |
-- -> Seq Scan on shadowbans (cost=0.00..1443.00 rows=100000 width=4) (actual time=0.004..12.441 rows=100000 loops=1) | |
-- Planning time: 0.220 ms | |
-- Execution time: 40.222 ms |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment