Skip to content

Instantly share code, notes, and snippets.

@mble
Last active August 1, 2017 19:19
Show Gist options
  • Save mble/ed1d8c20865b880f4ca4735d52947eb7 to your computer and use it in GitHub Desktop.
Save mble/ed1d8c20865b880f4ca4735d52947eb7 to your computer and use it in GitHub Desktop.
Some join benchmarking
-- create some data
create table users (
id serial primary key,
shadowbanned boolean default false
);
create table posts (
id serial primary key,
user_id integer references users(id)
);
insert into users (shadowbanned) select (random() > 0.8) from generate_series(1, 1000000);
insert into posts (user_id) select (id) from users;
-- create some useful indicies
create index posts_user_id_idx on posts(user_id);
create index users_shadowbanned_idx on users(shadowbanned);
-- freshen up
vacuum analyze;
-- values join
explain analyze
select posts.*
from (select unnest(array_agg(users.id)) as user_id from users where users.shadowbanned = false or users.id = 67) as user_id
join posts using(user_id);
-- QUERY PLAN
-- ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
-- Nested Loop (cost=18925.59..19764.18 rows=100 width=8) (actual time=185.363..1547.569 rows=800348 loops=1)
-- -> Aggregate (cost=18925.17..18925.68 rows=100 width=4) (actual time=185.348..258.586 rows=800348 loops=1)
-- -> Seq Scan on users (cost=0.00..16925.00 rows=800067 width=4) (actual time=0.010..106.919 rows=800348 loops=1)
-- Filter: ((NOT shadowbanned) OR (id = 67))
-- Rows Removed by Filter: 199652
-- -> Index Scan using posts_user_id_idx on posts (cost=0.42..8.37 rows=1 width=8) (actual time=0.001..0.001 rows=1 loops=800348)
-- Index Cond: (user_id = (unnest(array_agg(users.id))))
-- Planning time: 0.185 ms
-- Execution time: 1590.360 ms
-- values join 2
explain analyze
select posts.*
from posts
inner join (
select id from users where users.shadowbanned = false or users.id = 67
) ex(user_id) on (posts.user_id = ex.user_id);
-- QUERY PLAN
-- ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
-- Hash Join (cost=30051.84..73417.51 rows=800067 width=8) (actual time=257.991..1347.789 rows=800348 loops=1)
-- Hash Cond: (posts.user_id = users.id)
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.005..77.740 rows=1000000 loops=1)
-- -> Hash (cost=16925.00..16925.00 rows=800067 width=4) (actual time=257.374..257.374 rows=800348 loops=1)
-- Buckets: 65536 Batches: 2 Memory Usage: 14072kB
-- -> Seq Scan on users (cost=0.00..16925.00 rows=800067 width=4) (actual time=0.006..132.818 rows=800348 loops=1)
-- Filter: ((NOT shadowbanned) OR (id = 67))
-- Rows Removed by Filter: 199652
-- Planning time: 0.292 ms
-- Execution time: 1390.529 ms
-- anti-join 1
explain analyze
select posts.*
from posts
left outer join users
on users.id = posts.user_id
and (users.shadowbanned = false or users.id = 67)
where users.id is null;
-- QUERY PLAN
-- ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
-- Hash Anti Join (cost=30051.84..66066.62 rows=199933 width=8) (actual time=255.048..1034.968 rows=199652 loops=1)
-- Hash Cond: (posts.user_id = users.id)
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.004..76.532 rows=1000000 loops=1)
-- -> Hash (cost=16925.00..16925.00 rows=800067 width=4) (actual time=254.702..254.702 rows=800348 loops=1)
-- Buckets: 65536 Batches: 2 Memory Usage: 14072kB
-- -> Seq Scan on users (cost=0.00..16925.00 rows=800067 width=4) (actual time=0.004..132.280 rows=800348 loops=1)
-- Filter: ((NOT shadowbanned) OR (id = 67))
-- Rows Removed by Filter: 199652
-- Planning time: 0.193 ms
-- Execution time: 1047.990 ms
-- anti-join 2
explain analyze
select posts.*
from posts
where not exists (
select 1
from users
where users.id = posts.user_id
and (users.shadowbanned = false or users.id = 67)
);
-- QUERY PLAN
-- ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
-- Hash Anti Join (cost=30051.84..66066.62 rows=199933 width=8) (actual time=261.219..1024.009 rows=199652 loops=1)
-- Hash Cond: (posts.user_id = users.id)
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.003..72.475 rows=1000000 loops=1)
-- -> Hash (cost=16925.00..16925.00 rows=800067 width=4) (actual time=260.934..260.934 rows=800348 loops=1)
-- Buckets: 65536 Batches: 2 Memory Usage: 14072kB
-- -> Seq Scan on users (cost=0.00..16925.00 rows=800067 width=4) (actual time=0.003..133.428 rows=800348 loops=1)
-- Filter: ((NOT shadowbanned) OR (id = 67))
-- Rows Removed by Filter: 199652
-- Planning time: 0.198 ms
-- Execution time: 1035.989 ms
-- limited work set
explain analyze
select limited_posts.*
from (
select posts.*
from posts
limit 10000
) as limited_posts
where not exists (
select 1
from users
where users.id = limited_posts.user_id
and (users.shadowbanned = false or users.id = 67)
);
-- QUERY PLAN
-- ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
-- Nested Loop Anti Join (cost=0.42..33474.25 rows=1 width=8) (actual time=0.030..18.624 rows=2050 loops=1)
-- -> Limit (cost=0.00..144.25 rows=10000 width=8) (actual time=0.006..1.842 rows=10000 loops=1)
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.006..0.861 rows=10000 loops=1)
-- -> Index Scan using users_pkey on users (cost=0.42..3.31 rows=1 width=4) (actual time=0.001..0.001 rows=1 loops=10000)
-- Index Cond: (id = posts.user_id)
-- Filter: ((NOT shadowbanned) OR (id = 67))
-- Rows Removed by Filter: 0
-- Planning time: 0.260 ms
-- Execution time: 18.796 ms
-- probably a better query plan
explain analyze
select limited_posts.*
from (
select posts.*
from posts
limit 20000
) as limited_posts
left outer join users
on users.id = limited_posts.user_id
and (users.shadowbanned = false)
where users.id is null
union
select limited_posts.*
from (
select posts.*
from posts
limit 20000
) as limited_posts
where limited_posts.user_id = 67;
-- QUERY PLAN
-- ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
-- Unique (cost=21107.03..21107.04 rows=2 width=8) (actual time=41.130..41.833 rows=4072 loops=1)
-- -> Sort (cost=21107.03..21107.03 rows=2 width=8) (actual time=41.129..41.318 rows=4073 loops=1)
-- Sort Key: posts.id, posts.user_id
-- Sort Method: quicksort Memory: 287kB
-- -> Append (cost=0.42..21107.02 rows=2 width=8) (actual time=0.034..39.837 rows=4073 loops=1)
-- -> Nested Loop Anti Join (cost=0.42..20568.50 rows=1 width=8) (actual time=0.034..35.172 rows=4072 loops=1)
-- -> Limit (cost=0.00..288.50 rows=20000 width=8) (actual time=0.006..3.623 rows=20000 loops=1)
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.005..1.636 rows=20000 loops=1)
-- -> Index Only Scan using composite_idx on users (cost=0.42..0.99 rows=1 width=4) (actual time=0.001..0.001 rows=1 loops=20000)
-- Index Cond: ((shadowbanned = false) AND (id = posts.user_id))
-- Filter: (NOT shadowbanned)
-- Heap Fetches: 0
-- -> Subquery Scan on limited_posts (cost=0.00..538.50 rows=1 width=8) (actual time=0.021..4.271 rows=1 loops=1)
-- Filter: (limited_posts.user_id = 67)
-- Rows Removed by Filter: 19999
-- -> Limit (cost=0.00..288.50 rows=20000 width=8) (actual time=0.003..2.814 rows=20000 loops=1)
-- -> Seq Scan on posts posts_1 (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.002..1.210 rows=20000 loops=1)
-- Planning time: 0.310 ms
-- Execution time: 42.064 ms
-- lets normalise
-- create some data
create table users (
id serial primary key
);
create table shadowbans (
id serial primary key,
user_id integer references users(id)
);
create table posts (
id serial primary key,
user_id integer references users(id)
);
insert into users select * from generate_series(1, 1000000);
insert into shadowbans(user_id) select * from generate_series(1, 100000);
insert into posts (user_id) select (id) from users;
-- create some useful indicies
create index posts_user_id_idx on posts(user_id);
create index shadowbans_user_id_idx on shadowbans(user_id);
-- freshen up
vacuum analyze;
-- anti join
explain analyze
select posts.*
from (
select posts.*
from posts
limit 20000
) as posts
left outer join shadowbans
on shadowbans.user_id = posts.user_id
where posts.user_id = 67 or shadowbans.user_id is null;
-- QUERY PLAN
-- ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
-- Hash Left Join (cost=2693.00..3631.50 rows=1 width=8) (actual time=24.940..40.187 rows=1 loops=1)
-- Hash Cond: (posts.user_id = shadowbans.user_id)
-- Filter: ((posts.user_id = 67) OR (shadowbans.user_id IS NULL))
-- Rows Removed by Filter: 19999
-- -> Limit (cost=0.00..288.50 rows=20000 width=8) (actual time=0.006..2.802 rows=20000 loops=1)
-- -> Seq Scan on posts (cost=0.00..14425.00 rows=1000000 width=8) (actual time=0.006..1.283 rows=20000 loops=1)
-- -> Hash (cost=1443.00..1443.00 rows=100000 width=4) (actual time=24.845..24.845 rows=100000 loops=1)
-- Buckets: 16384 Batches: 1 Memory Usage: 3516kB
-- -> Seq Scan on shadowbans (cost=0.00..1443.00 rows=100000 width=4) (actual time=0.004..12.441 rows=100000 loops=1)
-- Planning time: 0.220 ms
-- Execution time: 40.222 ms
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment