Skip to content

Instantly share code, notes, and snippets.

@onderkalaci
Created July 23, 2021 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save onderkalaci/fb8e42c44475c4a14c66461ba81dbf37 to your computer and use it in GitHub Desktop.
-- Set up two identical Citus distributed tables and load 1M rows into each.
-- Row shape: key is unique, value cycles through 0..999 (i % 1000), so each
-- distinct value matches ~1000 rows.
CREATE TABLE table_1 (key int primary key, value int);
SELECT create_distributed_table('table_1', 'key');
-- generate_series is inclusive on BOTH ends; 1..1000000 yields exactly 1M rows
-- (the original 0..1000000 silently loaded 1,000,001).
INSERT INTO table_1 SELECT i, i % 1000 FROM generate_series(1, 1000000) AS i;
-- Secondary index on value so the multi-shard lookup queries below can use an
-- index scan instead of a sequential scan on every shard.
CREATE INDEX table_1_value ON table_1 (value);
-- Second distributed table, populated as an exact copy of table_1; it serves
-- as the lookup/join target in the benchmark queries.
CREATE TABLE table_2 (key int primary key, value int);
SELECT create_distributed_table('table_2', 'key');
INSERT INTO table_2 SELECT key, value FROM table_1;
-- Multi-shard lookup queries driven through the secondary index.
-- The lines between `cat queries.sql` and the pgbench invocation are the
-- contents of queries.sql (pgbench script syntax: \set defines a variable,
-- :name substitutes it into the following SQL).
cat queries.sql
-- NOTE(review): values only span 0..999 (loaded as i % 1000), so draws from
-- 0..1000000 match zero rows most of the time — narrow the range if every
-- lookup should hit. Left as-is to preserve the original benchmark shape.
\set aid random(0, 1000000)
-- Fixed: this query referenced :bid, but only :aid is set above, so pgbench
-- would abort with "variable bid undefined" on the first execution.
SELECT count(*) FROM table_2 WHERE value = :aid;
-- Observed runtime varies from ~500 ms to ~1000 ms depending on the filter
-- value; the median is ~500 ms.
\set aid random(500, 1000)
SELECT COUNT(*) OVER (PARTITION BY table_2.key) FROM table_1 JOIN table_2 USING (key) WHERE table_1.value > :aid GROUP BY table_2.key;
-- Run many concurrent clients to cover the multi-shard join timings
-- (replace XXX with the desired client count; -j 32 worker threads,
--  -T 150 seconds, -P 1 progress report every second).
pgbench -f queries.sql -c XXX -j 32 -T 150 -P 1 "connection string"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment