Skip to content

Instantly share code, notes, and snippets.

@onderkalaci
Created July 23, 2021 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save onderkalaci/fb8e42c44475c4a14c66461ba81dbf37 to your computer and use it in GitHub Desktop.
-- Set up two identical Citus distributed tables and load 1M rows into each.
-- Row shape: key is unique, value cycles through 0..999 (i % 1000), so each
-- distinct value matches ~1000 rows.
CREATE TABLE table_1 (key int primary key, value int);
SELECT create_distributed_table('table_1', 'key');
-- generate_series is inclusive on BOTH ends; 1..1000000 yields exactly 1M rows
-- (the original 0..1000000 silently loaded 1,000,001).
INSERT INTO table_1 SELECT i, i % 1000 FROM generate_series(1, 1000000) AS i;
-- Secondary index on value so the multi-shard lookup queries below can use an
-- index scan instead of a sequential scan on every shard.
CREATE INDEX table_1_value ON table_1 (value);
-- Second distributed table, populated as an exact copy of table_1; it serves
-- as the lookup/join target in the benchmark queries.
CREATE TABLE table_2 (key int primary key, value int);
SELECT create_distributed_table('table_2', 'key');
INSERT INTO table_2 SELECT key, value FROM table_1;
-- Multi-shard lookup queries driven through the secondary index.
-- The lines between `cat queries.sql` and the pgbench invocation are the
-- contents of queries.sql (pgbench script syntax: \set defines a variable,
-- :name substitutes it into the following SQL).
cat queries.sql
-- NOTE(review): values only span 0..999 (loaded as i % 1000), so draws from
-- 0..1000000 match zero rows most of the time — narrow the range if every
-- lookup should hit. Left as-is to preserve the original benchmark shape.
\set aid random(0, 1000000)
-- Fixed: this query referenced :bid, but only :aid is set above, so pgbench
-- would abort with "variable bid undefined" on the first execution.
SELECT count(*) FROM table_2 WHERE value = :aid;
-- Observed runtime varies from ~500 ms to ~1000 ms depending on the filter
-- value; the median is ~500 ms.
\set aid random(500, 1000)
SELECT COUNT(*) OVER (PARTITION BY table_2.key) FROM table_1 JOIN table_2 USING (key) WHERE table_1.value > :aid GROUP BY table_2.key;
-- Run many concurrent clients to cover the multi-shard join timings
-- (replace XXX with the desired client count; -j 32 worker threads,
--  -T 150 seconds, -P 1 progress report every second).
pgbench -f queries.sql -c XXX -j 32 -T 150 -P 1 "connection string"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment