SELECT count(*) FROM users_table;
SELECT pg_sleep((random() * 10)::int), count(*) FROM users_table;
SELECT pg_sleep(2), count(*) FROM users_table;
SET citus.force_max_query_parallelization TO ON;
SELECT count(*) FROM users_table;
SET citus.force_max_query_parallelization TO ON;
SELECT pg_sleep((random() * 10)::int), count(*) FROM users_table;
UPDATE users_table SET value_1 = value_1 + 1;
WITH cte_1 AS (SELECT * FROM users_table LIMIT 10),
cte_2 AS (SELECT * FROM events_table LIMIT 1000)
SELECT count(*) FROM users_table WHERE user_id NOT IN (SELECT cte_1.user_id FROM cte_1 JOIN cte_2 USING (user_id));
WITH cte_1 AS (SELECT * FROM users_table LIMIT 10),
cte_2 AS (SELECT * FROM events_table LIMIT 1000)
SELECT cte_1.user_id FROM cte_1 JOIN cte_2 USING (user_id);
-- on PostgreSQL 11, the CTE below is not inlined into the outer query (see the note after the query for PG 12+)
WITH users_events AS (
    SELECT user_id FROM users_table
)
SELECT
    uid, event_type, value_2, value_3
FROM (
    (SELECT user_id AS uid FROM users_events) users
    JOIN events_table ON users.uid = events_table.event_type
) a
ORDER BY 1, 2, 3, 4
LIMIT 5;
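-- for comparison, a minimal sketch on PostgreSQL 12+, where the planner may
-- inline the CTE; the MATERIALIZED keyword below explicitly forces the PG 11
-- behaviour:
WITH users_events AS MATERIALIZED (
    SELECT user_id FROM users_table
)
SELECT count(*) FROM users_events;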
SET citus.enable_repartition_joins TO on;
SELECT count(*) FROM users_table JOIN events_table USING (value_2);
CREATE TABLE users_table (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
SELECT create_distributed_table('users_table', 'user_id');
CREATE TABLE events_table (user_id int, time timestamp, event_type int, value_2 int, value_3 float, value_4 bigint);
SELECT create_distributed_table('events_table', 'user_id');
INSERT INTO users_table
SELECT (i * random())::int % 10000,
       timestamp '2014-01-10 20:00:00' + random() * (timestamp '2014-01-20 20:00:00' - timestamp '2014-01-10 10:00:00'),
       (i * random())::int % 10000, (i * random())::int % 10000, (i * random())::int % 10000
FROM generate_series(0, 1000000) i;
INSERT INTO events_table
SELECT (i * random())::int % 10000,
       timestamp '2014-01-10 20:00:00' + random() * (timestamp '2014-01-20 20:00:00' - timestamp '2014-01-10 10:00:00'),
       (i * random())::int % 10000, (i * random())::int % 10000, (i * random())::int % 10000
FROM generate_series(0, 1000000) i;
-- watch the following command to see if everything is good
select * from citus_remote_connection_stats();
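-- one way to keep an eye on it from psql is the \watch meta-command, which
-- re-runs the query every 2 seconds until interrupted:
SELECT * FROM citus_remote_connection_stats() \watch 2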
-- sometimes cancel pgbench, or even run killall -9 postgres on any of the workers and/or the coordinator (see the sketch after the pgbench commands)
-- vary -c between 64 and 300
-- -T is preferred to be 150 so that the run is longer than citus.connection_retry_timeout
pgbench -f /Users/onderkalaci/Documents/poolSizeTests/very_fast_multi_shard.sql -c 64 -j8 -T 150 -P 1 postgres
pgbench -f /Users/onderkalaci/Documents/poolSizeTests/randomly_long_queries.sql -c 64 -j8 -T 150 -P 1 postgres
pgbench -f /Users/onderkalaci/Documents/poolSizeTests/fixed_long_queries.sql -c 64 -j8 -T 150 -P 1 postgres
pgbench -f /Users/onderkalaci/Documents/poolSizeTests/simple_force_query_parallelization.sql -c 64 -j8 -T 150 -P 1 postgres
pgbench -f /Users/onderkalaci/Documents/poolSizeTests/random_long_force_query_parallelization.sql -c 64 -j8 -T 150 -P 1 postgres
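-- to simulate the worker failure mentioned above while pgbench is running, kill
-- postgres on one of the workers; a sketch, assuming ssh access and a
-- hypothetical host name:
ssh citus-worker-1 'killall -9 postgres'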
--
alter system set max_connections TO 300;
-- well, with that many connections, Citus sometimes fails to establish
-- the connections within 5 seconds; that's why the timeout is increased below
alter system set citus.node_connection_timeout TO '60s';
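-- note: max_connections only takes effect after a restart; the connection
-- timeout should be picked up with a reload:
SELECT pg_reload_conf();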
-- in some tests, also vary citus.connection_retry_timeout,
-- citus.force_max_query_parallelization, max_connections and max_shared_pool_size
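-- a sketch of reconfiguring a single test run (values are arbitrary; the pool
-- size GUC is assumed to be spelled citus.max_shared_pool_size):
SET citus.force_max_query_parallelization TO on;
ALTER SYSTEM SET citus.max_shared_pool_size TO 100;
SELECT pg_reload_conf();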
-- set max_connections on the workers to 300 and 600 as well
SELECT run_command_on_workers($$alter system set max_connections TO 300;$$);
SELECT run_command_on_workers($$alter system set max_connections TO 600;$$);
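-- reload the workers so that reloadable settings are applied; the new
-- max_connections itself only takes effect after each worker is restarted:
SELECT run_command_on_workers($$SELECT pg_reload_conf();$$);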
create table countries(
id serial primary key
, name text
, code varchar(2) collate "C" unique
);
insert into countries(name, code) select 'country-'||i, i::text from generate_series(10,99) i;
select create_reference_table('countries');
create table orgs (
id bigserial primary key
, name text
, created_at timestamptz default now()
);
select create_distributed_table('orgs', 'id');
create table users (
id bigserial
, org_id bigint references orgs(id)
, name text
, created_at timestamptz default now()
, country_id int references countries(id)
, score bigint generated always as (id + country_id) stored
, primary key (org_id, id)
);
select create_distributed_table('users', 'org_id');
alter table users add constraint fk_user_country foreign key (country_id) references countries(id);
create table orders (
id bigserial
, org_id bigint references orgs(id)
, user_id bigint
, price int
, info jsonb
, primary key (org_id, id)
, foreign key (org_id, user_id) references users(org_id, id)
);
select create_distributed_table('orders', 'org_id');
create table events (
id bigserial not null
, user_id bigint not null
, org_id bigint not null
, event_time timestamp not null default now()
, event_type int not null default 0
, payload jsonb
, primary key (user_id, id)
);
create index event_time_idx on events using BRIN (event_time);
create index event_json_idx on events using gin(payload);
select create_distributed_table('events', 'user_id'); -- on purpose, do not co-locate correctly on org_id (see the example query after the data load below)
create table local_data(
id bigserial primary key
, val int default ( (random()*100)::int )
);
insert into orgs(id, name) select i,'org-'||i from generate_series(1,1000) i;
insert into users(id, name, org_id, country_id) select i,'user-'||i, i%1000+1, (i%90)+1 from generate_series(1,100000) i;
insert into orders(id, org_id, user_id, price) select i, ((i%100000+1)%1000)+1 , i%100000+1, i/100 from generate_series(1,1000000) i;
insert into events(id, org_id, user_id, event_type) select i, ((i%100000+1)%1000)+1 , i%100000+1, i/100 from generate_series(1,1000000) i;
insert into local_data(id) select generate_series(1,1000);
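-- because events is distributed on user_id, a join with orders on org_id is not
-- co-located; a sketch of such a query, which may need repartition joins enabled
-- to run:
SET citus.enable_repartition_joins TO on;
SELECT count(*) FROM events e JOIN orders o ON e.org_id = o.org_id;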