SELECT count(*) FROM users_table;
SELECT pg_sleep((random() * 10)::int), count(*) FROM users_table;
SELECT pg_sleep(2), count(*) FROM users_table;
SET citus.force_max_query_parallelization TO ON;
SELECT count(*) FROM users_table;
SET citus.force_max_query_parallelization TO ON;
SELECT pg_sleep((random() * 10)::int), count(*) FROM users_table;
UPDATE users_table SET value_1 = value_1 + 1;
WITH cte_1 AS (SELECT * FROM users_table LIMIT 10),
cte_2 AS (SELECT * FROM events_table LIMIT 1000)
SELECT count(*) FROM users_table WHERE user_id NOT IN (SELECT cte_1.user_id FROM cte_1 JOIN cte_2 USING (user_id));
WITH cte_1 AS (SELECT * FROM users_table LIMIT 10),
cte_2 AS (SELECT * FROM events_table LIMIT 1000)
SELECT cte_1.user_id FROM cte_1 JOIN cte_2 USING (user_id);
-- on PostgreSQL 11, the CTE below is not inlined into the outer query (see the note after the query for PG 12+)
WITH users_events AS (
    SELECT user_id FROM users_table
)
SELECT
    uid, event_type, value_2, value_3
FROM (
    (SELECT user_id AS uid FROM users_events) users
    JOIN events_table ON users.uid = events_table.event_type
) a
ORDER BY 1, 2, 3, 4
LIMIT 5;
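-- for comparison, a minimal sketch on PostgreSQL 12+, where the planner may
-- inline the CTE; the MATERIALIZED keyword below explicitly forces the PG 11
-- behaviour:
WITH users_events AS MATERIALIZED (
    SELECT user_id FROM users_table
)
SELECT count(*) FROM users_events;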
SET citus.enable_repartition_joins TO on;
SELECT count(*) FROM users_table JOIN events_table USING (value_2);
CREATE TABLE users_table (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
SELECT create_distributed_table('users_table', 'user_id');
CREATE TABLE events_table (user_id int, time timestamp, event_type int, value_2 int, value_3 float, value_4 bigint);
SELECT create_distributed_table('events_table', 'user_id');
INSERT INTO users_table
SELECT (i * random())::int % 10000,
       timestamp '2014-01-10 20:00:00' + random() * (timestamp '2014-01-20 20:00:00' - timestamp '2014-01-10 10:00:00'),
       (i * random())::int % 10000, (i * random())::int % 10000, (i * random())::int % 10000
FROM generate_series(0, 1000000) i;
INSERT INTO events_table
SELECT (i * random())::int % 10000,
       timestamp '2014-01-10 20:00:00' + random() * (timestamp '2014-01-20 20:00:00' - timestamp '2014-01-10 10:00:00'),
       (i * random())::int % 10000, (i * random())::int % 10000, (i * random())::int % 10000
FROM generate_series(0, 1000000) i;
-- watch the following command to see if everything is good
select * from citus_remote_connection_stats();
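-- one way to keep an eye on it from psql is the \watch meta-command, which
-- re-runs the query every 2 seconds until interrupted:
SELECT * FROM citus_remote_connection_stats() \watch 2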
-- sometimes cancel pgbench, or even run killall -9 postgres on any of the workers and/or the coordinator (see the sketch after the pgbench commands)
-- vary -c between 64 and 300
-- -T is preferred to be 150 so that the run is longer than citus.connection_retry_timeout
pgbench -f /Users/onderkalaci/Documents/poolSizeTests/very_fast_multi_shard.sql -c 64 -j8 -T 150 -P 1 postgres
pgbench -f /Users/onderkalaci/Documents/poolSizeTests/randomly_long_queries.sql -c 64 -j8 -T 150 -P 1 postgres
pgbench -f /Users/onderkalaci/Documents/poolSizeTests/fixed_long_queries.sql -c 64 -j8 -T 150 -P 1 postgres
pgbench -f /Users/onderkalaci/Documents/poolSizeTests/simple_force_query_parallelization.sql -c 64 -j8 -T 150 -P 1 postgres
pgbench -f /Users/onderkalaci/Documents/poolSizeTests/random_long_force_query_parallelization.sql -c 64 -j8 -T 150 -P 1 postgres
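-- to simulate the worker failure mentioned above while pgbench is running, kill
-- postgres on one of the workers; a sketch, assuming ssh access and a
-- hypothetical host name:
ssh citus-worker-1 'killall -9 postgres'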
--
alter system set max_connections TO 300;
-- well, with that many connections, Citus sometimes fails to establish
-- the connections within 5 seconds; that's why the timeout is increased below
alter system set citus.node_connection_timeout TO '60s';
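-- note: max_connections only takes effect after a restart; the connection
-- timeout should be picked up with a reload:
SELECT pg_reload_conf();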
-- in some tests, also vary citus.connection_retry_timeout,
-- citus.force_max_query_parallelization, max_connections and max_shared_pool_size
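-- a sketch of reconfiguring a single test run (values are arbitrary; the pool
-- size GUC is assumed to be spelled citus.max_shared_pool_size):
SET citus.force_max_query_parallelization TO on;
ALTER SYSTEM SET citus.max_shared_pool_size TO 100;
SELECT pg_reload_conf();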
-- set max_connections on the workers to 300 and 600 as well
SELECT run_command_on_workers($$alter system set max_connections TO 300;$$);
SELECT run_command_on_workers($$alter system set max_connections TO 600;$$);
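-- reload the workers so that reloadable settings are applied; the new
-- max_connections itself only takes effect after each worker is restarted:
SELECT run_command_on_workers($$SELECT pg_reload_conf();$$);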
create table countries(
id serial primary key
, name text
, code varchar(2) collate "C" unique
);
insert into countries(name, code) select 'country-'||i, i::text from generate_series(10,99) i;
select create_reference_table('countries');
create table orgs (
id bigserial primary key
, name text
, created_at timestamptz default now()
);
select create_distributed_table('orgs', 'id');
create table users (
id bigserial
, org_id bigint references orgs(id)
, name text
, created_at timestamptz default now()
, country_id int references countries(id)
, score bigint generated always as (id + country_id) stored
, primary key (org_id, id)
);
select create_distributed_table('users', 'org_id');
alter table users add constraint fk_user_country foreign key (country_id) references countries(id);
create table orders (
id bigserial
, org_id bigint references orgs(id)
, user_id bigint
, price int
, info jsonb
, primary key (org_id, id)
, foreign key (org_id, user_id) references users(org_id, id)
);
select create_distributed_table('orders', 'org_id');
create table events (
id bigserial not null
, user_id bigint not null
, org_id bigint not null
, event_time timestamp not null default now()
, event_type int not null default 0
, payload jsonb
, primary key (user_id, id)
);
create index event_time_idx on events using BRIN (event_time);
create index event_json_idx on events using gin(payload);
select create_distributed_table('events', 'user_id'); -- on purpose, do not co-locate correctly on org_id (see the example query after the data load below)
create table local_data(
id bigserial primary key
, val int default ( (random()*100)::int )
);
insert into orgs(id, name) select i,'org-'||i from generate_series(1,1000) i;
insert into users(id, name, org_id, country_id) select i,'user-'||i, i%1000+1, (i%90)+1 from generate_series(1,100000) i;
insert into orders(id, org_id, user_id, price) select i, ((i%100000+1)%1000)+1 , i%100000+1, i/100 from generate_series(1,1000000) i;
insert into events(id, org_id, user_id, event_type) select i, ((i%100000+1)%1000)+1 , i%100000+1, i/100 from generate_series(1,1000000) i;
insert into local_data(id) select generate_series(1,1000);
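-- because events is distributed on user_id, a join with orders on org_id is not
-- co-located; a sketch of such a query, which may need repartition joins enabled
-- to run:
SET citus.enable_repartition_joins TO on;
SELECT count(*) FROM events e JOIN orders o ON e.org_id = o.org_id;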