Skip to content

Instantly share code, notes, and snippets.

@zhjwpku
Forked from sfriquet/citus_cheatsheet.sql
Last active November 9, 2023 04:33
Show Gist options
  • Save zhjwpku/5035bb7108cb39685faae0e2a0a33f13 to your computer and use it in GitHub Desktop.
Save zhjwpku/5035bb7108cb39685faae0e2a0a33f13 to your computer and use it in GitHub Desktop.
Citus Cheatsheet
-- Set number of shard
ALTER DATABASE citus SET citus.shard_count = 64;
-- distribute the table
SELECT create_distributed_table('github_events', 'repo_id');
-- undo that and make it local again
SELECT undistribute_table('github_events');
-- change distribution column
SELECT alter_distributed_table('github_events', distribution_column:='event_id');
-- change shard count of all tables in colocation group
SELECT alter_distributed_table('github_events', shard_count:=6, cascade_to_colocated:=true);
-- change colocation
SELECT alter_distributed_table('github_events', colocate_with:='another_table');
-- change access method of a table
SELECT alter_table_set_access_method('github_events', 'columnar');
-- Explain across all shards
SET citus.explain_all_tasks TO true;
-- Size of actual data in table (the `main` fork): https://docs.citusdata.com/en/v12.1/admin_guide/table_management.html#determining-table-and-relation-size
SELECT logicalrelid AS name, pg_size_pretty(citus_relation_size(logicalrelid)) AS size FROM pg_dist_partition;
-- Size of all distributed tables excluding indexes but including TOAST, free space map, and visibility map
SELECT logicalrelid::text tablename, pg_size_pretty(citus_table_size(logicalrelid::text)) size FROM pg_dist_partition group by tablename;
-- Size of all distributed tables including all indexes and TOAST data
SELECT logicalrelid::text tablename, pg_size_pretty(citus_total_relation_size(logicalrelid::text)) size FROM pg_dist_partition group by tablename;
-- Get shard id from dist key value
SELECT get_shard_id_for_distribution_column('tablename', 'dist_value');
-- Get node from shard id
SELECT * FROM pg_dist_shard_placement WHERE shardid = 103258;
-- Get cluster size
SELECT pg_size_pretty(sum(result::bigint)) as size, count(nodename) as node_count FROM (
SELECT nodename,nodeport,result
FROM run_command_on_workers($cmd$SELECT pg_database_size('citus');$cmd$)
) a;
-- Get node sizes w/ shard count
WITH shard_sizes AS(
SELECT shardid, result::bigint size FROM
(
SELECT (run_command_on_shards(logicalrelid::text,$cmd$SELECT pg_total_relation_size('%s')$cmd$)).*
FROM pg_dist_partition pp
WHERE pp.partmethod = 'h'
-- AND pp.logicalrelid::text = '$tablename'
)a
)
SELECT nodename, count(*) as shard_count, pg_size_pretty(sum(group_size))
FROM
(
SELECT nodename, array_agg(ps.shardid) shard_group, sum(size) group_size
FROM shard_sizes ss, pg_dist_shard ps, pg_dist_shard_placement psp, pg_dist_partition pp
WHERE (ss.shardid=ps.shardid AND pp.logicalrelid=ps.logicalrelid AND psp.shardid=ps.shardid AND pp.partmethod='h')
GROUP BY shardmaxvalue, shardminvalue, nodename, colocationid
) a
GROUP BY nodename
ORDER BY sum(group_size) DESC;
-- Get shard sizes
WITH shard_sizes AS(
SELECT shardid, result::bigint size FROM
(
SELECT (run_command_on_shards(logicalrelid::text,$cmd$SELECT pg_total_relation_size('%s')$cmd$)).*
FROM pg_dist_partition pp
WHERE pp.partmethod = 'h'
-- AND pp.logicalrelid::text = '$tablename'
)a
)
SELECT colocationid, nodename, CASE WHEN shard_group IS NULL THEN NULL ELSE shard_group[1] END shardid, pg_size_pretty(group_size)
FROM
(
SELECT colocationid, nodename, array_agg(ps.shardid) shard_group, sum(size) group_size
FROM shard_sizes ss, pg_dist_shard ps, pg_dist_shard_placement psp, pg_dist_partition pp
WHERE (ss.shardid = ps.shardid AND pp.logicalrelid = ps.logicalrelid AND psp.shardid = ps.shardid AND pp.partmethod = 'h')
GROUP BY shardmaxvalue, shardminvalue, nodename, colocationid
) a
ORDER BY group_size DESC;
-- Figuring out least/most crowded shard for rebalancing: https://www.citusdata.com/blog/2018/02/28/fun-with-sql-relocating-citus-shards/
-- Most Active Tenant (from https://www.citusdata.com/blog/2018/07/31/introducing-landlord-per-tenant-stats)
SELECT partition_key as tenant_id,
count(*) as tenant_unique_queries,
sum(c.calls) as tenant_total_queries,
sum(total_time) as total_query_time
FROM citus_stat_statements c
JOIN pg_stat_statements p
ON c.queryid = p.queryid
WHERE partition_key is not null
GROUP BY tenant_id
ORDER BY tenant_total_queries DESC
LIMIT 50;
-- Rebalance plan (for one shard, omit parameter for all)
SELECT (get_rebalance_table_shards_plan('table_name', max_shard_moves := 1)).*
-- How much data will be moved if rebalancing
SELECT pg_size_pretty(sum(shard_size)) FROM (SELECT (get_rebalance_table_shards_plan('table_name', max_shard_moves := 1)).shard_size) a;
-- Get visibility of blocked operations: https://www.citusdata.com/blog/2018/02/15/when-postgresql-blocks/
WITH citus_xacts AS (
SELECT * FROM get_all_active_transactions() WHERE initiator_node_identifier = 0
),
citus_wait_pids AS (
SELECT
(SELECT process_id FROM citus_xacts WHERE transaction_number = waiting_transaction_num) AS waiting_pid,
(SELECT process_id FROM citus_xacts WHERE transaction_number = blocking_transaction_num) AS blocking_pid
FROM
dump_global_wait_edges()
)
SELECT
waiting_pid AS blocked_pid,
blocking_pid,
waiting.query AS blocked_statement,
blocking.query AS current_statement_in_blocking_process
FROM
citus_wait_pids
JOIN
pg_stat_activity waiting ON (waiting_pid = waiting.pid)
JOIN
pg_stat_activity blocking ON (blocking_pid = blocking.pid);
-- Run command on all workers
SELECT run_command_on_workers($$ ALTER SYSTEM SET max_locks_per_transaction = 128; $$);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment