Skip to content

Instantly share code, notes, and snippets.

@sfriquet
Last active October 25, 2023 09:58
Show Gist options
  • Save sfriquet/861a0d31c6c45fd84ae418c006895179 to your computer and use it in GitHub Desktop.
Save sfriquet/861a0d31c6c45fd84ae418c006895179 to your computer and use it in GitHub Desktop.
Citus Cheatsheet
-- Set number of shard
ALTER DATABASE citus SET citus.shard_count = 64;
-- Explain across all shards
SET citus.explain_all_tasks TO true;
-- Size of all distributed tables excluding indexes but including TOAST, free space map, and visibility map
SELECT logicalrelid::text tablename, pg_size_pretty(citus_table_size(logicalrelid::text)) size FROM pg_dist_partition group by tablename;
-- Size of all distributed tables including all indexes and TOAST data
SELECT logicalrelid::text tablename, pg_size_pretty(citus_total_relation_size(logicalrelid::text)) size FROM pg_dist_partition group by tablename;
-- Get shard id from dist key value
SELECT get_shard_id_for_distribution_column('tablename', 'dist_value');
-- Get node from shard id
SELECT * FROM pg_dist_shard_placement WHERE shardid = 103258;
-- Get cluster size
SELECT pg_size_pretty(sum(result::bigint)) as size, count(nodename) as node_count FROM (
SELECT nodename,nodeport,result
FROM run_command_on_workers($cmd$SELECT pg_database_size('citus');$cmd$)
) a;
-- Get node sizes w/ shard count
WITH shard_sizes AS(
SELECT shardid, result::bigint size FROM
(
SELECT (run_command_on_shards(logicalrelid::text,$cmd$SELECT pg_total_relation_size('%s')$cmd$)).*
FROM pg_dist_partition pp
WHERE pp.partmethod = 'h'
-- AND pp.logicalrelid::text = '$tablename'
)a
)
SELECT nodename, count(*) as shard_count, pg_size_pretty(sum(group_size))
FROM
(
SELECT nodename, array_agg(ps.shardid) shard_group, sum(size) group_size
FROM shard_sizes ss, pg_dist_shard ps, pg_dist_shard_placement psp, pg_dist_partition pp
WHERE (ss.shardid=ps.shardid AND pp.logicalrelid=ps.logicalrelid AND psp.shardid=ps.shardid AND pp.partmethod='h')
GROUP BY shardmaxvalue, shardminvalue, nodename, colocationid
) a
GROUP BY nodename
ORDER BY sum(group_size) DESC;
-- Get shard sizes
WITH shard_sizes AS(
SELECT shardid, result::bigint size FROM
(
SELECT (run_command_on_shards(logicalrelid::text,$cmd$SELECT pg_total_relation_size('%s')$cmd$)).*
FROM pg_dist_partition pp
WHERE pp.partmethod = 'h'
-- AND pp.logicalrelid::text = '$tablename'
)a
)
SELECT colocationid, nodename, CASE WHEN shard_group IS NULL THEN NULL ELSE shard_group[1] END shardid, pg_size_pretty(group_size)
FROM
(
SELECT colocationid, nodename, array_agg(ps.shardid) shard_group, sum(size) group_size
FROM shard_sizes ss, pg_dist_shard ps, pg_dist_shard_placement psp, pg_dist_partition pp
WHERE (ss.shardid = ps.shardid AND pp.logicalrelid = ps.logicalrelid AND psp.shardid = ps.shardid AND pp.partmethod = 'h')
GROUP BY shardmaxvalue, shardminvalue, nodename, colocationid
) a
ORDER BY group_size DESC;
-- Figuring out least/most crowded shard for rebalancing: https://www.citusdata.com/blog/2018/02/28/fun-with-sql-relocating-citus-shards/
-- Most Active Tenant (from https://www.citusdata.com/blog/2018/07/31/introducing-landlord-per-tenant-stats)
SELECT partition_key as tenant_id,
count(*) as tenant_unique_queries,
sum(c.calls) as tenant_total_queries,
sum(total_time) as total_query_time
FROM citus_stat_statements c
JOIN pg_stat_statements p
ON c.queryid = p.queryid
WHERE partition_key is not null
GROUP BY tenant_id
ORDER BY tenant_total_queries DESC
LIMIT 50;
-- Rebalance plan (for one shard, omit parameter for all)
SELECT (get_rebalance_table_shards_plan('table_name', max_shard_moves := 1)).*
-- How much data will be moved if rebalancing
SELECT pg_size_pretty(sum(shard_size)) FROM (SELECT (get_rebalance_table_shards_plan('table_name', max_shard_moves := 1)).shard_size) a;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment