-- Show currently-running VACUUM commands and how long their transaction has
-- been open.  Long-running vacuums on big tables are normal; one stuck for
-- many hours may be blocked.
SELECT
    pid,
    datname,
    usename,
    current_timestamp - xact_start AS xact_runtime,
    query
FROM pg_stat_activity
WHERE state <> 'idle'                      -- pre-9.2 idiom was query = '<IDLE>'
  AND query NOT ILIKE '%pg_stat_activity%' -- exclude this monitoring query itself
  AND query ILIKE '%vacuum%'
ORDER BY xact_start;
Tables need to be vacuumed before the transaction ID (xid) age reaches roughly 2.1 billion (2^31), or the xid counter wraps around and requires hours to days of downtime to sort itself out. RDS keeps track of this and handles it automatically.
-- Top 20 tables by transaction-ID age, taking the older of the main
-- relation and its TOAST table.  Compare against autovacuum_freeze_max_age.
SELECT
    main.oid::regclass AS table_name,
    greatest(age(main.relfrozenxid), age(toast.relfrozenxid)) AS age,
    pg_size_pretty(pg_table_size(main.oid)) AS table_size
FROM pg_class AS main
LEFT JOIN pg_class AS toast
    ON main.reltoastrelid = toast.oid
WHERE main.relkind = 'r'
ORDER BY age DESC
LIMIT 20;
-- Per-table dead-tuple counts compared against the autovacuum trigger
-- formula (threshold + scale_factor * reltuples).  is_vacuum = true means
-- the table is over the trigger and due for an autovacuum.
-- NOTE(review): 50 + 0.1 * reltuples assumes autovacuum_vacuum_threshold = 50
-- and autovacuum_vacuum_scale_factor = 0.1 -- confirm against pg_settings
-- (the shipped defaults are 50 and 0.2).
SELECT
    s.relname,
    s.n_dead_tup,
    50 + 0.1 * c.reltuples AS vacuum_threshold,
    s.n_live_tup,
    s.n_tup_del,
    s.n_tup_upd,
    s.autovacuum_count,
    s.last_vacuum,
    s.last_autovacuum,
    now() AS now,
    c.reltuples,
    s.n_dead_tup > (50 + 0.1 * c.reltuples) AS is_vacuum
FROM pg_stat_user_tables AS s
-- Join on the table's OID: relname alone is NOT unique across schemas and
-- produced duplicate/cross-matched rows in the old relname-based join.
INNER JOIN pg_class AS c ON s.relid = c.oid
ORDER BY
    is_vacuum DESC,
    s.n_dead_tup DESC;
-- Current autovacuum-related server settings and where each value came from.
SELECT
    category,
    name,
    setting,
    unit,
    source,
    min_val,
    max_val,
    boot_val
FROM pg_settings
WHERE category = 'Autovacuum';
You can set the scale factor to zero to disable that part of the trigger and rely only on the threshold, if your workload is constant. Conversely, set the threshold to 0 to rely only on the scale factor (the thresholds cannot be set to -1; their minimum is 0).
-- Override the autovacuum/autoanalyze triggers for one busy table.
-- Storage parameters MUST be wrapped in parentheses:
-- ALTER TABLE ... SET (param = value, ...); without them this is a syntax error.
ALTER TABLE some_table SET (
    autovacuum_analyze_scale_factor = 0,
    autovacuum_analyze_threshold = 100000,
    autovacuum_vacuum_scale_factor = 0,
    autovacuum_vacuum_threshold = 400000
);
# Relax the autovacuum/autoanalyze scale factors on all 128 hex-suffixed
# partitions (partitioned_table_00 .. partitioned_table_7f).
for part in {0..127}; do
    psql -U <user> -w -h <host> -p 5432 <db> \
        -c "ALTER TABLE partitioned_table_$(printf '%02x' $part) SET (autovacuum_analyze_scale_factor = 0.4, autovacuum_vacuum_scale_factor = 0.5);"
done
Postgres tuples are immutable: an UPDATE writes a new row version rather than modifying the old one in place. HOT (heap-only tuple) updates write the new version into free space on the same page, which is more efficient, but only work if the new data fits there. The fill-factor is a per-table setting giving the percentage of each page used for initial tuple data; anything below 100% leaves the remainder free for future updates so that more of them can be HOT updates. It defaults to 100%, and you need to rewrite the table (pg_repack or VACUUM FULL) for existing pages to take advantage of a new setting.
-- Compare total updates with HOT updates per table; a table where
-- n_tup_hot_upd lags far behind n_tup_upd may benefit from a lower fillfactor.
SELECT
    schemaname,
    relname,
    n_tup_upd,
    n_tup_hot_upd
FROM pg_stat_all_tables
ORDER BY n_tup_upd;
-- Reserve 20% of each page for future (HOT) updates.  Only affects newly
-- written pages; rewrite the table (pg_repack / VACUUM FULL) to apply it
-- to existing data.
ALTER TABLE some_table SET (fillfactor = 80);
# Set fillfactor = 80 on all 128 hex-suffixed partitions.
for part in {0..127}; do
    psql -U <user> -w -h <host> -p 5432 <db> \
        -c "ALTER TABLE partitioned_table_$(printf '%02x' $part) SET (fillfactor = 80);"
done
-- Total on-disk size of the database: raw bytes and human-readable.
SELECT
    pg_database_size('<db>'),
    pg_size_pretty(pg_database_size('<db>'));
-- Per-table size breakdown: total = heap + indexes + TOAST.
-- table_bytes is derived (total minus indexes minus TOAST).  The old
-- version selected row_estimate twice (it is already included via a.*);
-- the duplicate is removed, and results now come back largest-first.
SELECT
    a.*,
    total_bytes - index_bytes - COALESCE(toast_bytes, 0) AS table_bytes
FROM (
    SELECT
        c.oid,
        n.nspname AS table_schema,
        c.relname AS table_name,
        c.reltuples AS row_estimate,  -- planner estimate, not an exact count
        pg_total_relation_size(c.oid) AS total_bytes,
        pg_indexes_size(c.oid) AS index_bytes,
        -- NULL when the table has no TOAST relation, hence the COALESCE above
        pg_total_relation_size(c.reltoastrelid) AS toast_bytes
    FROM pg_class c
    LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
    WHERE c.relkind = 'r'
) a
ORDER BY total_bytes DESC;
-- Candidate indexes to drop: non-unique indexes never scanned since the
-- statistics were last reset, largest first.  Unique indexes are excluded
-- because they enforce constraints even when never scanned.
SELECT
    relid::regclass AS table_name,
    indexrelid::regclass AS index_name,
    pg_relation_size(indexrelid::regclass) AS size,
    pg_size_pretty(pg_relation_size(indexrelid::regclass)) AS human_size,
    idx_tup_read,
    idx_tup_fetch,
    idx_scan
FROM pg_stat_user_indexes
JOIN pg_index USING (indexrelid)
WHERE indisunique IS FALSE
  AND idx_scan = 0
ORDER BY size DESC;
-- All non-unique indexes ordered by size then scan count; a large index
-- with a low idx_scan is worth reviewing.
SELECT
    relid::regclass AS table_name,
    indexrelid::regclass AS index_name,
    pg_relation_size(indexrelid::regclass) AS size,
    pg_size_pretty(pg_relation_size(indexrelid::regclass)) AS human_size,
    idx_tup_read,
    idx_tup_fetch,
    idx_scan
FROM pg_stat_user_indexes
JOIN pg_index USING (indexrelid)
WHERE indisunique IS FALSE
ORDER BY
    size DESC,
    idx_scan;
Creating an index concurrently has a downside, though: if something goes wrong during the build, it is not rolled back and leaves an invalid index behind. Invalid indexes can be found using the following query.
-- Find invalid indexes left behind by failed CREATE INDEX CONCURRENTLY.
-- Rewritten from an implicit comma join to an explicit INNER JOIN; the
-- output columns (pg_class.* then pg_index.*) are unchanged.
SELECT *
FROM pg_class
INNER JOIN pg_index ON pg_index.indexrelid = pg_class.oid
WHERE pg_index.indisvalid = false;
Source: https://blog.gojekengineering.com/the-postgres-performance-tuning-manual-indexes-19a1bdfe28ba
See also: https://www.postgresql.org/docs/current/sql-createindex.html#SQL-CREATEINDEX-CONCURRENTLY
These were all adapted from a query found on the Postgres wiki. That query was taken from check_postgres; a newer version exists there, but I'm not sure whether any of its changes matter for us.
See the various .sql files in this directory.
-- Estimate total wasted space (bloat) across all tables and their indexes.
-- Adapted from the check_postgres bloat query: it estimates the minimal
-- page count each relation *should* need ("otta") from pg_stats column
-- widths, then counts any pages beyond that as waste.  The numbers are
-- rough approximations, not exact measurements.
SELECT
-- wastedbytes: table pages beyond the estimated minimum ("otta")
pg_size_pretty(SUM(CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::BIGINT END)) AS wastedbytes,
-- wastedibytes: index pages beyond the estimated minimum ("iotta")
pg_size_pretty(SUM(CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta)::BIGINT END)) AS wastedibytes,
pg_size_pretty(
SUM(CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::BIGINT END)
+ SUM(CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta)::BIGINT END)
) AS total
FROM (
-- sml: one row per table/index pair with actual vs. estimated page counts
SELECT
schemaname, tablename, cc.reltuples, cc.relpages, bs,
-- otta: estimated minimal heap pages, from avg row width + header/alignment
CEIL((cc.reltuples*((datahdr+ma-
(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta,
COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
FROM (
-- rs: per-table data width plus tuple-header and null-bitmap overhead
SELECT
ma,bs,schemaname,tablename,
(datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
(maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
FROM (
-- foo: average row data width per table from pg_stats
SELECT
schemaname, tablename, hdr, ma, bs,
SUM((1-null_frac)*avg_width) AS datawidth,
MAX(null_frac) AS maxfracsum,
hdr+(
-- null bitmap size: 1 byte per 8 nullable columns
SELECT 1+count(*)/8
FROM pg_stats s2
WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
) AS nullhdr
FROM pg_stats s, (
-- constants: block size (bs), tuple header size (hdr), max alignment (ma)
-- for this server; the version sniffing covers old 8.x header layouts
SELECT
(SELECT current_setting('block_size')::numeric) AS bs,
CASE WHEN substring(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
FROM (SELECT version() AS v) AS foo
) AS constants
GROUP BY 1,2,3,4,5
) AS foo
) AS rs
-- join estimates back to actual relation sizes in pg_class
JOIN pg_class cc ON cc.relname = rs.tablename
JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
-- and to each table's indexes (if any)
LEFT JOIN pg_index i ON indrelid = cc.oid
LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
) AS sml;
All tables, using .pgpass for authentication:
# Repack (online rewrite) every table in the database to reclaim bloat.
# --no-password makes psql/pg_repack rely on ~/.pgpass for authentication.
bin/pg_repack \
--no-superuser-check \
--host=<host> \
--port=5432 \
--username=<user> \
--no-password \
--dbname=<db>
WARNING: Repacking our giant parent tables never works because they're busy and we can't lock them long enough, and there's no actual data so repacking is pointless anyway. Don't try to repack those in any automated maintenance or it will likely fail.
One or more specific tables:
# Repack only the named tables; repeat --table for each one.
# Authentication comes from ~/.pgpass (see below).
bin/pg_repack \
--no-superuser-check \
--host=<host> \
--port=5432 \
--username=<user> \
--no-password \
--dbname=<db> \
--table=<t1> \
--table=<t2> \
--table=<t3...>
Use the file ~/.pgpass to hold the password. Like your ssh config it should have the permissions 0600. The format is:
<host>:<port>:<db>:<user>:<password>
e.g.
<host>:5432:<db>:<user>:<password>
# Repack each of the 128 hex-suffixed partitions individually -- the parent
# tables cannot be repacked (see warning above), but the partitions can.
for part in $(seq 0 127); do
bin/pg_repack \
--no-superuser-check \
--host=<host> \
--port=5432 \
--username=<user> \
--no-password \
--dbname=<db> \
--table=public.partitioned_table_$(printf '%02x' $part)
done
-- Zero all statistics counters for the current database (the pg_stat_* views).
-- NOTE(review): per the Postgres docs this also resets the counters
-- autovacuum uses to decide when to run -- confirm before using in production.
SELECT pg_stat_reset();
Each buffer is 8 kB, so the total buffer cache size is 8 kB times the row count of pg_buffercache.
Show the breakdown for all databases:
-- Count cached buffers per database (requires the pg_buffercache extension).
-- Unused buffers (reldatabase IS NULL) and buffers for shared relations
-- (reldatabase = 0) are shown with an empty database name.
SELECT
    CASE
        WHEN buf.reldatabase IS NULL THEN ''
        WHEN buf.reldatabase = 0 THEN ''
        ELSE db.datname
    END AS database,
    count(*) AS cached_blocks
FROM pg_buffercache AS buf
LEFT JOIN pg_database AS db
    ON buf.reldatabase = db.oid
GROUP BY db.datname, buf.reldatabase
ORDER BY db.datname, buf.reldatabase;
Show the top 40 tables/indexes in the current database:
-- Top 40 relations (tables and indexes) by cached buffer count in the
-- current database, including shared relations (reldatabase = 0).
-- Requires the pg_buffercache extension.
SELECT
    ns.nspname,
    rel.relname,
    count(*) AS buffers
FROM pg_buffercache AS buf
JOIN pg_class AS rel
    ON buf.relfilenode = pg_relation_filenode(rel.oid)
   AND buf.reldatabase IN (
        0,
        (SELECT oid FROM pg_database WHERE datname = current_database())
    )
JOIN pg_namespace AS ns
    ON ns.oid = rel.relnamespace
GROUP BY ns.nspname, rel.relname
ORDER BY buffers DESC
LIMIT 40;