Skip to content

Instantly share code, notes, and snippets.

@samsonjs
Last active April 5, 2023 22:17
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samsonjs/175d090c8f4c3c10fb4557dccc404758 to your computer and use it in GitHub Desktop.
Save samsonjs/175d090c8f4c3c10fb4557dccc404758 to your computer and use it in GitHub Desktop.
A collection of useful Postgres snippets that I've collected from the web
-- Per-table index bloat summary: estimated wasted bytes and worst bloat ratio
-- across all indexes of each table. Adapted from the check_postgres bloat query.
WITH
constants AS (
    SELECT
        current_setting('block_size')::numeric AS bs,
        -- Heap tuple header size in bytes (23 for Postgres >= 8.3).
        23 AS hdr,
        -- Maximum alignment: 8 bytes on 64-bit builds, 4 on 32-bit.
        8 AS ma
),
table_info AS (
    SELECT
        schemaname,
        tablename,
        -- Average size of non-null column entries (per row).
        SUM((1 - null_frac) * avg_width) AS avg_row_size,
        -- Largest fraction of null entries in any column of the table.
        MAX(null_frac) AS max_null_frac,
        -- Tuple header plus null bitmap (one bit per column with nulls, rounded up).
        hdr + (
            SELECT 1 + count(*) / 8
            FROM pg_stats s2
            WHERE null_frac > 0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
        ) AS nullhdr
    FROM pg_stats s, constants c
    WHERE schemaname <> 'information_schema' AND schemaname <> 'pg_catalog'
    GROUP BY 1, 2, c.hdr, c.ma, c.bs
),
row_size AS (
    SELECT
        -- Keep schemaname so tables can be matched per schema below; relname
        -- alone is not unique across schemas.
        schemaname,
        tablename,
        -- Average data size per row, padded to the alignment boundary.
        (avg_row_size + (hdr + ma - (CASE WHEN hdr % ma = 0 THEN ma ELSE hdr % ma END)))::numeric AS datahdr,
        -- Average null-bitmap size per row, padded to the alignment boundary.
        (max_null_frac * (nullhdr + ma - (CASE WHEN nullhdr % ma = 0 THEN ma ELSE nullhdr % ma END))) AS nullhdr2
    FROM constants, table_info
),
estimate AS (
    SELECT
        tablename,
        COALESCE(c_index.relname, '?') AS index_name,
        COALESCE(c_index.reltuples, 0)::bigint AS num_tuples,
        COALESCE(c_index.relpages, 0)::bigint AS num_pages,
        -- Estimated optimal number of pages required.
        -- Original comment: "Very rough approximation, assumes all cols"
        COALESCE(CEIL((c_index.reltuples * (datahdr - 12)) / (bs - 20::float)), 0) AS iotta
    FROM constants, row_size
    JOIN pg_class c_table ON c_table.relname = row_size.tablename
    -- Restrict the match to the table's own schema: joining on relname alone
    -- mixes identically named tables from different schemas together.
    JOIN pg_namespace n ON n.oid = c_table.relnamespace AND n.nspname = row_size.schemaname
    LEFT JOIN pg_index ON indrelid = c_table.oid
    LEFT JOIN pg_class c_index ON c_index.oid = pg_index.indexrelid
),
bloat AS (
    SELECT
        tablename AS table_name,
        index_name,
        num_tuples,
        num_pages,
        iotta AS optimal_num_pages,
        ROUND((CASE WHEN iotta = 0 OR num_pages = 0 THEN 0.0 ELSE num_pages::float / iotta END)::numeric, 2) AS bloat_ratio,
        CASE WHEN num_pages < iotta THEN 0 ELSE bs * (num_pages - iotta)::bigint END AS wasted_bytes
    FROM constants, estimate
),
result AS (
    SELECT
        table_name,
        SUM(num_tuples) AS idx_tuples,
        SUM(num_pages) AS idx_pages,
        SUM(optimal_num_pages) AS idx_optimal_pages,
        MAX(bloat_ratio) AS max_bloat_ratio,
        SUM(wasted_bytes) AS wasted_bytes
    FROM bloat
    GROUP BY table_name
)
SELECT * FROM result ORDER BY wasted_bytes DESC, max_bloat_ratio DESC;
-- Per-index bloat detail: one row per index with estimated bloat ratio and
-- wasted bytes. Adapted from the check_postgres bloat query.
WITH
constants AS (
    SELECT
        current_setting('block_size')::numeric AS bs,
        -- Heap tuple header size in bytes (23 for Postgres >= 8.3).
        23 AS hdr,
        -- Maximum alignment: 8 bytes on 64-bit builds, 4 on 32-bit.
        8 AS ma
),
table_info AS (
    SELECT
        schemaname,
        tablename,
        -- Average size of non-null column entries (per row).
        SUM((1 - null_frac) * avg_width) AS avg_row_size,
        -- Largest fraction of null entries in any column of the table.
        MAX(null_frac) AS max_null_frac,
        -- Tuple header plus null bitmap (one bit per column with nulls, rounded up).
        hdr + (
            SELECT 1 + count(*) / 8
            FROM pg_stats s2
            WHERE null_frac > 0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
        ) AS nullhdr
    FROM pg_stats s, constants c
    WHERE schemaname <> 'information_schema' AND schemaname <> 'pg_catalog'
    GROUP BY 1, 2, c.hdr, c.ma, c.bs
),
row_size AS (
    SELECT
        schemaname,
        tablename,
        -- Average data size per row, padded to the alignment boundary.
        (avg_row_size + (hdr + ma - (CASE WHEN hdr % ma = 0 THEN ma ELSE hdr % ma END)))::numeric AS datahdr,
        -- Average null-bitmap size per row, padded to the alignment boundary.
        (max_null_frac * (nullhdr + ma - (CASE WHEN nullhdr % ma = 0 THEN ma ELSE nullhdr % ma END))) AS nullhdr2
    FROM constants, table_info
),
estimate AS (
    SELECT
        tablename,
        COALESCE(c_index.relname, '?') AS index_name,
        COALESCE(c_index.reltuples, 0)::bigint AS num_tuples,
        COALESCE(c_index.relpages, 0)::bigint AS num_pages,
        -- Estimated optimal number of pages required.
        -- Original comment: "Very rough approximation, assumes all cols"
        COALESCE(CEIL((c_index.reltuples * (datahdr - 12)) / (bs - 20::float)), 0) AS iotta
    FROM constants, row_size
    JOIN pg_class c_table ON c_table.relname = row_size.tablename
    -- Match on schema as well: relname alone is not unique across schemas, so
    -- same-named tables in other schemas would otherwise be double-counted.
    JOIN pg_namespace n ON n.oid = c_table.relnamespace AND n.nspname = row_size.schemaname
    LEFT JOIN pg_index ON indrelid = c_table.oid
    LEFT JOIN pg_class c_index ON c_index.oid = pg_index.indexrelid
),
result AS (
    SELECT
        tablename AS table_name,
        index_name,
        num_tuples,
        num_pages,
        iotta AS optimal_num_pages,
        ROUND((CASE WHEN iotta = 0 OR num_pages = 0 THEN 0.0 ELSE num_pages::float / iotta END)::numeric, 2) AS bloat_ratio,
        CASE WHEN num_pages < iotta THEN 0 ELSE bs * (num_pages - iotta)::bigint END AS wasted_bytes
    FROM constants, estimate
)
SELECT * FROM result ORDER BY wasted_bytes DESC, bloat_ratio DESC;

Postgres admin

Vacuuming

See what's currently being vacuumed

-- Sessions currently running a VACUUM, with how long their transaction has run.
SELECT
  pid,
  datname,
  usename,
  current_timestamp - xact_start AS xact_runtime,
  query
FROM pg_stat_activity
WHERE query ILIKE '%vacuum%'
  AND query NOT ILIKE '%pg_stat_activity%'
  AND query != '<IDLE>'
ORDER BY xact_start;

Find large transaction IDs

Tables need to be vacuumed before the xid reaches 2.1B or it will wrap around and require down time of hours up to days to sort itself out. RDS seems to keep track of this and handle it automatically.

-- Top 20 tables by transaction-ID age (main relation or its TOAST table,
-- whichever is older), with on-disk size for context.
SELECT
  c.oid::regclass AS table_name,
  greatest(age(c.relfrozenxid), age(t.relfrozenxid)) AS age,
  pg_size_pretty(pg_table_size(c.oid)) AS table_size
FROM pg_class AS c
LEFT JOIN pg_class AS t ON t.oid = c.reltoastrelid
WHERE c.relkind = 'r'
ORDER BY age DESC
LIMIT 20;

Find dead tuples and last auto-vacuum time

-- Dead-tuple counts per table compared against the default autovacuum trigger:
-- threshold (50) + scale factor (0.1) * reltuples. is_vacuum is true when the
-- table is past that trigger and due for an autovacuum.
SELECT
   s.relname,
   s.n_dead_tup,
   50 + 0.1 * c.reltuples AS vacuum_threshold,
   s.n_live_tup,
   s.n_tup_del,
   s.n_tup_upd,
   s.autovacuum_count,
   s.last_vacuum,
   s.last_autovacuum,
   now() AS now,
   c.reltuples,
   s.n_dead_tup > (50 + 0.1 * c.reltuples) AS is_vacuum
FROM
   -- Join on relid = oid: relname is not unique across schemas, so joining on
   -- relname (as the original did) can match the wrong pg_class row.
   pg_stat_user_tables s INNER JOIN pg_class c ON c.oid = s.relid
ORDER BY
   is_vacuum DESC,
   s.n_dead_tup DESC;

Show vacuum settings that can be tuned

-- All tunable autovacuum GUCs, with current value, units, and where the value came from.
SELECT
  category,
  name,
  setting,
  unit,
  source,
  min_val,
  max_val,
  boot_val
FROM pg_settings
WHERE category = 'Autovacuum';

Change vacuum settings for specific tables

You can set the scale factor to zero to disable that trigger and only rely on the threshold, if your workload is constant. You can also set the threshold to -1 to disable that trigger and only rely on the scale factor.

-- Per-table autovacuum tuning: disable the scale-factor trigger and rely on
-- fixed row-count thresholds instead.
-- NOTE: storage parameters must be wrapped in parentheses; the original
-- "SET autovacuum_... = 0, ..." without them is a syntax error.
ALTER TABLE some_table
SET (autovacuum_analyze_scale_factor = 0,
     autovacuum_analyze_threshold = 100000,
     autovacuum_vacuum_scale_factor = 0,
     autovacuum_vacuum_threshold = 400000);

Update autovacuum scales for a partitioned table

# Raise the autovacuum/autoanalyze scale factors on each of the 128
# hex-suffixed partitions (partitioned_table_00 .. partitioned_table_7f).
for part in $(seq 0 127); do
    suffix=$(printf '%02x' "$part")
    psql -U <user> -w -h <host> -p 5432 <db> \
      -c "ALTER TABLE partitioned_table_${suffix} SET (autovacuum_analyze_scale_factor = 0.4, autovacuum_vacuum_scale_factor = 0.5);"
done

Heap-Only Tuple (HOT) Updates

Postgres has immutable tuples and never updates them. HOT updates replace the existing tuple and are more efficient, but only work if the new data fits. The fill-factor is a per-table setting that indicates the percentage of pages that are used for the actual tuple data. When it's less than 100% then the remaining percentage is reserved for future updates so that more of them can be HOT updates. It defaults to 100% and you need to run pg_repack or VACUUM FULL to take advantage of the new setting.

Query the status of HOT updates

-- Compare total updates to HOT updates per table; a big gap on a hot table
-- suggests lowering its fillfactor.
SELECT
  schemaname,
  relname,
  n_tup_upd,
  n_tup_hot_upd
FROM pg_stat_all_tables
ORDER BY n_tup_upd;

Update the fill-factor for a table

-- Reserve 20% of each heap page for future (HOT) updates. Existing pages are
-- not rewritten until VACUUM FULL or pg_repack runs (per the note above).
ALTER TABLE some_table SET (fillfactor = 80);

Update the fill factor for partitioned tables

# Apply fillfactor = 80 to each of the 128 hex-suffixed partitions.
for part in $(seq 0 127); do
    suffix=$(printf '%02x' "$part")
    psql -U <user> -w -h <host> -p 5432 <db> \
      -c "ALTER TABLE partitioned_table_${suffix} SET (fillfactor = 80);"
done

Storage size

Total database size

-- Total on-disk size of the database, in raw bytes and human-readable form.
SELECT
  pg_database_size('<db>'),
  pg_size_pretty(pg_database_size('<db>'));

Basic stats on all tables (including PG tables)

    -- Size breakdown per ordinary table: total, index, TOAST, and heap-only bytes.
    -- Columns are listed explicitly (the original used SELECT * and then selected
    -- row_estimate a second time, producing a duplicate column).
    SELECT
      a.oid,
      a.table_schema,
      a.table_name,
      a.row_estimate,
      a.total_bytes,
      a.index_bytes,
      a.toast_bytes,
      a.total_bytes - a.index_bytes - COALESCE(a.toast_bytes, 0) AS table_bytes
    FROM (
      SELECT c.oid, nspname AS table_schema, relname AS table_name,
             c.reltuples AS row_estimate,
             pg_total_relation_size(c.oid) AS total_bytes,
             pg_indexes_size(c.oid) AS index_bytes,
             -- NULLIF guards tables without a TOAST relation (reltoastrelid = 0),
             -- yielding NULL toast_bytes instead of looking up OID 0.
             pg_total_relation_size(NULLIF(c.reltoastrelid, 0)) AS toast_bytes
      FROM pg_class c
      LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
      WHERE relkind = 'r'
    ) a;

Indexes

Find unused indexes

-- Non-unique indexes that have never been scanned: candidates for removal,
-- biggest first.
SELECT
  s.relid::regclass AS table_name,
  s.indexrelid::regclass AS index_name,
  pg_relation_size(s.indexrelid::regclass) AS size,
  pg_size_pretty(pg_relation_size(s.indexrelid::regclass)) AS human_size,
  s.idx_tup_read,
  s.idx_tup_fetch,
  s.idx_scan
FROM pg_stat_user_indexes AS s
JOIN pg_index AS i USING (indexrelid)
WHERE i.indisunique IS FALSE
  AND s.idx_scan = 0
ORDER BY size DESC;

Find largest indexes

-- All non-unique indexes ordered by on-disk size (largest first), with scan
-- counts to judge whether the space is earning its keep.
SELECT
  s.relid::regclass AS table_name,
  s.indexrelid::regclass AS index_name,
  pg_relation_size(s.indexrelid::regclass) AS size,
  pg_size_pretty(pg_relation_size(s.indexrelid::regclass)) AS human_size,
  s.idx_tup_read,
  s.idx_tup_fetch,
  s.idx_scan
FROM pg_stat_user_indexes AS s
JOIN pg_index AS i USING (indexrelid)
WHERE i.indisunique IS FALSE
ORDER BY size DESC, s.idx_scan;

Find invalid indexes

Creating an index concurrently also has a downside though. If something goes wrong during the process, it does not roll back, and leaves an invalid index behind. Invalid indexes can be found using the following query.

-- Indexes left invalid by a failed CREATE INDEX CONCURRENTLY.
-- Rewritten with an explicit join (the original used an implicit comma join).
SELECT *
FROM pg_class
INNER JOIN pg_index ON pg_index.indexrelid = pg_class.oid
WHERE pg_index.indisvalid = false;

Source: https://blog.gojekengineering.com/the-postgres-performance-tuning-manual-indexes-19a1bdfe28ba

See also: https://www.postgresql.org/docs/current/sql-createindex.html#SQL-CREATEINDEX-CONCURRENTLY

Bloat

These were all adapted from a query found on the Postgres wiki. That query was taken from check_postgres and there's a newer version there but I'm not sure if we care about any of the changes.

Top bloated tables and indexes

See the various .sql files in this directory.

Total table and index bloat

-- Database-wide totals of estimated table bloat and index bloat.
-- Adapted from the check_postgres bloat query on the Postgres wiki:
-- otta/iotta are the estimated "optimal" page counts for each table/index.
SELECT
  -- Bytes in table pages beyond the estimated optimal page count.
  pg_size_pretty(SUM(CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::BIGINT END)) AS wastedbytes,
  -- Same, but for index pages.
  pg_size_pretty(SUM(CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta)::BIGINT END)) AS wastedibytes,
  pg_size_pretty(
      SUM(CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::BIGINT END)
      + SUM(CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta)::BIGINT END)
  ) AS total
FROM (
  -- One row per table/index pair: actual page counts plus estimated optimal ones.
  SELECT
    schemaname, tablename, cc.reltuples, cc.relpages, bs,
    -- Estimated optimal table pages: rows * aligned row size / usable page size.
    CEIL((cc.reltuples*((datahdr+ma-
      (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta,
    COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
    COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
  FROM (
    -- Per-table average row size (datahdr) and null-bitmap size (nullhdr2),
    -- both padded to the alignment boundary ma.
    SELECT
      ma,bs,schemaname,tablename,
      (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
      (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
    FROM (
      -- Raw per-table statistics from pg_stats plus server-dependent constants.
      SELECT
        schemaname, tablename, hdr, ma, bs,
        SUM((1-null_frac)*avg_width) AS datawidth,
        MAX(null_frac) AS maxfracsum,
        -- Tuple header plus the null bitmap: one bit per column with nulls.
        hdr+(
          SELECT 1+count(*)/8
          FROM pg_stats s2
          WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
        ) AS nullhdr
      FROM pg_stats s, (
        -- bs: block size; hdr: tuple header size (27 on pre-8.3 servers);
        -- ma: max alignment, inferred from the version() string.
        SELECT
          (SELECT current_setting('block_size')::numeric) AS bs,
          CASE WHEN substring(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
          CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
        FROM (SELECT version() AS v) AS foo
      ) AS constants
      GROUP BY 1,2,3,4,5
    ) AS foo
  ) AS rs
  JOIN pg_class cc ON cc.relname = rs.tablename
  JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
  LEFT JOIN pg_index i ON indrelid = cc.oid
  LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
) AS sml;

pg_repack

All tables, using .pgpass for authentication:

# Repack every table in <db>. The password comes from ~/.pgpass (hence
# --no-password); --no-superuser-check skips pg_repack's superuser check,
# which is needed on managed services where you are not a superuser.
bin/pg_repack \
    --no-superuser-check \
    --host=<host> \
    --port=5432 \
    --username=<user> \
    --no-password \
    --dbname=<db>

WARNING: Repacking our giant parent tables never works because they're busy and we can't lock them long enough, and there's no actual data so repacking is pointless anyway. Don't try to repack those in any automated maintenance or it will likely fail.

One or more specific tables:

# Repack only the listed tables; pass --table once per table. The password
# comes from ~/.pgpass (hence --no-password).
bin/pg_repack \
        --no-superuser-check \
        --host=<host> \
        --port=5432 \
        --username=<user> \
        --no-password \
        --dbname=<db> \
        --table=<t1> \
        --table=<t2> \
        --table=<t3...>

pg_repack partitioned tables

Use the file ~/.pgpass to hold the password. Like your ssh config it should have the permissions 0600. The format is:

<host>:<port>:<db>:<user>:<password>

e.g.

<host>:5432:<db>:<user>:<password>

# Repack each of the 128 hex-suffixed partitions one at a time
# (public.partitioned_table_00 .. public.partitioned_table_7f).
for part in $(seq 0 127); do
    suffix=$(printf '%02x' "$part")
    bin/pg_repack \
        --no-superuser-check \
        --host=<host> \
        --port=5432 \
        --username=<user> \
        --no-password \
        --dbname=<db> \
        --table=public.partitioned_table_${suffix}
done

Reset stats

-- Reset all statistics counters for the current database to zero.
SELECT pg_stat_reset();

Buffer Cache

Each buffer is 8k so the total buffer cache size is 8k * row count of pg_buffercache.

Show the breakdown for all databases:

-- Count cached buffers per database; unused buffers (NULL) and buffers for
-- shared relations (reldatabase = 0) get an empty label.
SELECT
  CASE
    WHEN c.reldatabase IS NULL THEN ''
    WHEN c.reldatabase = 0 THEN ''
    ELSE d.datname
  END AS database,
  count(*) AS cached_blocks
FROM pg_buffercache AS c
LEFT JOIN pg_database AS d ON d.oid = c.reldatabase
GROUP BY d.datname, c.reldatabase
ORDER BY d.datname, c.reldatabase;

Show the top 40 tables/indexes in the current database:

-- Top 40 relations by buffer-cache usage in the current database
-- (reldatabase 0 covers shared relations visible from every database).
-- ORDER BY now names the column instead of using the positional "3".
SELECT
  n.nspname,
  c.relname,
  count(*) AS buffers
FROM pg_buffercache b
JOIN pg_class c
  ON b.relfilenode = pg_relation_filenode(c.oid) AND
     b.reldatabase IN (0, (SELECT oid FROM pg_database
                           WHERE datname = current_database()))
JOIN pg_namespace n ON n.oid = c.relnamespace
GROUP BY n.nspname, c.relname
ORDER BY buffers DESC
LIMIT 40;

References

-- Per-table summary of both heap bloat and index bloat.
-- Adapted from the check_postgres bloat query.
WITH
constants AS (
    SELECT
        current_setting('block_size')::numeric AS bs,
        -- Heap tuple header size in bytes (23 for Postgres >= 8.3).
        23 AS hdr,
        -- Maximum alignment: 8 bytes on 64-bit builds, 4 on 32-bit.
        8 AS ma
),
table_info AS (
    SELECT
        schemaname,
        tablename,
        -- Average size of non-null column entries (per row).
        SUM((1 - null_frac) * avg_width) AS avg_row_size,
        -- Largest fraction of null entries in any column of the table.
        MAX(null_frac) AS max_null_frac,
        -- Tuple header plus null bitmap (one bit per column with nulls, rounded up).
        hdr + (
            SELECT 1 + count(*) / 8
            FROM pg_stats s2
            WHERE null_frac > 0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
        ) AS nullhdr
    FROM pg_stats s, constants c
    WHERE schemaname <> 'information_schema' AND schemaname <> 'pg_catalog'
    GROUP BY 1, 2, c.hdr, c.ma, c.bs
),
row_size AS (
    SELECT
        -- Keep schemaname so tables can be matched per schema below; relname
        -- alone is not unique across schemas.
        schemaname,
        tablename,
        -- Average data size per row, padded to the alignment boundary.
        (avg_row_size + (hdr + ma - (CASE WHEN hdr % ma = 0 THEN ma ELSE hdr % ma END)))::numeric AS datahdr,
        -- Average null-bitmap size per row, padded to the alignment boundary.
        (max_null_frac * (nullhdr + ma - (CASE WHEN nullhdr % ma = 0 THEN ma ELSE nullhdr % ma END))) AS nullhdr2
    FROM constants, table_info
),
estimate AS (
    SELECT
        tablename,
        COALESCE(c_index.relname, '?') AS index_name,
        c_table.reltuples::bigint AS num_tuples,
        c_table.relpages::bigint AS num_pages,
        COALESCE(c_index.reltuples, 0)::bigint AS idx_tuples,
        COALESCE(c_index.relpages, 0)::bigint AS idx_pages,
        -- Estimated optimal number of heap pages required.
        CEIL(
            (c_table.reltuples * ((datahdr + ma - (CASE WHEN datahdr % ma = 0 THEN ma ELSE datahdr % ma END)) + nullhdr2 + 4)) / (bs - 20::float)
        ) AS otta,
        -- Estimated optimal number of index pages required.
        -- Original comment: "Very rough approximation, assumes all cols"
        COALESCE(CEIL((c_index.reltuples * (datahdr - 12)) / (bs - 20::float)), 0) AS idx_otta
    FROM constants, row_size
    JOIN pg_class c_table ON c_table.relname = row_size.tablename
    -- Restrict the match to the table's own schema: joining on relname alone
    -- mixes identically named tables from different schemas together.
    JOIN pg_namespace n ON n.oid = c_table.relnamespace AND n.nspname = row_size.schemaname
    LEFT JOIN pg_index ON indrelid = c_table.oid
    LEFT JOIN pg_class c_index ON c_index.oid = pg_index.indexrelid
),
bloat AS (
    SELECT
        tablename AS table_name,
        index_name,
        num_tuples,
        num_pages,
        otta AS optimal_num_pages,
        ROUND((CASE WHEN otta = 0 OR num_pages = 0 THEN 0.0 ELSE num_pages::float / otta END)::numeric, 2) AS bloat_ratio,
        CASE WHEN num_pages < otta THEN 0 ELSE bs * (num_pages - otta)::bigint END AS wasted_bytes,
        idx_tuples,
        idx_pages,
        idx_otta AS idx_optimal_pages,
        ROUND((CASE WHEN idx_otta = 0 OR idx_pages = 0 THEN 0.0 ELSE idx_pages::float / idx_otta END)::numeric, 2) AS idx_bloat_ratio,
        CASE WHEN idx_pages < idx_otta THEN 0 ELSE bs * (idx_pages - idx_otta)::bigint END AS idx_wasted_bytes
    FROM constants, estimate
),
result AS (
    SELECT
        table_name,
        -- Heap figures repeat once per index, so MAX() collapses them; index
        -- figures are summed across all of the table's indexes.
        MAX(num_tuples) AS num_tuples,
        MAX(num_pages) AS num_pages,
        MAX(optimal_num_pages) AS optimal_num_pages,
        MAX(bloat_ratio) AS bloat_ratio,
        MAX(wasted_bytes) AS wasted_bytes,
        SUM(idx_tuples) AS idx_tuples,
        SUM(idx_pages) AS idx_pages,
        SUM(idx_optimal_pages) AS idx_optimal_pages,
        MAX(idx_bloat_ratio) AS idx_bloat_ratio,
        SUM(idx_wasted_bytes) AS idx_wasted_bytes
    FROM constants, bloat
    GROUP BY table_name
)
SELECT * FROM result ORDER BY (wasted_bytes + idx_wasted_bytes) DESC, (bloat_ratio + idx_bloat_ratio) DESC;
-- Per-table heap bloat only (no index figures).
-- Adapted from the check_postgres bloat query.
WITH
constants AS (
    SELECT
        current_setting('block_size')::numeric AS bs,
        -- Heap tuple header size in bytes (23 for Postgres >= 8.3).
        23 AS hdr,
        -- Maximum alignment: 8 bytes on 64-bit builds, 4 on 32-bit.
        8 AS ma
),
table_info AS (
    SELECT
        schemaname,
        tablename,
        -- Average size of non-null column entries (per row).
        SUM((1 - null_frac) * avg_width) AS avg_row_size,
        -- Largest fraction of null entries in any column of the table.
        MAX(null_frac) AS max_null_frac,
        -- Tuple header plus null bitmap (one bit per column with nulls, rounded up).
        hdr + (
            SELECT 1 + count(*) / 8
            FROM pg_stats s2
            WHERE null_frac > 0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
        ) AS nullhdr
    FROM pg_stats s, constants c
    WHERE schemaname <> 'information_schema' AND schemaname <> 'pg_catalog'
    GROUP BY 1, 2, c.hdr, c.ma, c.bs
),
row_size AS (
    SELECT
        -- Keep schemaname so tables can be matched per schema below; relname
        -- alone is not unique across schemas.
        schemaname,
        tablename,
        -- Average data size per row, padded to the alignment boundary.
        (avg_row_size + (hdr + ma - (CASE WHEN hdr % ma = 0 THEN ma ELSE hdr % ma END)))::numeric AS datahdr,
        -- Average null-bitmap size per row, padded to the alignment boundary.
        (max_null_frac * (nullhdr + ma - (CASE WHEN nullhdr % ma = 0 THEN ma ELSE nullhdr % ma END))) AS nullhdr2
    FROM constants, table_info
),
estimate AS (
    SELECT
        tablename,
        cc.reltuples::bigint AS reltuples,
        cc.relpages::bigint AS relpages,
        -- Estimated optimal number of pages required.
        CEIL(
            (cc.reltuples * ((datahdr + ma - (CASE WHEN datahdr % ma = 0 THEN ma ELSE datahdr % ma END)) + nullhdr2 + 4)) / (bs - 20::float)
        ) AS otta
    FROM constants, row_size
    JOIN pg_class cc ON cc.relname = row_size.tablename
    -- Restrict the match to the table's own schema: joining on relname alone
    -- mixes identically named tables from different schemas together.
    JOIN pg_namespace nn ON nn.oid = cc.relnamespace AND nn.nspname = row_size.schemaname
),
result AS (
    SELECT
        tablename AS table_name,
        reltuples AS num_tuples,
        relpages AS num_pages,
        otta AS optimal_num_pages,
        ROUND((CASE WHEN otta = 0 THEN 0.0 ELSE relpages::float / otta END)::numeric, 2) AS bloat_ratio,
        CASE WHEN relpages < otta THEN 0 ELSE bs * (relpages - otta)::bigint END AS wasted_bytes
    FROM constants, estimate
)
SELECT * FROM result ORDER BY wasted_bytes DESC, bloat_ratio DESC;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment