Skip to content

Instantly share code, notes, and snippets.

@jzavisek
Forked from robertrossmann/all-indexes.sql
Created June 12, 2019 09:46
Show Gist options
  • Save jzavisek/68feadde2c02995d098e58f2425f82b8 to your computer and use it in GitHub Desktop.
Postgres diagnostics
-- List all existing indexes and include some useful info about them (incl. the index's definition).
-- The column list and access method are recovered by regexing the DDL returned by pg_get_indexdef().
SELECT
schemaname AS schemaname,
t.relname AS tablename,
ix.relname AS indexname,
-- Everything inside the outermost parentheses of the definition = indexed columns/expressions
regexp_replace(pg_get_indexdef(i.indexrelid), '^[^\(]*\((.*)\)$', '\1') AS columns,
-- The token following USING = the access method (btree, gin, gist, hash, ...)
regexp_replace(pg_get_indexdef(i.indexrelid), '.* USING ([^ ]*) \(.*', '\1') AS algorithm,
indisunique AS "unique",
indisprimary AS "primary",
indisvalid AS valid,
indexprs::text,
indpred::text,
pg_get_indexdef(i.indexrelid) AS definition
FROM
pg_index i
INNER JOIN pg_class t ON t.oid = i.indrelid
INNER JOIN pg_class ix ON ix.oid = i.indexrelid
-- pg_stat_user_indexes supplies schemaname and restricts the result to user indexes.
-- (The original LEFT JOIN + `WHERE schemaname IS NOT NULL` was an inner join in disguise;
-- made explicit here — same rows, clearer intent.)
INNER JOIN pg_stat_user_indexes ui ON ui.indexrelid = i.indexrelid
ORDER BY
schemaname ASC,
tablename ASC,
indexname ASC;
-- Enable the auto_explain plugin. Useful for doing `explain analyze` on PG functions
-- which consist of many nested statements you need to know which of those statements
-- are taking long and why.
-- Sadly, this plugin is not available on Heroku Postgres. You should migrate to RDS. :)
-- NOTE: these SET statements only affect the current session; use ALTER SYSTEM /
-- postgresql.conf for a permanent setup.
-- AWS RDS (RDS ships the module under the plugins directory)
LOAD '$libdir/plugins/auto_explain';
-- General PG installation
LOAD 'auto_explain';
-- Plain-text plans; also accepts json/xml/yaml.
SET auto_explain.log_format=text;
-- Record actual run times (EXPLAIN ANALYZE semantics) — adds measurement overhead.
SET auto_explain.log_analyze = true;
-- This is the key knob for functions: log statements executed inside them, too.
SET auto_explain.log_nested_statements = true;
-- Per-node timing; only meaningful when log_analyze is on.
SET auto_explain.log_timing = true;
-- Only log statements slower than this many milliseconds.
-- Set this to 0 to enable analysis on all statements
SET auto_explain.log_min_duration = 500;
-- Optionally send all the auto_explain messages to your preferred GUI client
-- Otherwise the messages might only appear in the db logs
SET client_min_messages = log;
-- See which queries are being blocked by which other query.
-- Note that seeing SOMETHING here does not necessarily mean a problem, but the results
-- should change quickly. Only when a statement hangs for too long it is a cause for concern.
SELECT
waiting_locks.pid AS blocked_pid,
waiting_activity.usename AS blocked_user,
other_locks.pid AS blocking_pid,
other_activity.usename AS blocking_user,
waiting_activity.query AS blocked_statement,
other_activity.query AS current_statement_in_blocking_process,
waiting_activity.xact_start AS query_start
FROM
pg_catalog.pg_locks AS waiting_locks
INNER JOIN pg_catalog.pg_stat_activity AS waiting_activity ON waiting_activity.pid = waiting_locks.pid
-- A lock conflicts with another when both target the same lockable object:
-- every identity column must match (NULL-safely, hence IS NOT DISTINCT FROM)
-- while being held by a different backend.
INNER JOIN pg_catalog.pg_locks AS other_locks ON other_locks.locktype = waiting_locks.locktype
AND other_locks.database IS NOT DISTINCT FROM waiting_locks.database
AND other_locks.relation IS NOT DISTINCT FROM waiting_locks.relation
AND other_locks.page IS NOT DISTINCT FROM waiting_locks.page
AND other_locks.tuple IS NOT DISTINCT FROM waiting_locks.tuple
AND other_locks.virtualxid IS NOT DISTINCT FROM waiting_locks.virtualxid
AND other_locks.transactionid IS NOT DISTINCT FROM waiting_locks.transactionid
AND other_locks.classid IS NOT DISTINCT FROM waiting_locks.classid
AND other_locks.objid IS NOT DISTINCT FROM waiting_locks.objid
AND other_locks.objsubid IS NOT DISTINCT FROM waiting_locks.objsubid
AND other_locks.pid <> waiting_locks.pid
INNER JOIN pg_catalog.pg_stat_activity AS other_activity ON other_activity.pid = other_locks.pid
WHERE
-- Only waiters: rows whose lock request has not been granted yet.
NOT waiting_locks.granted
ORDER BY
blocking_pid;
-- Lists indexes which are too big compared to the actual table size (I think).
-- Not sure I fully understand this one, use with caution.
-- NOTE(review): this appears to be a variant of the widely-circulated btree index
-- bloat estimation query. The numbers are ESTIMATES derived from pg_stats averages
-- (avg_width, null_frac), not exact measurements — treat wastedbytes as a hint.
WITH btree_index_atts AS (
-- One row per (btree index, indexed column number). indkey is an int2vector;
-- regexp_split_to_table explodes it into individual attnums.
SELECT
nspname,
relname,
reltuples,
relpages,
indrelid,
relam,
regexp_split_to_table(indkey::text, ' ')::smallint AS attnum,
indexrelid AS index_oid
FROM
pg_index
JOIN pg_class ON pg_class.oid = pg_index.indexrelid
JOIN pg_namespace ON pg_namespace.oid = pg_class.relnamespace
JOIN pg_am ON pg_class.relam = pg_am.oid
WHERE
pg_am.amname = 'btree'
),
index_item_sizes AS (
-- Estimate the per-tuple storage cost of each index from the table's column
-- statistics (pg_stats), aggregated over the index's columns.
SELECT
i.nspname,
i.relname,
i.reltuples,
i.relpages,
i.relam,
(quote_ident(s.schemaname) || '.' || quote_ident(s.tablename))::regclass AS starelid,
a.attrelid AS table_oid,
index_oid,
-- Server page size in bytes (normally 8192).
current_setting('block_size')::numeric AS bs,
/* MAXALIGN: 4 on 32bits, 8 on 64bits (and mingw32 ?) */
CASE WHEN version() ~ 'mingw32'
OR version() ~ '64-bit' THEN
8
ELSE
4
END AS maxalign,
-- Fixed page header size in bytes.
24 AS pagehdr,
/* per tuple header: add index_attribute_bm (null bitmap) if some cols are null-able */
CASE WHEN max(coalesce(s.null_frac,
0)) = 0 THEN
2
ELSE
6
END AS index_tuple_hdr,
/* data length: discount the space saved by NULLs using the null fraction from stats;
2048 is a fallback average width when stats are missing */
sum((1 - coalesce(s.null_frac,
0)) * coalesce(s.avg_width,
2048)) AS nulldatawidth
FROM
pg_attribute AS a
JOIN pg_stats AS s ON (quote_ident(s.schemaname) || '.' || quote_ident(s.tablename))::regclass = a.attrelid
AND s.attname = a.attname
JOIN btree_index_atts AS i ON i.indrelid = a.attrelid
AND a.attnum = i.attnum
WHERE
-- attnum > 0 skips system columns (ctid, xmin, ...).
a.attnum > 0
GROUP BY
1,
2,
3,
4,
5,
6,
7,
8,
9
),
index_aligned AS (
-- Pad the per-tuple header and data widths up to the platform's MAXALIGN boundary
-- to get the real on-disk tuple width.
SELECT
maxalign,
bs,
nspname,
relname AS index_name,
reltuples,
relpages,
relam,
table_oid,
index_oid,
(2 + maxalign - CASE
/* Add padding to the index tuple header to align on MAXALIGN */
WHEN index_tuple_hdr % maxalign = 0 THEN
maxalign
ELSE
index_tuple_hdr % maxalign
END + nulldatawidth + maxalign - CASE
/* Add padding to the data to align on MAXALIGN */
WHEN nulldatawidth::integer % maxalign = 0 THEN
maxalign
ELSE
nulldatawidth::integer % maxalign
END)::numeric AS nulldatahdrwidth,
pagehdr
FROM
index_item_sizes AS s1
),
otta_calc AS (
-- "otta" = the OptimalTarget page count: how many pages the index WOULD occupy
-- if it were perfectly packed (tuple width x row count / usable page space).
SELECT
bs,
nspname,
table_oid,
index_oid,
index_name,
relpages,
coalesce(ceil((reltuples * (4 + nulldatahdrwidth)) / (bs - pagehdr::float)) + CASE WHEN am.amname IN('hash', 'btree') THEN
1
ELSE
0
END,
0
/* btree and hash have a metadata reserved block */
) AS otta
FROM
index_aligned AS s2
LEFT JOIN pg_am am ON s2.relam = am.oid
),
raw_bloat AS (
-- Actual size vs. ideal size: anything beyond `otta` pages counts as waste.
SELECT
nspname,
c.relname AS table_name,
index_name,
bs * (sub.relpages)::bigint AS totalbytes,
CASE WHEN sub.relpages <= otta THEN
0
ELSE
bs * (sub.relpages - otta)::bigint
END AS wastedbytes,
-- Waste as a percentage of the index's total size.
CASE WHEN sub.relpages <= otta THEN
0
ELSE
bs * (sub.relpages - otta)::bigint * 100 / (bs * (sub.relpages)::bigint)
END AS realbloat,
pg_relation_size(sub.table_oid) AS table_bytes,
stat.idx_scan AS index_scans,
stat.indexrelid
FROM
otta_calc AS sub
JOIN pg_class AS c ON c.oid = sub.table_oid
JOIN pg_stat_user_indexes AS stat ON sub.index_oid = stat.indexrelid
)
-- Final report: only indexes wasting >= ~100 MB, biggest offenders first.
SELECT
nspname AS schemaname,
table_name AS tablename,
index_name AS indexname, pg_size_pretty(wastedbytes) AS bloat_bytes,
pg_size_pretty(totalbytes) AS index_bytes,
pg_get_indexdef(rb.indexrelid) AS definition,
indisprimary AS primary
FROM
raw_bloat rb
INNER JOIN pg_index i ON i.indexrelid = rb.indexrelid
WHERE
wastedbytes >= 100000000
ORDER BY
wastedbytes DESC, indexname;
-- See on which tables PG is doing a lot of sequential scans. On small tables
-- a seq scan is more efficient than an index scan so always think if adding
-- an index would really make sense.
-- Sadly, it does not show which fields are "hot", only the tables names.
SELECT
relname AS "table",
pg_size_pretty(pg_relation_size(relid::regclass)) AS size,
seq_scan AS sequential_scans,
idx_scan AS index_scans,
-- idx_scan is NULL for tables that have no indexes at all; without coalesce()
-- the difference would be NULL and such tables would be mislabelled 'OK'
-- (and sorted unpredictably) — yet they are the prime "Missing Index?" candidates.
seq_scan - coalesce(idx_scan, 0) AS difference,
CASE WHEN seq_scan - coalesce(idx_scan, 0) > 0 THEN
'Missing Index?'
ELSE
'OK'
END AS status
FROM
pg_stat_all_tables
WHERE
schemaname = 'public'
-- Ignore tiny tables (< ~80 kB) where a seq scan is the right plan anyway.
AND pg_relation_size(relid::regclass) > 80000
ORDER BY
difference DESC;
-- See which indexes are not being used or are being used seldomly.
-- Change the `idx_scan` query filter to your liking - for heavy workloads,
-- even an index with several thousand hits could be potentially "useless"
-- if other indexes have hit rates in millions.
SELECT
ui.schemaname AS schemaname,
ui.relname AS tablename,
ui.indexrelname AS indexname,
pg_size_pretty(pg_relation_size(i.indexrelid)) AS size,
ui.idx_scan AS indexscans
FROM
pg_stat_user_indexes AS ui
-- pg_index supplies indisunique: unique indexes enforce constraints and must be
-- kept even when never scanned, so they are excluded below.
INNER JOIN pg_index AS i ON i.indexrelid = ui.indexrelid
WHERE
NOT i.indisunique
AND ui.idx_scan <= 50
ORDER BY
pg_relation_size(i.indexrelid) DESC,
ui.relname ASC;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment