kivanio/all-indexes.sql

## all-indexes.sql
-- List all existing indexes and include some useful info about them (incl. the index's definition)

SELECT
    schemaname AS schemaname,
    t.relname AS tablename,
    ix.relname AS indexname,
    regexp_replace(pg_get_indexdef(i.indexrelid), '^[^\(]*\((.*)\)$', '\1') AS columns,
    regexp_replace(pg_get_indexdef(i.indexrelid), '.* USING ([^ ]*) \(.*', '\1') AS algorithm,
    indisunique AS UNIQUE,
    indisprimary AS PRIMARY,
    indisvalid AS valid,
    pg_size_pretty(pg_relation_size(i.indexrelid)) AS size,
    idx_scan AS indexscans,
    idx_tup_read AS tuplereads,
    idx_tup_fetch AS tuplefetches,
    pg_get_indexdef(i.indexrelid) AS definition
FROM
    pg_index i
    INNER JOIN pg_class t ON t.oid = i.indrelid
    INNER JOIN pg_class ix ON ix.oid = i.indexrelid
    LEFT JOIN pg_stat_user_indexes ui ON ui.indexrelid = i.indexrelid
WHERE
    schemaname IS NOT NULL
ORDER BY
    schemaname ASC,
    tablename ASC,
    indexname ASC;

select * from pg_stat_user_indexes;

## auto-explain.sql
-- Enable the auto_explain plugin. Useful for doing `explain analyze` on PG functions
-- which consist of many nested statements you need to know which of those statements
-- are taking long and why.

-- Sadly, this plugin is not available on Heroku Postgres. You should migrate to RDS. :)

-- AWS RDS
LOAD '$libdir/plugins/auto_explain';
-- General PG installation
LOAD 'auto_explain';

SET auto_explain.log_format=text;
SET auto_explain.log_analyze = true;
SET auto_explain.log_nested_statements = true;
SET auto_explain.log_timing = true;
-- Set this to 0 to enable analysis on all statements
SET auto_explain.log_min_duration = 500;

-- Optionally send all the auto_explain messages to your preferred GUI client
-- Otherwise the messages might only appear in the db logs
SET client_min_messages = log;

## blocking-queries.sql
-- See which queries are being blocked by which other query.
-- Note that seeing SOMETHING here does not necessarily mean a problem, but the results
-- should change quickly. Only when a statement hangs for too long it is a cause for concern.

SELECT
    blocked_locks.pid AS blocked_pid,
    blocked_activity.usename AS blocked_user,
    blocking_locks.pid AS blocking_pid,
    blocking_activity.usename AS blocking_user,
    blocked_activity.query AS blocked_statement,
    blocking_activity.query AS current_statement_in_blocking_process,
    blocked_activity.xact_start AS query_start
FROM
    pg_catalog.pg_locks blocked_locks
    JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
    JOIN pg_catalog.pg_locks blocking_locks ON blocking_locks.locktype = blocked_locks.locktype
        AND blocking_locks.DATABASE IS NOT DISTINCT FROM blocked_locks.DATABASE
        AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
        AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
        AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
        AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
        AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
        AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
        AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
        AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
        AND blocking_locks.pid != blocked_locks.pid
    JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE
    NOT blocked_locks.GRANTED
ORDER BY
    blocking_pid;

## database-bloat.sql
-- See how much of the table and index data is current, live rows and how much of it is dead rows.
-- If you see big numbers in the ratios, it might be time to run `VACUUM` or, if you can affort downtime, `VACCUM FULL`.
-- Do not forget to also do `ANALYZE`, either after you have vacuumed or as part of the vacuuming itself with
-- `VACUUM [FULL] ANALYZE;`.

SELECT
  current_database(), schemaname, tablename, /*reltuples::bigint, relpages::bigint, otta,*/
  ROUND((CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages::FLOAT/otta END)::NUMERIC,1) AS tbloat,
  CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::BIGINT END AS wastedbytes,
  iname, /*ituples::bigint, ipages::bigint, iotta,*/
  ROUND((CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages::FLOAT/iotta END)::NUMERIC,1) AS ibloat,
  CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes
FROM (
  SELECT
    schemaname, tablename, cc.reltuples, cc.relpages, bs,
    CEIL((cc.reltuples*((datahdr+ma-
      (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::FLOAT)) AS otta,
    COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
    COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::FLOAT)),0) AS iotta -- very rough approximation, assumes all cols
  FROM (
    SELECT
      ma,bs,schemaname,tablename,
      (datawidth+(hdr+ma-(CASE WHEN hdr%ma=0 THEN ma ELSE hdr%ma END)))::NUMERIC AS datahdr,
      (maxfracsum*(nullhdr+ma-(CASE WHEN nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
    FROM (
      SELECT
        schemaname, tablename, hdr, ma, bs,
        SUM((1-null_frac)*avg_width) AS datawidth,
        MAX(null_frac) AS maxfracsum,
        hdr+(
          SELECT 1+COUNT(*)/8
          FROM pg_stats s2
          WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
        ) AS nullhdr
      FROM pg_stats s, (
        SELECT
          (SELECT current_setting('block_size')::NUMERIC) AS bs,
          CASE WHEN SUBSTRING(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
          CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
        FROM (SELECT version() AS v) AS foo
      ) AS constants
      GROUP BY 1,2,3,4,5
    ) AS foo
  ) AS rs
  JOIN pg_class cc ON cc.relname = rs.tablename
  JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
  LEFT JOIN pg_index i ON indrelid = cc.oid
  LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
) AS sml
WHERE schemaname = 'public'
ORDER BY wastedbytes DESC;

## maybe-missing-indexes.sql
-- See on which tables PG is doing a lot of sequential scans. On small tables
-- a seq scan is more efficient than an index scan so always think if adding
-- an index would really make sense.
-- Sadly, it does not show which fields are "hot", only the tables names.

SELECT
    relname as table,
    pg_size_pretty(pg_relation_size(relid::regclass)) AS size,
    seq_scan as sequential_scans,
    idx_scan as index_scans,
    seq_scan - idx_scan AS difference,
    CASE WHEN seq_scan - idx_scan > 0 THEN
        'Missing Index?'
    ELSE
        'OK'
    END AS status
FROM
    pg_stat_all_tables
WHERE
    schemaname = 'public'
    AND pg_relation_size(relid::regclass) > 80000
ORDER BY
    difference DESC;

## unused-indexes.sql
-- See which indexes are not being used or are being used seldomly.
-- Change the `idx_scan` query filter to your liking - for heavy workloads,
-- even an index with several thousand hits could be potentially "useless"
-- if other indexes have hit rates in millions.

SELECT
    schemaname AS schemaname,
    relname AS tablename,
    indexrelname AS indexname,
    pg_size_pretty(pg_relation_size(i.indexrelid)) AS size,
    idx_scan AS indexscans
FROM
    pg_stat_user_indexes ui
INNER JOIN pg_index i ON ui.indexrelid = i.indexrelid
WHERE
    NOT indisunique
    AND idx_scan <= 50
ORDER BY
    pg_relation_size(i.indexrelid) DESC,
    relname ASC;
	-- List all existing indexes and include some useful info about them (incl. the index's definition)

	SELECT
	schemaname AS schemaname,
	t.relname AS tablename,
	ix.relname AS indexname,
	regexp_replace(pg_get_indexdef(i.indexrelid), '^[^\(]\((.)\)$', '\1') AS columns,
	regexp_replace(pg_get_indexdef(i.indexrelid), '.* USING ([^ ]) \(.', '\1') AS algorithm,
	indisunique AS UNIQUE,
	indisprimary AS PRIMARY,
	indisvalid AS valid,
	pg_size_pretty(pg_relation_size(i.indexrelid)) AS size,
	idx_scan AS indexscans,
	idx_tup_read AS tuplereads,
	idx_tup_fetch AS tuplefetches,
	pg_get_indexdef(i.indexrelid) AS definition
	FROM
	pg_index i
	INNER JOIN pg_class t ON t.oid = i.indrelid
	INNER JOIN pg_class ix ON ix.oid = i.indexrelid
	LEFT JOIN pg_stat_user_indexes ui ON ui.indexrelid = i.indexrelid
	WHERE
	schemaname IS NOT NULL
	ORDER BY
	schemaname ASC,
	tablename ASC,
	indexname ASC;

	select * from pg_stat_user_indexes;
	-- Enable the auto_explain plugin. Useful for doing `explain analyze` on PG functions
	-- which consist of many nested statements you need to know which of those statements
	-- are taking long and why.

	-- Sadly, this plugin is not available on Heroku Postgres. You should migrate to RDS. :)

	-- AWS RDS
	LOAD '$libdir/plugins/auto_explain';
	-- General PG installation
	LOAD 'auto_explain';

	SET auto_explain.log_format=text;
	SET auto_explain.log_analyze = true;
	SET auto_explain.log_nested_statements = true;
	SET auto_explain.log_timing = true;
	-- Set this to 0 to enable analysis on all statements
	SET auto_explain.log_min_duration = 500;

	-- Optionally send all the auto_explain messages to your preferred GUI client
	-- Otherwise the messages might only appear in the db logs
	SET client_min_messages = log;
	-- See which queries are being blocked by which other query.
	-- Note that seeing SOMETHING here does not necessarily mean a problem, but the results
	-- should change quickly. Only when a statement hangs for too long it is a cause for concern.

	SELECT
	blocked_locks.pid AS blocked_pid,
	blocked_activity.usename AS blocked_user,
	blocking_locks.pid AS blocking_pid,
	blocking_activity.usename AS blocking_user,
	blocked_activity.query AS blocked_statement,
	blocking_activity.query AS current_statement_in_blocking_process,
	blocked_activity.xact_start AS query_start
	FROM
	pg_catalog.pg_locks blocked_locks
	JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
	JOIN pg_catalog.pg_locks blocking_locks ON blocking_locks.locktype = blocked_locks.locktype
	AND blocking_locks.DATABASE IS NOT DISTINCT FROM blocked_locks.DATABASE
	AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
	AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
	AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
	AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
	AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
	AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
	AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
	AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
	AND blocking_locks.pid != blocked_locks.pid
	JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
	WHERE
	NOT blocked_locks.GRANTED
	ORDER BY
	blocking_pid;
	-- See how much of the table and index data is current, live rows and how much of it is dead rows.
	-- If you see big numbers in the ratios, it might be time to run `VACUUM` or, if you can affort downtime, `VACCUM FULL`.
	-- Do not forget to also do `ANALYZE`, either after you have vacuumed or as part of the vacuuming itself with
	-- `VACUUM [FULL] ANALYZE;`.

	SELECT
	current_database(), schemaname, tablename, /reltuples::bigint, relpages::bigint, otta,/
	ROUND((CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages::FLOAT/otta END)::NUMERIC,1) AS tbloat,
	CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::BIGINT END AS wastedbytes,
	iname, /ituples::bigint, ipages::bigint, iotta,/
	ROUND((CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages::FLOAT/iotta END)::NUMERIC,1) AS ibloat,
	CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes
	FROM (
	SELECT
	schemaname, tablename, cc.reltuples, cc.relpages, bs,
	CEIL((cc.reltuples*((datahdr+ma-
	(CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::FLOAT)) AS otta,
	COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
	COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::FLOAT)),0) AS iotta -- very rough approximation, assumes all cols
	FROM (
	SELECT
	ma,bs,schemaname,tablename,
	(datawidth+(hdr+ma-(CASE WHEN hdr%ma=0 THEN ma ELSE hdr%ma END)))::NUMERIC AS datahdr,
	(maxfracsum*(nullhdr+ma-(CASE WHEN nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
	FROM (
	SELECT
	schemaname, tablename, hdr, ma, bs,
	SUM((1-null_frac)*avg_width) AS datawidth,
	MAX(null_frac) AS maxfracsum,
	hdr+(
	SELECT 1+COUNT(*)/8
	FROM pg_stats s2
	WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
	) AS nullhdr
	FROM pg_stats s, (
	SELECT
	(SELECT current_setting('block_size')::NUMERIC) AS bs,
	CASE WHEN SUBSTRING(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
	CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
	FROM (SELECT version() AS v) AS foo
	) AS constants
	GROUP BY 1,2,3,4,5
	) AS foo
	) AS rs
	JOIN pg_class cc ON cc.relname = rs.tablename
	JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
	LEFT JOIN pg_index i ON indrelid = cc.oid
	LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
	) AS sml
	WHERE schemaname = 'public'
	ORDER BY wastedbytes DESC;
	-- See on which tables PG is doing a lot of sequential scans. On small tables
	-- a seq scan is more efficient than an index scan so always think if adding
	-- an index would really make sense.
	-- Sadly, it does not show which fields are "hot", only the tables names.

	SELECT
	relname as table,
	pg_size_pretty(pg_relation_size(relid::regclass)) AS size,
	seq_scan as sequential_scans,
	idx_scan as index_scans,
	seq_scan - idx_scan AS difference,
	CASE WHEN seq_scan - idx_scan > 0 THEN
	'Missing Index?'
	ELSE
	'OK'
	END AS status
	FROM
	pg_stat_all_tables
	WHERE
	schemaname = 'public'
	AND pg_relation_size(relid::regclass) > 80000
	ORDER BY
	difference DESC;
	-- See which indexes are not being used or are being used seldomly.
	-- Change the `idx_scan` query filter to your liking - for heavy workloads,
	-- even an index with several thousand hits could be potentially "useless"
	-- if other indexes have hit rates in millions.

	SELECT
	schemaname AS schemaname,
	relname AS tablename,
	indexrelname AS indexname,
	pg_size_pretty(pg_relation_size(i.indexrelid)) AS size,
	idx_scan AS indexscans
	FROM
	pg_stat_user_indexes ui
	INNER JOIN pg_index i ON ui.indexrelid = i.indexrelid
	WHERE
	NOT indisunique
	AND idx_scan <= 50
	ORDER BY
	pg_relation_size(i.indexrelid) DESC,
	relname ASC;