Skip to content

Instantly share code, notes, and snippets.

@Millon15
Created April 9, 2020 15:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Millon15/2468934e2ccdf890a2f3ac442339d101 to your computer and use it in GitHub Desktop.
Save Millon15/2468934e2ccdf890a2f3ac442339d101 to your computer and use it in GitHub Desktop.
-- PostgreSQl DB maintenance clauses.
-- Sources:
-- https://wiki.postgresql.org/wiki/Index_Maintenance#Index_size.2Fusage_statistics
-- https://xzilla.net/blog/2008/Jul/Index-pruning-techniques.html
-- Index Maintenance
-- Source: https://wiki.postgresql.org/wiki/Index_Maintenance#Index_size.2Fusage_statistics
-- Index summary
SELECT pg_class.relname,
pg_size_pretty(pg_class.reltuples::BIGINT) AS rows_in_bytes,
pg_class.reltuples AS num_rows,
COUNT(indexname) AS number_of_indexes,
CASE
WHEN x.is_unique = 1 THEN 'Y'
ELSE 'N'
END AS UNIQUE,
SUM(CASE
WHEN number_of_columns = 1 THEN 1
ELSE 0
END) AS single_column,
SUM(CASE
WHEN number_of_columns IS NULL THEN 0
WHEN number_of_columns = 1 THEN 0
ELSE 1
END) AS multi_column
FROM pg_namespace
LEFT OUTER JOIN pg_class ON pg_namespace.oid = pg_class.relnamespace
LEFT OUTER JOIN
(SELECT indrelid,
MAX(CAST(indisunique AS INTEGER)) AS is_unique
FROM pg_index
GROUP BY indrelid) x
ON pg_class.oid = x.indrelid
LEFT OUTER JOIN
(SELECT c.relname AS ctablename, ipg.relname AS indexname, x.indnatts AS number_of_columns
FROM pg_index x
JOIN pg_class c ON c.oid = x.indrelid
JOIN pg_class ipg ON ipg.oid = x.indexrelid)
AS foo
ON pg_class.relname = foo.ctablename
WHERE pg_namespace.nspname = 'public'
AND pg_class.relkind = 'r'
GROUP BY pg_class.relname, pg_class.reltuples, x.is_unique
ORDER BY 2;
-- Index size/usage statistics
SELECT t.schemaname,
t.tablename,
indexname,
c.reltuples AS num_rows,
pg_size_pretty(pg_relation_size(quote_ident(t.schemaname)::text || '.' ||
quote_ident(t.tablename)::text)) AS table_size,
pg_size_pretty(pg_relation_size(quote_ident(t.schemaname)::text || '.' ||
quote_ident(indexrelname)::text)) AS index_size,
CASE
WHEN indisunique THEN 'Y'
ELSE 'N'
END AS UNIQUE,
number_of_scans,
tuples_read,
tuples_fetched
FROM pg_tables t
LEFT OUTER JOIN pg_class c ON t.tablename = c.relname
LEFT OUTER JOIN (
SELECT c.relname AS ctablename,
ipg.relname AS indexname,
x.indnatts AS number_of_columns,
idx_scan AS number_of_scans,
idx_tup_read AS tuples_read,
idx_tup_fetch AS tuples_fetched,
indexrelname,
indisunique,
schemaname
FROM pg_index x
JOIN pg_class c ON c.oid = x.indrelid
JOIN pg_class ipg ON ipg.oid = x.indexrelid
JOIN pg_stat_all_indexes psai ON x.indexrelid = psai.indexrelid
) AS foo ON t.tablename = foo.ctablename AND t.schemaname = foo.schemaname
WHERE t.schemaname NOT IN ('pg_catalog', 'information_schema')
ORDER BY 1, 2;
-- Index Pruning Techniques.
-- Adviced to read source article before run the clauses below and try to purge some indexes from our DB.
-- Source: https://xzilla.net/blog/2008/Jul/Index-pruning-techniques.html
-- Finding duplicate indexes
select indrelid::regclass table_name,
array_agg(indexrelid::regclass) duplicating_indexes
from pg_index
group by indrelid, indkey
having count(*) > 1;
/**
* Now, this is good for finding true duplicates, but we can also take that a step further.
* In this next query, we list out all of the indexes that have a matching multi-column index,
* meaning if you are indexing column a, and columns a,b, we will pop that up on the list.
*/
select a.indrelid::regclass table_name,
a.indexrelid::regclass,
b.indexrelid::regclass
from (select *, array_to_string(indkey, ' ') as cols from pg_index) a
join (select *, array_to_string(indkey, ' ') as cols from pg_index) b on
(a.indrelid = b.indrelid and a.indexrelid > b.indexrelid
and ((a.cols LIKE b.cols || '%' and coalesce(substr(a.cols, length(b.cols) + 1, 1), ' ') = ' ')
or (b.cols LIKE a.cols || '%' and coalesce(substr(b.cols, length(a.cols) + 1, 1), ' ') = ' ')))
order by table_name;
-- Finding useless indexes
/**
* The last method listed here is an attempt to determine which indexes are unnecessary
* based on the data contained within the database.
* To use it, you’ll once again need to have real data available for looking at
* (though it can be just a copy of production; it doesn’t have to actually be production),
* and you’ll want to make sure all of your tables are freshly analyzed
* (with a default_stats_target of at least 100 ;-D ).
* What this query does it look at the relative distinctness of the values in a given indexes column
* and the show indexes where there is not enough relative distinctness that the planner is likely to use the index.
* !!! Warning, this query isn’t as solid as the others !!!
*/
-- if stuck with some of the names in the SELECT section below, you always could use manuals about tables mentioned in the query:
-- https://www.postgresql.org/docs/11/catalog-pg-statistic.html + https://www.postgresql.org/docs/11/catalog-pg-index.html
select starelid::regclass table_name,
indexrelid::regclass maybe_not_so_useful_index_name,
array_agg(staattnum) aggr_num_of_attributes,
relpages,
reltuples,
array_agg(stadistinct) aggr_num_distinct_values_column_stores -- aggregated_number_of_distinct_nonnull_data_values_stored_in_the_column
from pg_index
join pg_statistic on (starelid = indrelid and staattnum = ANY (indkey))
join pg_class on (indexrelid = oid)
where not (indisunique or indisprimary)
and (relpages > 100 or reltuples > 1000)
and stadistinct > 0.2 IS NOT FALSE
and case when stadistinct < 0 then stadistinct > -0.8 else reltuples / stadistinct > 0.2 end
group by starelid, indexrelid, relpages, reltuples
order by starelid;
-- Wanna see some statistics about your favorite table's column? Use the clause below!
-- Source: https://stackoverflow.com/questions/8479781/postgresql-analyze-pg-statistic-table
-- select * from pg_stats where tablename = INSERT_SOME_TABLE_NAME and attname = INSERT_SOME_COLUMN_NAME;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment