Millon15/pg_optimization.sql

## pg_optimization.sql
-- PostgreSQl DB maintenance clauses.
-- Sources:
-- https://wiki.postgresql.org/wiki/Index_Maintenance#Index_size.2Fusage_statistics
-- https://xzilla.net/blog/2008/Jul/Index-pruning-techniques.html


-- Index Maintenance
-- Source: https://wiki.postgresql.org/wiki/Index_Maintenance#Index_size.2Fusage_statistics
-- Index summary
SELECT pg_class.relname,
       pg_size_pretty(pg_class.reltuples::BIGINT) AS rows_in_bytes,
       pg_class.reltuples                         AS num_rows,
       COUNT(indexname)                           AS number_of_indexes,
       CASE
           WHEN x.is_unique = 1 THEN 'Y'
           ELSE 'N'
           END                                    AS UNIQUE,
       SUM(CASE
               WHEN number_of_columns = 1 THEN 1
               ELSE 0
           END)                                   AS single_column,
       SUM(CASE
               WHEN number_of_columns IS NULL THEN 0
               WHEN number_of_columns = 1 THEN 0
               ELSE 1
           END)                                   AS multi_column
FROM pg_namespace
         LEFT OUTER JOIN pg_class ON pg_namespace.oid = pg_class.relnamespace
         LEFT OUTER JOIN
     (SELECT indrelid,
             MAX(CAST(indisunique AS INTEGER)) AS is_unique
      FROM pg_index
      GROUP BY indrelid) x
     ON pg_class.oid = x.indrelid
         LEFT OUTER JOIN
     (SELECT c.relname AS ctablename, ipg.relname AS indexname, x.indnatts AS number_of_columns
      FROM pg_index x
               JOIN pg_class c ON c.oid = x.indrelid
               JOIN pg_class ipg ON ipg.oid = x.indexrelid)
         AS foo
     ON pg_class.relname = foo.ctablename
WHERE pg_namespace.nspname = 'public'
  AND pg_class.relkind = 'r'
GROUP BY pg_class.relname, pg_class.reltuples, x.is_unique
ORDER BY 2;

-- Index size/usage statistics
SELECT t.schemaname,
       t.tablename,
       indexname,
       c.reltuples                                                       AS num_rows,
       pg_size_pretty(pg_relation_size(quote_ident(t.schemaname)::text || '.' ||
                                       quote_ident(t.tablename)::text))  AS table_size,
       pg_size_pretty(pg_relation_size(quote_ident(t.schemaname)::text || '.' ||
                                       quote_ident(indexrelname)::text)) AS index_size,
       CASE
           WHEN indisunique THEN 'Y'
           ELSE 'N'
           END                                                           AS UNIQUE,
       number_of_scans,
       tuples_read,
       tuples_fetched
FROM pg_tables t
         LEFT OUTER JOIN pg_class c ON t.tablename = c.relname
         LEFT OUTER JOIN (
    SELECT c.relname     AS ctablename,
           ipg.relname   AS indexname,
           x.indnatts    AS number_of_columns,
           idx_scan      AS number_of_scans,
           idx_tup_read  AS tuples_read,
           idx_tup_fetch AS tuples_fetched,
           indexrelname,
           indisunique,
           schemaname
    FROM pg_index x
             JOIN pg_class c ON c.oid = x.indrelid
             JOIN pg_class ipg ON ipg.oid = x.indexrelid
             JOIN pg_stat_all_indexes psai ON x.indexrelid = psai.indexrelid
) AS foo ON t.tablename = foo.ctablename AND t.schemaname = foo.schemaname
WHERE t.schemaname NOT IN ('pg_catalog', 'information_schema')
ORDER BY 1, 2;


-- Index Pruning Techniques.
-- Adviced to read source article before run the clauses below and try to purge some indexes from our DB.
-- Source: https://xzilla.net/blog/2008/Jul/Index-pruning-techniques.html

-- Finding duplicate indexes
select indrelid::regclass              table_name,
       array_agg(indexrelid::regclass) duplicating_indexes
from pg_index
group by indrelid, indkey
having count(*) > 1;

/**
 * Now, this is good for finding true duplicates, but we can also take that a step further.
 * In this next query, we list out all of the indexes that have a matching multi-column index,
 * meaning if you are indexing column a, and columns a,b, we will pop that up on the list.
 */
select a.indrelid::regclass table_name,
       a.indexrelid::regclass,
       b.indexrelid::regclass
from (select *, array_to_string(indkey, ' ') as cols from pg_index) a
         join (select *, array_to_string(indkey, ' ') as cols from pg_index) b on
    (a.indrelid = b.indrelid and a.indexrelid > b.indexrelid
        and ((a.cols LIKE b.cols || '%' and coalesce(substr(a.cols, length(b.cols) + 1, 1), ' ') = ' ')
            or (b.cols LIKE a.cols || '%' and coalesce(substr(b.cols, length(a.cols) + 1, 1), ' ') = ' ')))
order by table_name;

-- Finding useless indexes
/**
 * The last method listed here is an attempt to determine which indexes are unnecessary
 * based on the data contained within the database.
 * To use it, you’ll once again need to have real data available for looking at
 * (though it can be just a copy of production; it doesn’t have to actually be production),
 * and you’ll want to make sure all of your tables are freshly analyzed
 * (with a default_stats_target of at least 100 ;-D ).
 * What this query does it look at the relative distinctness of the values in a given indexes column
 * and the show indexes where there is not enough relative distinctness that the planner is likely to use the index.
 * !!! Warning, this query isn’t as solid as the others !!!
 */
-- if stuck with some of the names in the SELECT section below, you always could use manuals about tables mentioned in the query:
-- https://www.postgresql.org/docs/11/catalog-pg-statistic.html + https://www.postgresql.org/docs/11/catalog-pg-index.html
select starelid::regclass     table_name,
       indexrelid::regclass   maybe_not_so_useful_index_name,
       array_agg(staattnum)   aggr_num_of_attributes,
       relpages,
       reltuples,
       array_agg(stadistinct) aggr_num_distinct_values_column_stores -- aggregated_number_of_distinct_nonnull_data_values_stored_in_the_column
from pg_index
         join pg_statistic on (starelid = indrelid and staattnum = ANY (indkey))
         join pg_class on (indexrelid = oid)
where not (indisunique or indisprimary)
  and (relpages > 100 or reltuples > 1000)
  and stadistinct > 0.2 IS NOT FALSE
  and case when stadistinct < 0 then stadistinct > -0.8 else reltuples / stadistinct > 0.2 end
group by starelid, indexrelid, relpages, reltuples
order by starelid;

-- Wanna see some statistics about your favorite table's column? Use the clause below!
-- Source: https://stackoverflow.com/questions/8479781/postgresql-analyze-pg-statistic-table
-- select * from pg_stats where tablename = INSERT_SOME_TABLE_NAME and attname = INSERT_SOME_COLUMN_NAME;
	-- PostgreSQl DB maintenance clauses.
	-- Sources:
	-- https://wiki.postgresql.org/wiki/Index_Maintenance#Index_size.2Fusage_statistics
	-- https://xzilla.net/blog/2008/Jul/Index-pruning-techniques.html


	-- Index Maintenance
	-- Source: https://wiki.postgresql.org/wiki/Index_Maintenance#Index_size.2Fusage_statistics
	-- Index summary
	SELECT pg_class.relname,
	pg_size_pretty(pg_class.reltuples::BIGINT) AS rows_in_bytes,
	pg_class.reltuples AS num_rows,
	COUNT(indexname) AS number_of_indexes,
	CASE
	WHEN x.is_unique = 1 THEN 'Y'
	ELSE 'N'
	END AS UNIQUE,
	SUM(CASE
	WHEN number_of_columns = 1 THEN 1
	ELSE 0
	END) AS single_column,
	SUM(CASE
	WHEN number_of_columns IS NULL THEN 0
	WHEN number_of_columns = 1 THEN 0
	ELSE 1
	END) AS multi_column
	FROM pg_namespace
	LEFT OUTER JOIN pg_class ON pg_namespace.oid = pg_class.relnamespace
	LEFT OUTER JOIN
	(SELECT indrelid,
	MAX(CAST(indisunique AS INTEGER)) AS is_unique
	FROM pg_index
	GROUP BY indrelid) x
	ON pg_class.oid = x.indrelid
	LEFT OUTER JOIN
	(SELECT c.relname AS ctablename, ipg.relname AS indexname, x.indnatts AS number_of_columns
	FROM pg_index x
	JOIN pg_class c ON c.oid = x.indrelid
	JOIN pg_class ipg ON ipg.oid = x.indexrelid)
	AS foo
	ON pg_class.relname = foo.ctablename
	WHERE pg_namespace.nspname = 'public'
	AND pg_class.relkind = 'r'
	GROUP BY pg_class.relname, pg_class.reltuples, x.is_unique
	ORDER BY 2;

	-- Index size/usage statistics
	SELECT t.schemaname,
	t.tablename,
	indexname,
	c.reltuples AS num_rows,
	pg_size_pretty(pg_relation_size(quote_ident(t.schemaname)::text \|\| '.' \|\|
	quote_ident(t.tablename)::text)) AS table_size,
	pg_size_pretty(pg_relation_size(quote_ident(t.schemaname)::text \|\| '.' \|\|
	quote_ident(indexrelname)::text)) AS index_size,
	CASE
	WHEN indisunique THEN 'Y'
	ELSE 'N'
	END AS UNIQUE,
	number_of_scans,
	tuples_read,
	tuples_fetched
	FROM pg_tables t
	LEFT OUTER JOIN pg_class c ON t.tablename = c.relname
	LEFT OUTER JOIN (
	SELECT c.relname AS ctablename,
	ipg.relname AS indexname,
	x.indnatts AS number_of_columns,
	idx_scan AS number_of_scans,
	idx_tup_read AS tuples_read,
	idx_tup_fetch AS tuples_fetched,
	indexrelname,
	indisunique,
	schemaname
	FROM pg_index x
	JOIN pg_class c ON c.oid = x.indrelid
	JOIN pg_class ipg ON ipg.oid = x.indexrelid
	JOIN pg_stat_all_indexes psai ON x.indexrelid = psai.indexrelid
	) AS foo ON t.tablename = foo.ctablename AND t.schemaname = foo.schemaname
	WHERE t.schemaname NOT IN ('pg_catalog', 'information_schema')
	ORDER BY 1, 2;


	-- Index Pruning Techniques.
	-- Adviced to read source article before run the clauses below and try to purge some indexes from our DB.
	-- Source: https://xzilla.net/blog/2008/Jul/Index-pruning-techniques.html

	-- Finding duplicate indexes
	select indrelid::regclass table_name,
	array_agg(indexrelid::regclass) duplicating_indexes
	from pg_index
	group by indrelid, indkey
	having count(*) > 1;

	/**
	* Now, this is good for finding true duplicates, but we can also take that a step further.
	* In this next query, we list out all of the indexes that have a matching multi-column index,
	* meaning if you are indexing column a, and columns a,b, we will pop that up on the list.
	*/
	select a.indrelid::regclass table_name,
	a.indexrelid::regclass,
	b.indexrelid::regclass
	from (select *, array_to_string(indkey, ' ') as cols from pg_index) a
	join (select *, array_to_string(indkey, ' ') as cols from pg_index) b on
	(a.indrelid = b.indrelid and a.indexrelid > b.indexrelid
	and ((a.cols LIKE b.cols \|\| '%' and coalesce(substr(a.cols, length(b.cols) + 1, 1), ' ') = ' ')
	or (b.cols LIKE a.cols \|\| '%' and coalesce(substr(b.cols, length(a.cols) + 1, 1), ' ') = ' ')))
	order by table_name;

	-- Finding useless indexes
	/**
	* The last method listed here is an attempt to determine which indexes are unnecessary
	* based on the data contained within the database.
	* To use it, you’ll once again need to have real data available for looking at
	* (though it can be just a copy of production; it doesn’t have to actually be production),
	* and you’ll want to make sure all of your tables are freshly analyzed
	* (with a default_stats_target of at least 100 ;-D ).
	* What this query does it look at the relative distinctness of the values in a given indexes column
	* and the show indexes where there is not enough relative distinctness that the planner is likely to use the index.
	* !!! Warning, this query isn’t as solid as the others !!!
	*/
	-- if stuck with some of the names in the SELECT section below, you always could use manuals about tables mentioned in the query:
	-- https://www.postgresql.org/docs/11/catalog-pg-statistic.html + https://www.postgresql.org/docs/11/catalog-pg-index.html
	select starelid::regclass table_name,
	indexrelid::regclass maybe_not_so_useful_index_name,
	array_agg(staattnum) aggr_num_of_attributes,
	relpages,
	reltuples,
	array_agg(stadistinct) aggr_num_distinct_values_column_stores -- aggregated_number_of_distinct_nonnull_data_values_stored_in_the_column
	from pg_index
	join pg_statistic on (starelid = indrelid and staattnum = ANY (indkey))
	join pg_class on (indexrelid = oid)
	where not (indisunique or indisprimary)
	and (relpages > 100 or reltuples > 1000)
	and stadistinct > 0.2 IS NOT FALSE
	and case when stadistinct < 0 then stadistinct > -0.8 else reltuples / stadistinct > 0.2 end
	group by starelid, indexrelid, relpages, reltuples
	order by starelid;

	-- Wanna see some statistics about your favorite table's column? Use the clause below!
	-- Source: https://stackoverflow.com/questions/8479781/postgresql-analyze-pg-statistic-table
	-- select * from pg_stats where tablename = INSERT_SOME_TABLE_NAME and attname = INSERT_SOME_COLUMN_NAME;