@Yoshyn
Last active March 2, 2018 09:06
Manage Postgres : several scripts for managing Postgres
# Contains :
# * find_duplicated_index.sql
# * find_missing_index.sql
# * find_unused_index.sql
# * generate_data.sql : Generate a big dummy set of data
# * update_PG_varchar_to_string.rb : Update all varchar fields to string across a database
# * mass_export_and_delete.sql
# * remove_update_duplicate.sql
# * sharding_table_stats.sql : Get avg, percentiles, etc. for a table across shards (tenant schemas)
# * You can also look at : https://github.com/dalibo/powa
# * See also these articles:
#   Is an operation safe (add column, index, ...)?
#   http://leopard.in.ua/2016/09/20/safe-and-unsafe-operations-postgresql?utm_source=postgresweekly&utm_medium=email#.WOToBxKGPMW
#   Multi-column queries: which index is used?
#   http://thebuild.com/blog/2016/12/30/the-multi-column-index-of-the-mysteries/?utm_source=postgresweekly&utm_medium=email
#   Partitioning a PG table:
#   https://engineering.heroku.com/blogs/2016-09-13-handling-very-large-tables-in-postgres-using-partitioning/
#   https://karolgalanciak.com/blog/2016/06/05/scaling-up-rails-applications-with-postgresql-table-partitioning-part-1/
#   Warning: a row cannot be moved from one partition to another with an UPDATE. The partition key must be immutable, or mutable only as long as changing it never moves the row to a different partition.
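# A minimal hedged sketch of the safe/unsafe pattern from the first article (table and column names are illustrative):
# on PostgreSQL versions before 11, adding a column with a non-null default rewrites the whole table under an
# exclusive lock, while adding it without a default and backfilling afterwards avoids the rewrite.
#   ALTER TABLE my_table ADD COLUMN flag boolean NOT NULL DEFAULT false;  -- unsafe on large tables (pre-PG 11): full rewrite
#   ALTER TABLE my_table ADD COLUMN flag boolean;                         -- safer: add the column without a default
#   UPDATE my_table SET flag = false WHERE flag IS NULL;                  -- backfill (ideally in batches)
#   ALTER TABLE my_table ALTER COLUMN flag SET DEFAULT false;             -- then set the default for new rows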
/* source : https://wiki.postgresql.org/wiki/Index_Maintenance */
SELECT pg_size_pretty(SUM(pg_relation_size(idx))::BIGINT) AS SIZE,
(array_agg(idx))[1] AS idx1, (array_agg(idx))[2] AS idx2,
(array_agg(idx))[3] AS idx3, (array_agg(idx))[4] AS idx4
FROM (
SELECT indexrelid::regclass AS idx, (indrelid::text ||E'\n'|| indclass::text ||E'\n'|| indkey::text ||E'\n'||
COALESCE(indexprs::text,'')||E'\n' || COALESCE(indpred::text,'')) AS KEY
FROM pg_index) sub
GROUP BY KEY HAVING COUNT(*)>1
ORDER BY SUM(pg_relation_size(idx)) DESC;
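/* Hedged follow-up sketch: 'idx2' is a placeholder for one of the duplicate indexes reported above.
   Check its definition first, then drop the redundant copy without blocking writes. */
SELECT pg_get_indexdef('idx2'::regclass);
DROP INDEX CONCURRENTLY idx2;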
/* source : http://www.dbrnd.com/2015/09/postgresql-script-to-find-the-unused-and-duplicate-index/ */
SELECT
indrelid::regclass AS TableName
,array_agg(indexrelid::regclass) AS Indexes
FROM pg_index
GROUP BY
indrelid
,indkey
HAVING COUNT(*) > 1;
/* source http://www.dbrnd.com/2015/10/postgresql-script-to-find-a-missing-indexes-of-the-schema/ */
SELECT
relname AS TableName
,seq_scan-idx_scan AS TotalSeqScan
,CASE WHEN seq_scan-idx_scan > 0
THEN 'Missing Index Found'
ELSE 'Missing Index Not Found'
END AS MissingIndex
,pg_size_pretty(pg_relation_size(relname::regclass)) AS TableSize
,idx_scan AS TotalIndexScan
FROM pg_stat_all_tables
WHERE schemaname='public'
AND pg_relation_size(relname::regclass)>100000
ORDER BY 2 DESC;
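/* Hedged follow-up sketch: the query above is only a heuristic (many sequential scans, few index scans).
   For a flagged table, the usual fix is a concurrent index build; the table and column names below are
   placeholders, not taken from the query output. */
CREATE INDEX CONCURRENTLY my_table_some_column_idx ON my_table (some_column);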
/* source : https://jmorano.moretrix.com/2014/02/postgresql-monitor-unused-indexes/ */
SELECT
relid::regclass AS table,
indexrelid::regclass AS index,
pg_size_pretty(pg_relation_size(indexrelid::regclass)) AS index_size,
idx_tup_read,
idx_tup_fetch,
idx_scan
FROM
pg_stat_user_indexes
JOIN pg_index USING (indexrelid)
WHERE idx_scan = 0
AND indisunique IS FALSE
ORDER BY pg_relation_size(pg_index.indexrelid) / nullif(idx_scan, 0) DESC NULLS FIRST, pg_relation_size(pg_index.indexrelid) DESC;
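/* Note: idx_scan counters are cumulative since the last statistics reset, so a "never used" index may simply
   predate your measurement window. A hedged way to measure over a fresh window is to reset the counters for
   the current database and re-run the query above later: */
SELECT pg_stat_reset();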
\timing
DROP TABLE IF EXISTS t_random;
CREATE TABLE t_random AS
SELECT gs,
md5(random()::text),
cast(cast(random() AS integer) AS boolean) AS bool1,
cast(cast(random() AS integer) AS boolean) AS bool2,
trunc(random() * 5 + 1) AS enum
FROM generate_Series(1,4000000) AS gs;
SELECT count(*) FROM t_random;
EXPLAIN ANALYSE SELECT count(*) FROM t_random WHERE bool1 IS TRUE;
EXPLAIN ANALYSE SELECT count(*) FROM t_random WHERE enum = 1;
EXPLAIN ANALYSE SELECT count(*) FROM t_random WHERE bool1 IS TRUE AND enum = 1;
EXPLAIN ANALYSE SELECT count(*) FROM t_random WHERE bool1 IS TRUE OR enum = 1;
CREATE INDEX t_random_enum_idx ON t_random(enum); DROP INDEX t_random_enum_idx;
CREATE INDEX t_random_bool1_idx ON t_random(bool1); DROP INDEX t_random_bool1_idx;
CREATE INDEX t_random_enum_bool1_idx ON t_random(enum, bool1); DROP INDEX t_random_enum_bool1_idx;
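/* Hedged extra experiment on the generated data: for a low-selectivity boolean like bool1, a partial index
   is usually much smaller than a full one; whether the planner actually picks it depends on selectivity. */
CREATE INDEX t_random_bool1_partial_idx ON t_random(enum) WHERE bool1 IS TRUE;
EXPLAIN ANALYSE SELECT count(*) FROM t_random WHERE bool1 IS TRUE AND enum = 1;
DROP INDEX t_random_bool1_partial_idx;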
-- Export into a CSV file all rows of a table matching a condition, always keeping the 10 most recent rows.
\copy (
  -- Export all rows from the subquery that were last updated more than 6 months ago
  SELECT * FROM (
    -- Select all rows matching condition_list_here, except the 10 most recent
    SELECT *
    FROM table
    WHERE condition_list_here
    ORDER BY table.id DESC
    OFFSET 10
  ) AS must_be_exported_rows_keep_ten
  WHERE updated_at < (CURRENT_DATE - INTERVAL '6 month')
  ORDER BY must_be_exported_rows_keep_ten.id ASC
) TO 'file_name.csv' DELIMITER ';' NULL 'null' QUOTE '"' CSV;
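-- Note: psql terminates a \copy meta-command at the end of the line, so the multi-line layout above is for
-- readability only; collapse the query onto a single line when actually running it. A hedged, simplified
-- server-side alternative (requires superuser/file privileges and writes the file on the database server):
COPY (
  SELECT * FROM table WHERE condition_list_here AND updated_at < (CURRENT_DATE - INTERVAL '6 month')
) TO '/tmp/file_name.csv' DELIMITER ';' NULL 'null' CSV;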
-- Remove all rows of a table matching a condition, always keeping the 10 most recent rows,
-- and store the number of deleted rows somewhere.
BEGIN;
WITH row_deleted_count AS (
  DELETE
  FROM table
  WHERE table.id IN (
    SELECT must_be_exported_rows_keep_ten.id
    FROM (
      SELECT table.id, table.updated_at
      FROM table
      WHERE condition_list_here
      ORDER BY table.id DESC
      OFFSET 10
    ) AS must_be_exported_rows_keep_ten
    WHERE updated_at < (CURRENT_DATE - INTERVAL '12 month')
  )
  RETURNING 1
)
-- Store the number of archived rows somewhere:
UPDATE somewhere
SET archived_row_count = archived_row_count + (SELECT COUNT(*) FROM row_deleted_count)
WHERE similar_condition_list_here;
COMMIT;
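-- Optional hedged check before running the block above: count the rows the DELETE would remove.
SELECT COUNT(*)
FROM ( SELECT table.id, table.updated_at
       FROM table
       WHERE condition_list_here
       ORDER BY table.id DESC
       OFFSET 10 ) AS must_be_exported_rows_keep_ten
WHERE updated_at < (CURRENT_DATE - INTERVAL '12 month');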
-- Ensure that no duplicate rows will block the creation of a lowercase (case-insensitive) unique index.
-- Transforms the following example rows:
--  value  |                  id                  |             my_table_id
-- --------+--------------------------------------+--------------------------------------
--  A      | 4ffa750a-75cd-4571-b3f8-a1d9cab61ffc | d4b7787f-41f5-4675-832f-b1def36b653a
--  B      | 632cf07d-2189-408c-af5e-98f6dc9558a2 | d4b7787f-41f5-4675-832f-b1def36b653a
--  a      | 6b7f5100-32bf-405f-929b-b70dc4f651df | d4b7787f-41f5-4675-832f-b1def36b653a
--  b      | 067f13c9-3c3d-4971-828e-df6a393df76a | d4b7787f-41f5-4675-832f-b1def36b653a
-- Into this:
--  value  |                  id                  |             my_table_id
-- --------+--------------------------------------+--------------------------------------
--  A      | 4ffa750a-75cd-4571-b3f8-a1d9cab61ffc | d4b7787f-41f5-4675-832f-b1def36b653a
--  B      | 632cf07d-2189-408c-af5e-98f6dc9558a2 | d4b7787f-41f5-4675-832f-b1def36b653a
--  a_2    | 6b7f5100-32bf-405f-929b-b70dc4f651df | d4b7787f-41f5-4675-832f-b1def36b653a
--  b_2    | 067f13c9-3c3d-4971-828e-df6a393df76a | d4b7787f-41f5-4675-832f-b1def36b653a
WITH my_table_numbered_row AS (
SELECT id,
my_table_id,
row_number() OVER (PARTITION BY my_table_id, lower(value)) AS rn
FROM my_table_options
)
UPDATE my_table_options
SET value = (value || '_' || ctanr.rn::text)
FROM my_table_numbered_row ctanr
WHERE my_table_options.id = ctanr.id AND ctanr.rn > 1;
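-- Hedged sketch of the goal stated in the header: once the duplicates are renamed, the case-insensitive
-- unique index can be created (the index name is illustrative).
CREATE UNIQUE INDEX CONCURRENTLY my_table_options_lower_value_uniq
  ON my_table_options (my_table_id, lower(value));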
-- Restore the original values (strip the appended suffix):
UPDATE my_table_options SET value = substring(value FROM '^[a-zA-Z]+');
-- Delete duplicate rows (same field1_id / field_id), keeping only the most recent row per group.
DELETE FROM my_table
WHERE id IN (
SELECT id
FROM ( SELECT id,
ROW_NUMBER() OVER(PARTITION BY field1_id, field_id ORDER BY created_at desc) AS Row
FROM my_table) duplicated_my_table
WHERE duplicated_my_table.Row > 1
)
RETURNING id, field1_id, field_id, created_at;
-- Use a materialized view and refresh it to avoid calling the function each time.
CREATE OR REPLACE FUNCTION table_count_per_tenant_with_quartile_fct()
RETURNS TABLE (table_count bigint) AS $$
DECLARE
  schema RECORD;
BEGIN
  FOR schema IN EXECUTE
    format('SELECT schema_name FROM information_schema.schemata WHERE schema_name ~ %L', '^tenant_')
  LOOP
    RETURN QUERY EXECUTE
      format('SELECT count(*) FROM %I.table', schema.schema_name);
  END LOOP;
END;
$$ LANGUAGE plpgsql;
WITH table_count_per_quartile AS (
SELECT
table_count, ntile(4) OVER (ORDER BY table_count) as quartile
FROM table_count_per_tenant_with_quartile_fct()
WHERE table_count > 0
)
SELECT
MIN(table_count) AS min,
MAX(table_count) AS max,
SUM(table_count) AS total,
AVG(table_count)::int AS avg,
percentile_disc(0.25) within group (ORDER BY table_count) AS percentile_25,
percentile_cont( 0.5 ) within group (order by table_count) AS median,
percentile_disc(0.75) within group (ORDER BY table_count) AS percentile_75,
count(*) AS tenant_count
FROM table_count_per_quartile
GROUP BY quartile ORDER BY quartile;
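-- Hedged sketch of the materialized view mentioned in the header comment (the view name is illustrative);
-- the quartile query above can then select FROM table_count_per_tenant instead of calling the function.
CREATE MATERIALIZED VIEW table_count_per_tenant AS
  SELECT * FROM table_count_per_tenant_with_quartile_fct();
REFRESH MATERIALIZED VIEW table_count_per_tenant;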
# Change every column whose SQL type is a fixed-length character(N) to the given type (:string by default).
def change_db_varchar_XXX(new_type: :string)
Rails.application.eager_load!
ActiveRecord::Base.descendants.each do |orm_model|
next if orm_model.abstract_class? || !orm_model.table_exists?
orm_model.columns.each do |column|
if column.sql_type =~ /^character\(\d+\)$/
puts "Change #{orm_model.table_name}:#{column.name}"
ActiveRecord::Migration.change_column(
orm_model.table_name,
column.name,
new_type)
end
end
end
end
change_db_varchar_XXX
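# For reference, each change_column call above boils down to roughly this SQL on PostgreSQL
# (table/column names are illustrative; the exact varchar length, if any, depends on the Rails adapter defaults):
#   ALTER TABLE my_table ALTER COLUMN my_column TYPE character varying;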