Last active
March 17, 2023 19:04
-
-
Save kigster/1c71f852e9eafc8b2b2efa85928b2d67 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This file requires a local SQL file named "json_table_columns.csv" to have two columns: | |
-- [ "Table", "Column" ] | |
-- It then loops over records in this table computing the total size of the column in each table, | |
-- the average size of the column in that table, and finally the percentage that column occupies relative | |
-- to the entire table (including its indexes). | |
-- First we import the data | |
BEGIN; | |
drop table if exists JSON_TABLE_COLUMNS; | |
create table JSON_TABLE_COLUMNS (table_name text, column_name text); | |
COMMIT; | |
BEGIN; | |
\COPY JSON_TABLE_COLUMNS(table_name, column_name) | |
FROM | |
'json_table_columns.csv' DELIMITER ',' CSV HEADER; | |
COMMIT; | |
-- Now we define some functions to help us compute the column sizes | |
CREATE OR REPLACE FUNCTION tc_col_size(table_name text, column_name text) | |
RETURNS BIGINT AS | |
$$ | |
declare response BIGINT; | |
BEGIN | |
EXECUTE 'select sum(pg_column_size(t."' || column_name || '")) from ' || table_name || ' t ' into response; | |
return response; | |
END; | |
$$ | |
LANGUAGE plpgsql; | |
CREATE OR REPLACE FUNCTION tc_col_avg(table_name text, column_name text) | |
RETURNS BIGINT AS | |
$$ | |
declare response BIGINT; | |
BEGIN | |
EXECUTE 'select avg(pg_column_size(t."' || column_name || '")) from ' || table_name || ' t ' into response; | |
return response; | |
END; | |
$$ | |
LANGUAGE plpgsql; | |
CREATE OR REPLACE FUNCTION pg_table_size(table_name text) | |
RETURNS BIGINT AS | |
$$ | |
declare response BIGINT; | |
BEGIN | |
SELECT data_size from | |
(SELECT relname AS "relname", | |
pg_relation_size(C.oid) AS "data_size" | |
FROM | |
pg_class C | |
LEFT JOIN | |
pg_namespace N ON (N.oid = C.relnamespace) | |
WHERE | |
nspname NOT IN ('pg_catalog', 'information_schema') | |
and C.relkind in ( 't', 'r', 'i') | |
and nspname not in ('pg_toast') | |
) as table_sizes where relname = table_name into response; | |
return response; | |
END; | |
$$ | |
LANGUAGE plpgsql; | |
CREATE OR REPLACE FUNCTION tc_col_pct(table_name text, column_name text) | |
RETURNS BIGINT AS | |
$$ | |
declare column_size BIGINT; | |
declare table_size BIGINT; | |
declare response FLOAT; | |
BEGIN | |
EXECUTE 'select coalesce(sum(pg_column_size(t."' || column_name || '")), 0) from ' || table_name || ' t ' into column_size; | |
EXECUTE 'select coalesce(pg_table_size(''' || table_name || '''), 0)' into table_size; | |
if table_size = 0 then | |
return 0; | |
end if; | |
EXECUTE 'select ' || column_size || ' * 100.0 / ('|| table_size ||' ) ' into response; | |
return response; | |
END; | |
$$ | |
LANGUAGE plpgsql; | |
\pset numericlocale true | |
-- Finally, we can run the query | |
\COPY ( | |
SELECT | |
table_name as table, | |
column_name as column, | |
pg_table_size(table_name) as table_size, | |
tc_col_size(table_name, column_name) as column_total_bytes, | |
tc_col_avg(table_name, column_name) as column_avg_bytes, | |
tc_col_pct(table_name, column_name) as column_pct_of_total | |
FROM | |
JSON_TABLE_COLUMNS | |
WHERE | |
tc_col_size(table_name, column_name) is not null and pg_table_size(table_name) != 0 | |
ORDER BY | |
column_total_bytes DESC, | |
table_size DESC) | |
TO '/tmp/column-sizes.csv' DELIMITER ',' CSV HEADER; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment