Skip to content

Instantly share code, notes, and snippets.

@kigster
Last active March 17, 2023 19:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kigster/1c71f852e9eafc8b2b2efa85928b2d67 to your computer and use it in GitHub Desktop.
Save kigster/1c71f852e9eafc8b2b2efa85928b2d67 to your computer and use it in GitHub Desktop.
-- This file requires a local SQL file named "json_table_columns.csv" to have two columns:
-- [ "Table", "Column" ]
-- It then loops over records in this table computing the total size of the column in each table,
-- the average size of the column in that table, and finally the percentage that column occupies relative
-- to the entire table (including its indexes).
-- First we import the data
BEGIN;
drop table if exists JSON_TABLE_COLUMNS;
create table JSON_TABLE_COLUMNS (table_name text, column_name text);
COMMIT;
BEGIN;
\COPY JSON_TABLE_COLUMNS(table_name, column_name)
FROM
'json_table_columns.csv' DELIMITER ',' CSV HEADER;
COMMIT;
-- Now we define some functions to help us compute the column sizes
CREATE OR REPLACE FUNCTION tc_col_size(table_name text, column_name text)
RETURNS BIGINT AS
$$
declare response BIGINT;
BEGIN
EXECUTE 'select sum(pg_column_size(t."' || column_name || '")) from ' || table_name || ' t ' into response;
return response;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION tc_col_avg(table_name text, column_name text)
RETURNS BIGINT AS
$$
declare response BIGINT;
BEGIN
EXECUTE 'select avg(pg_column_size(t."' || column_name || '")) from ' || table_name || ' t ' into response;
return response;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION pg_table_size(table_name text)
RETURNS BIGINT AS
$$
declare response BIGINT;
BEGIN
SELECT data_size from
(SELECT relname AS "relname",
pg_relation_size(C.oid) AS "data_size"
FROM
pg_class C
LEFT JOIN
pg_namespace N ON (N.oid = C.relnamespace)
WHERE
nspname NOT IN ('pg_catalog', 'information_schema')
and C.relkind in ( 't', 'r', 'i')
and nspname not in ('pg_toast')
) as table_sizes where relname = table_name into response;
return response;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION tc_col_pct(table_name text, column_name text)
RETURNS BIGINT AS
$$
declare column_size BIGINT;
declare table_size BIGINT;
declare response FLOAT;
BEGIN
EXECUTE 'select coalesce(sum(pg_column_size(t."' || column_name || '")), 0) from ' || table_name || ' t ' into column_size;
EXECUTE 'select coalesce(pg_table_size(''' || table_name || '''), 0)' into table_size;
if table_size = 0 then
return 0;
end if;
EXECUTE 'select ' || column_size || ' * 100.0 / ('|| table_size ||' ) ' into response;
return response;
END;
$$
LANGUAGE plpgsql;
\pset numericlocale true
-- Finally, we can run the query
\COPY (
SELECT
table_name as table,
column_name as column,
pg_table_size(table_name) as table_size,
tc_col_size(table_name, column_name) as column_total_bytes,
tc_col_avg(table_name, column_name) as column_avg_bytes,
tc_col_pct(table_name, column_name) as column_pct_of_total
FROM
JSON_TABLE_COLUMNS
WHERE
tc_col_size(table_name, column_name) is not null and pg_table_size(table_name) != 0
ORDER BY
column_total_bytes DESC,
table_size DESC)
TO '/tmp/column-sizes.csv' DELIMITER ',' CSV HEADER;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment