kigster/size_of_columns.sql

## size_of_columns.sql
-- This file requires a local SQL file named "json_table_columns.csv" to have two columns:
--    [ "Table", "Column" ]
-- It then loops over records in this table computing the total size of the column in each table,
-- the average size of the column in that table, and finally the percentage that column occupies relative
-- to the entire table (including its indexes).


-- First we import the data
BEGIN;
    drop table if exists JSON_TABLE_COLUMNS;
    create table JSON_TABLE_COLUMNS (table_name text, column_name text);
COMMIT;

BEGIN;
    \COPY JSON_TABLE_COLUMNS(table_name, column_name)
    FROM
        'json_table_columns.csv' DELIMITER ',' CSV HEADER;
COMMIT;


-- Now we define some functions to help us compute the column sizes
CREATE OR REPLACE FUNCTION tc_col_size(table_name text, column_name text)
    RETURNS BIGINT AS
$$
    declare response BIGINT;
BEGIN
    EXECUTE 'select sum(pg_column_size(t."' || column_name || '")) from ' || table_name || ' t ' into response;
    return response;
END;
$$
LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION tc_col_avg(table_name text, column_name text)
    RETURNS BIGINT AS
$$
    declare response BIGINT;
BEGIN
    EXECUTE 'select avg(pg_column_size(t."' || column_name || '")) from ' || table_name || ' t ' into response;
    return response;
END;
$$
LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION pg_table_size(table_name text)
    RETURNS BIGINT AS
$$
    declare response BIGINT;
BEGIN
    SELECT data_size from
        (SELECT relname AS "relname",
            pg_relation_size(C.oid) AS "data_size"
        FROM
            pg_class C
        LEFT JOIN
            pg_namespace N ON (N.oid = C.relnamespace)
        WHERE
            nspname NOT IN ('pg_catalog', 'information_schema')
                and C.relkind in ( 't',  'r', 'i')
                and nspname not in ('pg_toast')
        ) as table_sizes where relname = table_name into response;
    return response;
END;
$$
LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION tc_col_pct(table_name text, column_name text)
    RETURNS BIGINT AS
$$
    declare column_size BIGINT;
    declare table_size BIGINT;
    declare response FLOAT;
BEGIN
    EXECUTE 'select coalesce(sum(pg_column_size(t."' || column_name || '")), 0) from ' || table_name || ' t ' into column_size;
    EXECUTE 'select coalesce(pg_table_size(''' || table_name || '''), 0)' into table_size;
    if table_size = 0 then
        return 0;
    end if;
    EXECUTE 'select ' || column_size || ' * 100.0 / ('|| table_size ||' ) ' into response;
    return response;
END;
$$
LANGUAGE plpgsql;

\pset numericlocale true

-- Finally, we can run the query
\COPY (
SELECT
    table_name                              as table,
    column_name                             as column,
    pg_table_size(table_name)               as table_size,
    tc_col_size(table_name, column_name)    as column_total_bytes,
    tc_col_avg(table_name, column_name)     as column_avg_bytes,
    tc_col_pct(table_name, column_name)     as column_pct_of_total
FROM
    JSON_TABLE_COLUMNS
WHERE
     tc_col_size(table_name, column_name) is not null and pg_table_size(table_name) != 0
ORDER BY
    column_total_bytes DESC,
    table_size DESC)
TO '/tmp/column-sizes.csv' DELIMITER ',' CSV HEADER;
	-- This file requires a local SQL file named "json_table_columns.csv" to have two columns:
	-- [ "Table", "Column" ]
	-- It then loops over records in this table computing the total size of the column in each table,
	-- the average size of the column in that table, and finally the percentage that column occupies relative
	-- to the entire table (including its indexes).


	-- First we import the data
	BEGIN;
	drop table if exists JSON_TABLE_COLUMNS;
	create table JSON_TABLE_COLUMNS (table_name text, column_name text);
	COMMIT;

	BEGIN;
	\COPY JSON_TABLE_COLUMNS(table_name, column_name)
	FROM
	'json_table_columns.csv' DELIMITER ',' CSV HEADER;
	COMMIT;



	-- Now we define some functions to help us compute the column sizes
	CREATE OR REPLACE FUNCTION tc_col_size(table_name text, column_name text)
	RETURNS BIGINT AS
	$$
	declare response BIGINT;
	BEGIN
	EXECUTE 'select sum(pg_column_size(t."' \|\| column_name \|\| '")) from ' \|\| table_name \|\| ' t ' into response;
	return response;
	END;
	$$
	LANGUAGE plpgsql;



	CREATE OR REPLACE FUNCTION tc_col_avg(table_name text, column_name text)
	RETURNS BIGINT AS
	$$
	declare response BIGINT;
	BEGIN
	EXECUTE 'select avg(pg_column_size(t."' \|\| column_name \|\| '")) from ' \|\| table_name \|\| ' t ' into response;
	return response;
	END;
	$$
	LANGUAGE plpgsql;



	CREATE OR REPLACE FUNCTION pg_table_size(table_name text)
	RETURNS BIGINT AS
	$$
	declare response BIGINT;
	BEGIN
	SELECT data_size from
	(SELECT relname AS "relname",
	pg_relation_size(C.oid) AS "data_size"
	FROM
	pg_class C
	LEFT JOIN
	pg_namespace N ON (N.oid = C.relnamespace)
	WHERE
	nspname NOT IN ('pg_catalog', 'information_schema')
	and C.relkind in ( 't', 'r', 'i')
	and nspname not in ('pg_toast')
	) as table_sizes where relname = table_name into response;
	return response;
	END;
	$$
	LANGUAGE plpgsql;




	CREATE OR REPLACE FUNCTION tc_col_pct(table_name text, column_name text)
	RETURNS BIGINT AS
	$$
	declare column_size BIGINT;
	declare table_size BIGINT;
	declare response FLOAT;
	BEGIN
	EXECUTE 'select coalesce(sum(pg_column_size(t."' \|\| column_name \|\| '")), 0) from ' \|\| table_name \|\| ' t ' into column_size;
	EXECUTE 'select coalesce(pg_table_size(''' \|\| table_name \|\| '''), 0)' into table_size;
	if table_size = 0 then
	return 0;
	end if;
	EXECUTE 'select ' \|\| column_size \|\| ' * 100.0 / ('\|\| table_size \|\|' ) ' into response;
	return response;
	END;
	$$
	LANGUAGE plpgsql;

	\pset numericlocale true

	-- Finally, we can run the query
	\COPY (
	SELECT
	table_name as table,
	column_name as column,
	pg_table_size(table_name) as table_size,
	tc_col_size(table_name, column_name) as column_total_bytes,
	tc_col_avg(table_name, column_name) as column_avg_bytes,
	tc_col_pct(table_name, column_name) as column_pct_of_total
	FROM
	JSON_TABLE_COLUMNS
	WHERE
	tc_col_size(table_name, column_name) is not null and pg_table_size(table_name) != 0
	ORDER BY
	column_total_bytes DESC,
	table_size DESC)
	TO '/tmp/column-sizes.csv' DELIMITER ',' CSV HEADER;