@davidtedfordholt
Last active May 15, 2024 19:17
Get BigQuery table sizes: the script loops over every dataset in the project's region-us location in batches and collects each table's row count and storage size from the dataset's __TABLES__ metadata view. A simpler single-dataset variant follows the script.
-- Dataset (schema) names that still need to be processed.
DECLARE dataset_names ARRAY<STRING>;
-- Datasets handled in the current loop iteration.
DECLARE batch ARRAY<STRING>;
-- Number of datasets queried per dynamically built statement.
DECLARE batch_size INT64 DEFAULT 25;
-- Temporary table that accumulates per-table size information.
CREATE TEMP TABLE results (
  project_id STRING,
  dataset_id STRING,
  last_modified DATE,
  table_id STRING,
  row_count INT64,
  size_GB FLOAT64
);
-- Collect every dataset in the project's US multi-region; change
-- `region-us` to match your data location (e.g. `region-eu`).
SET dataset_names = (
  SELECT ARRAY_AGG(SCHEMA_NAME)
  FROM `region-us.INFORMATION_SCHEMA.SCHEMATA`
);
LOOP
  -- Stop once every dataset has been processed. ARRAY_AGG returns NULL
  -- rather than an empty array when no rows remain, so check for NULL too;
  -- otherwise the final iteration builds a NULL statement and fails.
  IF dataset_names IS NULL OR ARRAY_LENGTH(dataset_names) < 1 THEN
    LEAVE;
  END IF;
  -- Take the next batch_size datasets from the remaining list.
  SET batch = (
    SELECT ARRAY_AGG(d)
    FROM UNNEST(dataset_names) AS d WITH OFFSET i
    WHERE i < batch_size);
  -- Build one INSERT that UNIONs the __TABLES__ metadata view of every
  -- dataset in the batch, and append the per-table stats to results.
  EXECUTE IMMEDIATE (
    SELECT """INSERT INTO results """
      || STRING_AGG(
        (SELECT """
          SELECT
            project_id,
            dataset_id,
            CAST(TIMESTAMP_MILLIS(last_modified_time) AS DATE) AS last_modified,
            table_id,
            row_count,
            ROUND(size_bytes / 1024 / 1024 / 1024, 2) AS size_GB
          FROM `""" || s || """.__TABLES__`"""),
        " UNION ALL ")
    FROM UNNEST(batch) AS s);
  -- Remove the datasets that were just processed from the remaining list.
  SET dataset_names = (
    SELECT ARRAY_AGG(d)
    FROM UNNEST(dataset_names) AS d
    WHERE d NOT IN (SELECT * FROM UNNEST(batch)));
END LOOP;
-- Report every table, largest first.
SELECT * FROM results ORDER BY size_GB DESC;
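
If you only need a single known dataset, the batching machinery above isn't necessary; here is a minimal sketch querying that dataset's __TABLES__ view directly (the dataset name my_dataset is a placeholder):

-- Per-table row counts and sizes for one dataset (replace my_dataset).
SELECT
  project_id,
  dataset_id,
  CAST(TIMESTAMP_MILLIS(last_modified_time) AS DATE) AS last_modified,
  table_id,
  row_count,
  ROUND(size_bytes / 1024 / 1024 / 1024, 2) AS size_GB
FROM `my_dataset.__TABLES__`
ORDER BY size_GB DESC;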