Skip to content

Instantly share code, notes, and snippets.

@gregrahn
Last active October 11, 2019 15:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gregrahn/881ea80a65fe5bab94c670f2b6d370a3 to your computer and use it in GitHub Desktop.
Save gregrahn/881ea80a65fe5bab94c670f2b6d370a3 to your computer and use it in GitHub Desktop.
--
-- list db/table/num_rows for tables with stats
--
SELECT
DBS.NAME AS DB_NAME,
TBLS.TBL_NAME AS TABLE_NAME,
TABLE_PARAMS.PARAM_VALUE as NUM_ROWS
FROM DBS
JOIN TBLS USING (DB_ID)
JOIN TABLE_PARAMS USING (TBL_ID)
WHERE PARAM_KEY = 'numRows'
ORDER BY 1,2
;
--
-- list db/table/partition/num_rows
--
SELECT
DBS.NAME AS DB_NAME,
TBLS.TBL_NAME AS TABLE_NAME,
PARTITIONS.PART_NAME AS PART_NAME,
PARTITION_PARAMS.PARAM_VALUE as NUM_ROWS
FROM DBS
JOIN TBLS USING (DB_ID)
JOIN PARTITIONS USING (TBL_ID)
JOIN PARTITION_PARAMS USING (PART_ID)
WHERE PARTITION_PARAMS.PARAM_KEY = 'numRows'
ORDER BY 1,2,3
;
--
-- tables w/o stats
--
SELECT
DBS.NAME AS DB_NAME,
TBLS.TBL_NAME AS TABLE_NAME
FROM DBS
JOIN TBLS USING (DB_ID)
WHERE TBLS.TBL_ID
NOT IN (
SELECT TBL_ID
FROM TABLE_PARAMS
WHERE PARAM_KEY = 'numRows'
AND PARAM_VALUE != -1
)
ORDER BY 1,2
;
--
-- table/partitions w/o stats
--
SELECT
DBS.NAME AS DB_NAME,
TBLS.TBL_NAME AS TABLE_NAME,
PARTITIONS.PART_NAME AS PART_NAME
FROM DBS
JOIN TBLS USING (DB_ID)
JOIN PARTITIONS USING (TBL_ID)
WHERE PARTITIONS.PART_ID
NOT IN (
SELECT PART_ID
FROM PARTITION_PARAMS
WHERE PARAM_KEY = 'numRows'
AND PARAM_VALUE != -1
)
ORDER BY 1,2,3
;
--
-- generate compute stats commands for tables w/o stats
--
SELECT
concat('compute stats ', DBS.NAME, '.', TBLS.TBL_NAME, ';') as cmd
FROM DBS
JOIN TBLS USING (DB_ID)
WHERE TBLS.TBL_ID
NOT IN (
SELECT TBL_ID
FROM TABLE_PARAMS
WHERE PARAM_KEY = 'numRows'
AND PARAM_VALUE != -1
)
ORDER BY DBS.NAME,TBLS.TBL_NAME
;
--
-- total size of incremental stats per table
--
SELECT
DBS.NAME,
TBL_NAME,
SUM(LENGTH(PARTITION_PARAMS.PARAM_KEY) +
LENGTH(PARTITION_PARAMS.PARAM_VALUE)) AS SIZE_IN_BYTES
FROM DBS
JOIN TBLS USING (DB_ID)
JOIN PARTITIONS USING (TBL_ID)
JOIN PARTITION_PARAMS USING (PART_ID)
WHERE
PARTITION_PARAMS.PARAM_KEY LIKE 'impala_intermediate%'
GROUP BY
DBS.NAME,
TBL_NAME;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment