Skip to content

Instantly share code, notes, and snippets.

@nadavwn
Last active July 10, 2023 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nadavwn/a01777b9feaafdf8c859041e5a51c7b5 to your computer and use it in GitHub Desktop.
Save nadavwn/a01777b9feaafdf8c859041e5a51c7b5 to your computer and use it in GitHub Desktop.
Code for getting the median daily slot usage and the estimated max configuration for Standard Edition
-- set the region, DEFAULT: us
-- set the time window dt_start and dt_end
-- set the project 'dataset_project_id' in case running in a different project (i.e. admin)
DECLARE
dt_start DEFAULT TIMESTAMP("2023-03-01 00:00:00") ;
DECLARE
dt_end DEFAULT TIMESTAMP("2023-05-31 23:59:59");
--SET @@dataset_project_id = 'my-project-id';
WITH
max_config AS (
WITH
p90_hour AS (
SELECT
TIMESTAMP_TRUNC(jbo.period_start, HOUR) AS usage_date,
jbo.reservation_id,
jbo.project_id,
jbo.job_type,
-- Aggregate total_slots_ms used for all jobs on this day and divide
-- by the number of milliseconds in a day. Most accurate for days with consistent slot usage
ROUND(SUM(jbo.period_slot_ms) / (1000 * 60 * 60)) AS sum_hourly_slot_usage_in_hours
FROM
`region-us`.INFORMATION_SCHEMA.JOBS_TIMELINE_BY_PROJECT jbo
WHERE
(jbo.statement_type != "SCRIPT"
OR jbo.statement_type IS NULL)
AND jbo.job_type = 'QUERY'
AND reservation_id IS NULL
AND jbo.period_start BETWEEN dt_start
AND dt_end-- Avoid duplicate byte counting in parent and children jobs.
GROUP BY
usage_date,
jbo.project_id,
jbo.job_type,
jbo.reservation_id
ORDER BY
usage_date DESC)
SELECT
project_id,
PERCENTILE_CONT(sum_hourly_slot_usage_in_hours,0.9) OVER() AS p90_hourly_slot_usage
FROM
p90_hour
where sum_hourly_slot_usage_in_hours > 100
LIMIT
1 )
SELECT
project_id,
dt_start,
dt_end,
p90_hourly_slot_usage,
CEIL(p90_hourly_slot_usage/100)*100 as inital_max_configuration
FROM
max_config;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment