Compare BigQuery job costs when running a job on either BigQuery Editions with the autoscaler or on-demand with both new and old pricing models.
/*
* This query will look at the past 30 days of job history to analyze it for costs under
* BigQuery Editions while utilizing the new autoscaling feature that was introduced.
* It does this for those using both PAYG (Pay As You Go) and commitment models.
* It will also compare this versus running the query with the on-demand model.
*
* Note that this query models several behaviors of the BigQuery autoscaler.
* Namely, these are the up-to-10-second "slot scale up time," the minimum
* 60-second "slot scale down time," and the behavior that the autoscaler
* scales up and down in increments of 100 slots for each job.
* Note that slots are billed during those scale-up and scale-down periods.
*
* The costs generated by this are strictly an estimate and for Editions
* should be considered a minimum cost of the query as the autoscaler
* may not scale "evenly" and may change scaling during runtime.
*/
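/*
* Worked example (illustrative numbers only, not taken from any real job history):
* a job that consumed 18,000,000 slot ms (18,000 slot seconds) over a 50 second
* runtime gets an autoscaled duration of 50 + 60 + 10 = 120 s and an average of
* 18,000 / 120 = 150 slots, which rounds up to 200 autoscaled slots. That is
* 200 * 120 / 3600 ≈ 6.67 slot hours, or roughly $0.40 under Enterprise PAYG
* at $0.06 per slot hour.
*/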
WITH jobs_duration AS
(
  SELECT
    job_id,
    query,
    start_time,
    end_time,
    TIMESTAMP_DIFF(end_time, start_time, SECOND) AS job_duration_s,
    -- Add 60 seconds scale down time + 10 seconds scale up time to duration for autoscaled duration
    TIMESTAMP_DIFF(end_time, start_time, SECOND) + 60 + 10 AS autoscaled_duration_s,
    -- Turn slot ms into slot seconds
    total_slot_ms / 1000 AS total_slot_s
  FROM
    `region-us`.INFORMATION_SCHEMA.JOBS
  WHERE
    creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 30 DAY) AND CURRENT_TIMESTAMP()
    --AND job_id NOT LIKE 'script_%'
    AND total_slot_ms > 0
    AND total_bytes_billed > 0
    AND state = "DONE"
),
jobs_slot_usage AS
(
  SELECT
    jd.job_id,
    jd.query,
    job_duration_s,
    total_bytes_processed / POW(1024, 4) AS total_tib_processed,
    CAST(FLOOR((CEIL(total_slot_s / autoscaled_duration_s) + 99) / 100) * 100 AS INT64) AS autoscaled_slot_usage,
    autoscaled_duration_s,
    -- Slot usage (rounded up to the next 100 slots) * duration in seconds / (60 seconds * 60 seconds) = slot hours used by this job
    CAST(FLOOR((CEIL(total_slot_s / autoscaled_duration_s) + 99) / 100) * 100 AS INT64)
      * (autoscaled_duration_s / (60 * 60))
      AS slot_hour
  FROM
    jobs_duration AS jd JOIN `region-us`.INFORMATION_SCHEMA.JOBS AS jobs
      ON jd.job_id = jobs.job_id
)
SELECT
  job_id,
  query,
  job_duration_s,
  total_tib_processed,
  -- Removed this because it has been a few months since the price change and it isn't very relevant anymore
  --ROUND(total_tib_processed * 5, 2) AS old_on_demand_cost,
  ROUND(total_tib_processed * 6.25, 2) AS on_demand_cost,
  autoscaled_duration_s,
  slot_hour,
  ROUND(slot_hour * 0.04, 2) AS standard_edition_cost,
  ROUND(slot_hour * 0.06, 2) AS enterprise_edition_cost,
  ROUND(slot_hour * 0.1, 2) AS enterprise_plus_edition_cost,
  ROUND(slot_hour * 0.048, 2) AS enterprise_edition_1yr_commit_cost,
  ROUND(slot_hour * 0.036, 2) AS enterprise_edition_3yr_commit_cost,
  ROUND(slot_hour * 0.08, 2) AS enterprise_plus_edition_1yr_commit_cost,
  ROUND(slot_hour * 0.06, 2) AS enterprise_plus_edition_3yr_commit_cost
FROM
  jobs_slot_usage;
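If you would rather see one 30-day total per pricing model instead of per-job figures, a minimal follow-up (a sketch only; it assumes the final SELECT above has been wrapped into one more CTE, named job_costs here purely for illustration) is to aggregate the estimates:

-- Sketch: total the per-job estimates. job_costs is a placeholder name for a CTE
-- wrapping the final SELECT of the query above.
SELECT
  ROUND(SUM(on_demand_cost), 2)               AS on_demand_total,
  ROUND(SUM(standard_edition_cost), 2)        AS standard_edition_total,
  ROUND(SUM(enterprise_edition_cost), 2)      AS enterprise_edition_total,
  ROUND(SUM(enterprise_plus_edition_cost), 2) AS enterprise_plus_edition_total
FROM
  job_costs;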
@zendesk-sova
Great stuff. However, I should add that after BQ has scaled up it can reuse the already-available slots for subsequent jobs without additional penalty, which avoids repeated cool-down charges. I tried this query and it predicted double what we actually spend on Enterprise Edition.
Overall I like this approach, but the 70 s addition for each job is too general, and every practitioner will need to tune this coefficient to get results that better match their own usage patterns.
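One way to make that padding tunable (a sketch against the query above, not something in the original gist) is to declare it as a script variable and adjust it until the estimates line up with your actual bill:

-- Sketch: turn the hard-coded 60 s + 10 s padding into a tunable parameter.
DECLARE scaling_padding_s INT64 DEFAULT 70;

-- ...then inside jobs_duration, replace the +60+10 with:
-- TIMESTAMP_DIFF(end_time, start_time, SECOND) + scaling_padding_s AS autoscaled_duration_s,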

@sayle-doit (Author) commented May 23, 2023 via email

@hippopotony commented May 26, 2023

For enterprise_edition_1yr_commit_cost, the price is 0.048, not 0.48. I'm not sure how the previous commenter could still use the result. Or perhaps he noticed the same error and used the correct price, but didn't point it out?

@ben-cohen-deel

Hey Sayle, thanks for sharing this, super interesting.

I was calculating this for our team, and we noticed that the actual cost of the reservation can be anywhere from 20% to 200% higher than the slot_ms-based cost, depending heavily on the max_slots chosen.

When we had max_slots = 1600, we found BQ often scaling up to 1600 slots, even though only the 99th percentile of queries requires that much. It appears the autoscaler prioritizes speed.

@sayle-doit (Author)

Unfortunately, that's the default behavior of the autoscaler, and it's why this query has to be framed as an estimate. AFAIK there currently isn't any way to calculate what your actual usage will be, due to how the autoscaler works.

I generally recommend using a very conservative max_slots value, as the autoscaler tends to max out on basic queries; once other queries are running it levels itself out to a better "balanced" number of slots per query.
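For anyone who wants to try that, here is a rough sketch of lowering the ceiling with reservation DDL (run it in the reservation's admin project; the project, region, and reservation names are placeholders, and autoscale_max_slots assumes the option name as documented at the time of writing):

-- Sketch: cap how far the autoscaler is allowed to scale this reservation up.
ALTER RESERVATION `my-admin-project.region-us.my-reservation`
  SET OPTIONS (autoscale_max_slots = 300);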

@nonmanager

Thank you for this, Sayle! Do you know if there is a way to change the autoscaler behavior to make it allocate slots more conservatively?

@sayle-doit (Author)

As far as I know, there aren't currently any methods available to us as users to adjust the autoscaler's behavior. I highly suspect this is coming in the future (maybe as soon as Next in 2 weeks?), but I have not been given any information about it; it's just my conjecture that they will release some "knobs" for end users to adjust how the autoscaler works. Many customers I have worked with have experienced this same behavior and given feedback directly to the teams at Google, which makes me believe they are listening and will adjust appropriately.
