Created
June 25, 2020 16:08
-
-
Save jmaher/12b744511bebff9d71c889c064691efc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DECLARE start_date DATE DEFAULT "2020-01-01"; | |
DECLARE end_date DATE DEFAULT "2020-06-01"; | |
-- granularity is date_part: ideally 'day' or 'month', could be 'week' | |
DECLARE granularity STRING DEFAULT "month"; | |
DECLARE types ARRAY <STRING>; | |
set types = ['t-linux-large', 't-linux-xlarge', 'gecko-t/t-linux-xlarge', 'gecko-t/t-linux-large', 'gecko-t/t-win10-64', 'gecko-t/t-win10-64-gpu-s', 'gecko-t/t-win7-32', 'gecko-t/t-win7-32-gpu']; | |
CREATE TEMP FUNCTION | |
summary(a ARRAY<STRUCT<timestamp TIMESTAMP, | |
eventType STRING>>) | |
RETURNS STRUCT<timestamp TIMESTAMP, | |
elapsed_time NUMERIC, | |
overhead_time NUMERIC, | |
overprovision NUMERIC, | |
tasks NUMERIC, | |
extra NUMERIC, | |
task_time NUMERIC, | |
end_time NUMERIC, | |
over_tasks NUMERIC, | |
missed_shutdown NUMERIC, | |
bootstrap NUMERIC> | |
LANGUAGE js AS """ | |
var len = a.length; | |
var elapsed_time = 0; | |
var taskTime = 0; | |
var tasks = 0; | |
var total_tasks = 0; | |
var extra = 0; | |
var bootstrap = 0; | |
var endtime = 0; | |
var overprovision = 0; | |
var over_tasks = 0; | |
var missed_shutdown = 1; | |
var finished = false; | |
var startTime = 0; | |
var taskFinish = 0; | |
var instanceBoot = 0; | |
var firstBoot=true; | |
var lastTimestamp = a[0].timestamp; | |
for (iter=0; iter < len; iter++) { | |
var et = a[iter].eventType; | |
var ts = a[iter].timestamp; | |
if (et.startsWith("taskQueue")) { | |
tasks++; | |
if (finished == true) { | |
if ((taskFinish>0) && (((ts-taskFinish) / 1000) > 0)) extra++; | |
} | |
finished = false; | |
taskFinish = 0; | |
continue; | |
} | |
if (elapsed_time == 0) { | |
elapsed_time = a[len - 1].timestamp - ts; | |
if (et != "instanceBoot") instanceBoot = -1; | |
} | |
if (et == "instanceBoot" && instanceBoot==0) { | |
instanceBoot = ts; | |
} else if ((et.startsWith("workerReady")) && bootstrap==0 && instanceBoot>0) { | |
bootstrap += ts - instanceBoot | |
} else if (et == "taskStart") { | |
startTime = ts; | |
taskFinish = 0; | |
finished = false; | |
} else if (et.startsWith("taskFinish")) { | |
if (startTime == 0) { | |
startTime = lastTimestamp; // account for tasks crossing days | |
} | |
taskFinish = ts; | |
taskTime += taskFinish - startTime; | |
startTime = 0; | |
tasks--; | |
total_tasks++; | |
if (tasks == 0) finished = true; | |
} else if (et.startsWith("instanceShutdown")) { | |
missed_shutdown = 0; | |
} | |
lastTimestamp = a[iter].timestamp; | |
} | |
// account for tasks crossing days and finishing the next day | |
if (startTime > 0) { | |
taskTime += lastTimestamp-startTime; | |
missed_shutdown = 0; | |
} | |
if (missed_shutdown == 1 && lastTimestamp.getHours() < 23) { | |
missed_shutdown = 0; | |
} | |
if (a[a.length-1].eventType == 'instanceShutdown' && taskFinish != 0) { | |
endtime = a[a.length-1].timestamp - taskFinish; | |
} | |
if (bootstrap == -1) bootstrap = 0; | |
if (total_tasks == 0) { | |
overprovision += elapsed_time; | |
over_tasks = 1; | |
taskTime = 0; | |
bootstrap = 0; | |
endtime = 0; | |
} | |
return {elapsed_time:(elapsed_time / 1000), overhead_time:((elapsed_time-taskTime) / 1000), overprovision:(overprovision / 1000), tasks:total_tasks, extra:extra, task_time:(taskTime/1000), end_time:(endtime / 1000), over_tasks:over_tasks, missed_shutdown:missed_shutdown, bootstrap:(bootstrap / 1000)}; | |
"""; | |
WITH | |
nested AS ( | |
SELECT | |
CASE | |
WHEN granularity = 'day' THEN DATE_TRUNC(DATE(timestamp), DAY) | |
WHEN granularity = 'month' THEN DATE_TRUNC(DATE(timestamp), MONTH) | |
END as date, | |
workerId, | |
CASE | |
WHEN runId is null THEN 0 | |
ELSE runId | |
END as runId, | |
STRING_AGG(DISTINCT workerPoolId) AS workerPoolId, | |
ARRAY_AGG((timestamp, eventType) | |
ORDER BY | |
timestamp asc) AS events | |
FROM | |
taskclusteretl.worker_metrics | |
WHERE | |
DATE_TRUNC(DATE(timestamp), DAY) >= start_date | |
AND DATE_TRUNC(DATE(timestamp), DAY) < end_date | |
AND workerPoolId in UNNEST(types) | |
GROUP BY | |
date, | |
workerId, | |
runId), | |
worker_summary AS ( | |
SELECT | |
date, | |
workerId, | |
workerPoolId, | |
summary(events) AS times | |
FROM | |
nested) | |
SELECT | |
date, | |
CASE | |
WHEN workerPoolId='t-linux-large' THEN 'gecko-t/t-linux-large' | |
WHEN workerPoolId='t-linux-xlarge' THEN 'gecko-t/t-linux-xlarge' | |
ELSE workerPoolId | |
END as workerPoolId, | |
-- workerId, | |
-- sum(times.bootstrap), | |
-- times.end_time | |
count(workerId) as total_workers, | |
sum(times.tasks) as total_tasks, | |
sum(times.over_tasks) as overprovisioned_workers, | |
sum(times.missed_shutdown) as missed_shutdown, | |
CAST(SUM(times.overprovision)/3600 AS INT64) as overprovision_hours, | |
CAST(sum(times.bootstrap)/3600 AS INT64) as bootstrap_hours, | |
CAST(sum(times.end_time)/3600 AS int64) as end_hours, | |
CAST(sum(times.task_time)/3600 AS int64) as task_hours, | |
CAST(sum(times.overhead_time)/3600 as INT64) as idle_hours, | |
CAST(sum(times.elapsed_time)/3600 as INT64) as total_hours, | |
-- sum(times.extra) as added_tasks, | |
-- CAST(SAFE_DIVIDE(sum(times.bootstrap), countif(times.bootstrap>0)) as INT64) as avg_bootstrap_sec, | |
-- CAST(SAFE_DIVIDE(sum(times.end_time), countif(times.end_time>0)) as INT64) as avg_endtime_sec, | |
-- avg_endtime_sec * total_workers - avg_bootstrap_sec * added_tasks | |
-- CAST( ((( (sum(times.end_time)/countif(times.end_time>0))*count(workerId)) - ((sum(times.extra)*sum(times.bootstrap))/countif(times.bootstrap>0)))/3600) as INT64) as saved_hours, | |
-- avg_endtime_sec * total_workers - avg_bootstrap_sec * added_tasks / total_hours *100 | |
-- CAST( (((((sum(times.end_time)/countif(times.end_time>0))*count(workerId)) - ((sum(times.extra)*sum(times.bootstrap))/countif(times.bootstrap>0)))/3600)/(sum(times.elapsed_time)/3600))*100 as INT64) as saved_hours_pct_total | |
FROM | |
worker_summary | |
GROUP BY | |
date, workerPoolId order by date,workerPoolId asc |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment