Created
June 9, 2020 20:40
-
-
Save jmaher/63657c23d0f2149cfed577005490f583 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DECLARE start_date DATE DEFAULT "2020-04-20"; | |
DECLARE end_date DATE DEFAULT "2020-04-24"; | |
-- granularity is date_part: ideally 'day' or 'month', could be 'week' | |
DECLARE granularity STRING DEFAULT "day"; | |
DECLARE types ARRAY <STRING>; | |
set types = ['gecko-t/t-linux-xlarge', 'gecko-t/t-linux-large', 'gecko-t/t-win10-64', 'gecko-t/t-win10-64-gpu-s', 'gecko-t/t-win7-32', 'gecko-t/t-win7-32-gpu']; | |
set types = ['gecko-t/t-win10-64-gpu-s']; | |
SELECT | |
orphaned.date, | |
orphaned.workerPoolId, | |
sum(orphaned.hours) as orphaned_hours, | |
sum(failed_reboot.bill_hours) as bill_hours, | |
sum(failed_reboot.worker_hours) as worker_hours, | |
sum(orphaned.hours)+sum(failed_reboot.bill_hours)-(sum(failed_reboot.worker_hours)) as hours_orphaned | |
FROM ( | |
SELECT | |
failed_data.date as date, | |
failed_data.workerPoolId as workerPoolId, | |
sum(failed_data.bill_hours) as bill_hours, | |
sum(worker_seconds)/3600 as worker_hours from ( | |
SELECT | |
CASE | |
WHEN granularity = 'day' THEN DATE_TRUNC(DATE(wm.timestamp), DAY) | |
WHEN granularity = 'month' THEN DATE_TRUNC(DATE(wm.timestamp), MONTH) | |
END as date, | |
wm.workerPoolId, | |
aws.hours as bill_hours, | |
TIMESTAMP_DIFF(max(wm.timestamp), min(wm.timestamp), SECOND) as worker_seconds | |
FROM | |
taskclusteretl.worker_metrics as wm, taskclusteretl.tmp_aws_instance_hours as aws | |
WHERE | |
wm.workerPoolId in UNNEST(types) | |
AND date(wm.timestamp)>=start_date | |
AND date(wm.timestamp)<end_date | |
AND aws.date>=start_date | |
AND aws.date<end_date | |
AND aws.workerPoolId in UNNEST(types) | |
AND wm.eventType not like 'taskQueue%' | |
AND wm.workerId in ( | |
SELECT instanceId from taskclusteretl.tmp_aws_instance_hours where workerPoolId in UNNEST(types) and date>=start_date and date<end_date and hours>=20) | |
AND wm.workerId=aws.instanceId | |
GROUP BY | |
date, | |
wm.workerPoolId, | |
aws.hours | |
ORDER BY | |
date asc | |
) as failed_data | |
GROUP BY | |
failed_data.date, | |
failed_data.workerPoolId | |
ORDER BY | |
date asc | |
) as failed_reboot, ( | |
SELECT | |
date, | |
workerPoolId, | |
sum(hours) as hours | |
FROM | |
taskclusteretl.tmp_aws_instance_hours | |
WHERE | |
workerPoolId in UNNEST(types) | |
AND date>=start_date | |
AND date<end_date | |
AND instanceId not in ( | |
SELECT workerId from taskclusteretl.worker_metrics where workerPoolId in UNNEST(types) and date(timestamp)>=start_date and date(timestamp)<end_date) | |
GROUP BY | |
date, workerPoolId | |
ORDER BY | |
date asc | |
) as orphaned | |
WHERE | |
failed_reboot.date=orphaned.date | |
AND failed_reboot.workerPoolId=orphaned.workerPoolId | |
GROUP BY | |
orphaned.date, orphaned.workerPoolId | |
ORDER BY | |
date asc | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment