Skip to content

Instantly share code, notes, and snippets.

@jmaher
Created June 9, 2020 20:40
Show Gist options
  • Save jmaher/63657c23d0f2149cfed577005490f583 to your computer and use it in GitHub Desktop.
Save jmaher/63657c23d0f2149cfed577005490f583 to your computer and use it in GitHub Desktop.
DECLARE start_date DATE DEFAULT "2020-04-20";
DECLARE end_date DATE DEFAULT "2020-04-24";
-- granularity is date_part: ideally 'day' or 'month', could be 'week'
DECLARE granularity STRING DEFAULT "day";
DECLARE types ARRAY <STRING>;
set types = ['gecko-t/t-linux-xlarge', 'gecko-t/t-linux-large', 'gecko-t/t-win10-64', 'gecko-t/t-win10-64-gpu-s', 'gecko-t/t-win7-32', 'gecko-t/t-win7-32-gpu'];
set types = ['gecko-t/t-win10-64-gpu-s'];
SELECT
orphaned.date,
orphaned.workerPoolId,
sum(orphaned.hours) as orphaned_hours,
sum(failed_reboot.bill_hours) as bill_hours,
sum(failed_reboot.worker_hours) as worker_hours,
sum(orphaned.hours)+sum(failed_reboot.bill_hours)-(sum(failed_reboot.worker_hours)) as hours_orphaned
FROM (
SELECT
failed_data.date as date,
failed_data.workerPoolId as workerPoolId,
sum(failed_data.bill_hours) as bill_hours,
sum(worker_seconds)/3600 as worker_hours from (
SELECT
CASE
WHEN granularity = 'day' THEN DATE_TRUNC(DATE(wm.timestamp), DAY)
WHEN granularity = 'month' THEN DATE_TRUNC(DATE(wm.timestamp), MONTH)
END as date,
wm.workerPoolId,
aws.hours as bill_hours,
TIMESTAMP_DIFF(max(wm.timestamp), min(wm.timestamp), SECOND) as worker_seconds
FROM
taskclusteretl.worker_metrics as wm, taskclusteretl.tmp_aws_instance_hours as aws
WHERE
wm.workerPoolId in UNNEST(types)
AND date(wm.timestamp)>=start_date
AND date(wm.timestamp)<end_date
AND aws.date>=start_date
AND aws.date<end_date
AND aws.workerPoolId in UNNEST(types)
AND wm.eventType not like 'taskQueue%'
AND wm.workerId in (
SELECT instanceId from taskclusteretl.tmp_aws_instance_hours where workerPoolId in UNNEST(types) and date>=start_date and date<end_date and hours>=20)
AND wm.workerId=aws.instanceId
GROUP BY
date,
wm.workerPoolId,
aws.hours
ORDER BY
date asc
) as failed_data
GROUP BY
failed_data.date,
failed_data.workerPoolId
ORDER BY
date asc
) as failed_reboot, (
SELECT
date,
workerPoolId,
sum(hours) as hours
FROM
taskclusteretl.tmp_aws_instance_hours
WHERE
workerPoolId in UNNEST(types)
AND date>=start_date
AND date<end_date
AND instanceId not in (
SELECT workerId from taskclusteretl.worker_metrics where workerPoolId in UNNEST(types) and date(timestamp)>=start_date and date(timestamp)<end_date)
GROUP BY
date, workerPoolId
ORDER BY
date asc
) as orphaned
WHERE
failed_reboot.date=orphaned.date
AND failed_reboot.workerPoolId=orphaned.workerPoolId
GROUP BY
orphaned.date, orphaned.workerPoolId
ORDER BY
date asc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment