Skip to content

Instantly share code, notes, and snippets.

@jmaher
Created June 25, 2020 16:46
Show Gist options
  • Save jmaher/78957fc4b150db71a8a38fe8b8ed2282 to your computer and use it in GitHub Desktop.
Save jmaher/78957fc4b150db71a8a38fe8b8ed2282 to your computer and use it in GitHub Desktop.
DECLARE start_date DATE DEFAULT "2020-01-01";
DECLARE end_date DATE DEFAULT "2020-06-01";
-- granularity is date_part: ideally 'day' or 'month', could be 'week'
DECLARE granularity STRING DEFAULT "month";
DECLARE types ARRAY <STRING>;
set types = ['t-linux-large', 't-linux-xlarge', 'gecko-t/t-linux-xlarge', 'gecko-t/t-linux-large', 'gecko-t/t-win10-64', 'gecko-t/t-win10-64-gpu-s', 'gecko-t/t-win7-32', 'gecko-t/t-win7-32-gpu'];
-- set types = ['gecko-t/t-linux-large', 't-linux-large'];
SELECT
CASE
WHEN granularity = 'day' THEN DATE_TRUNC(orphaned_data.date, DAY)
WHEN granularity = 'month' THEN DATE_TRUNC(orphaned_data.date, MONTH)
END as date,
orphaned_data.workerPoolId as workerPoolId,
sum(orphaned_data.hours) as orphaned_hours
FROM (
SELECT
date,
workerPoolId,
instanceId,
sum(hours) as hours
FROM
taskclusteretl.tmp_aws_instance_hours
WHERE
workerPoolId in UNNEST(types)
AND date>=start_date
AND date<end_date
AND instanceId not in (
SELECT workerId from taskclusteretl.worker_metrics where workerPoolId in UNNEST(types) and date(timestamp)>=start_date and date(timestamp)<end_date)
GROUP BY
date,
workerPoolId,
instanceId
ORDER BY
date asc
) as orphaned_data
GROUP BY
date,
workerPoolId
ORDER BY
date asc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment