jmaher/gist:78957fc4b150db71a8a38fe8b8ed2282

## gistfile1.txt
DECLARE start_date DATE DEFAULT "2020-01-01";
DECLARE end_date DATE DEFAULT "2020-06-01";
-- granularity is date_part: ideally 'day' or 'month', could be 'week'
DECLARE granularity STRING DEFAULT "month";
DECLARE types ARRAY <STRING>;
set types = ['t-linux-large', 't-linux-xlarge', 'gecko-t/t-linux-xlarge', 'gecko-t/t-linux-large', 'gecko-t/t-win10-64', 'gecko-t/t-win10-64-gpu-s', 'gecko-t/t-win7-32', 'gecko-t/t-win7-32-gpu'];
-- set types = ['gecko-t/t-linux-large', 't-linux-large'];

SELECT
    CASE
      WHEN granularity = 'day' THEN DATE_TRUNC(orphaned_data.date, DAY)
      WHEN granularity = 'month' THEN DATE_TRUNC(orphaned_data.date, MONTH)
    END as date,
    orphaned_data.workerPoolId as workerPoolId,
    sum(orphaned_data.hours) as orphaned_hours
FROM (
  SELECT
      date,
      workerPoolId,
      instanceId,
      sum(hours) as hours
    FROM
      taskclusteretl.tmp_aws_instance_hours
    WHERE
      workerPoolId in UNNEST(types)
      AND date>=start_date
      AND date<end_date
      AND instanceId not in (
        SELECT workerId from taskclusteretl.worker_metrics where workerPoolId in UNNEST(types) and date(timestamp)>=start_date and date(timestamp)<end_date)
    GROUP BY
      date,
      workerPoolId,
      instanceId
    ORDER BY
      date asc
) as orphaned_data
GROUP BY
  date,
  workerPoolId
ORDER BY
  date asc
	DECLARE start_date DATE DEFAULT "2020-01-01";
	DECLARE end_date DATE DEFAULT "2020-06-01";
	-- granularity is date_part: ideally 'day' or 'month', could be 'week'
	DECLARE granularity STRING DEFAULT "month";
	DECLARE types ARRAY <STRING>;
	set types = ['t-linux-large', 't-linux-xlarge', 'gecko-t/t-linux-xlarge', 'gecko-t/t-linux-large', 'gecko-t/t-win10-64', 'gecko-t/t-win10-64-gpu-s', 'gecko-t/t-win7-32', 'gecko-t/t-win7-32-gpu'];
	-- set types = ['gecko-t/t-linux-large', 't-linux-large'];

	SELECT
	CASE
	WHEN granularity = 'day' THEN DATE_TRUNC(orphaned_data.date, DAY)
	WHEN granularity = 'month' THEN DATE_TRUNC(orphaned_data.date, MONTH)
	END as date,
	orphaned_data.workerPoolId as workerPoolId,
	sum(orphaned_data.hours) as orphaned_hours
	FROM (
	SELECT
	date,
	workerPoolId,
	instanceId,
	sum(hours) as hours
	FROM
	taskclusteretl.tmp_aws_instance_hours
	WHERE
	workerPoolId in UNNEST(types)
	AND date>=start_date
	AND date<end_date
	AND instanceId not in (
	SELECT workerId from taskclusteretl.worker_metrics where workerPoolId in UNNEST(types) and date(timestamp)>=start_date and date(timestamp)<end_date)
	GROUP BY
	date,
	workerPoolId,
	instanceId
	ORDER BY
	date asc
	) as orphaned_data
	GROUP BY
	date,
	workerPoolId
	ORDER BY
	date asc