Created
January 16, 2020 19:15
-
-
Save jklukas/6f948d66789e211e7a4470bd82e84cf4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH | |
-- Distinct document_ids and their minimum submission_timestamp today | |
-- not including document_ids that only occur on or after @end_time | |
distinct_document_ids AS ( | |
SELECT | |
document_id, | |
MIN(submission_timestamp) AS submission_timestamp | |
FROM | |
`moz-fx-data-shared-prod.telemetry_live.main_v4` | |
WHERE | |
DATE(submission_timestamp) >= DATE_SUB( | |
DATE(@start_time), | |
INTERVAL @num_preceding_days DAY | |
) | |
AND submission_timestamp < @end_time | |
GROUP BY | |
document_id | |
HAVING | |
submission_timestamp >= @start_time), | |
-- A single slice of a live ping table. | |
base AS ( | |
SELECT | |
* | |
FROM | |
`moz-fx-data-shared-prod.telemetry_live.main_v4` | |
JOIN | |
distinct_document_ids | |
USING | |
-- Retain only the first seen documents for each ID, according to timestamp. | |
(document_id, submission_timestamp) | |
WHERE | |
submission_timestamp >= @start_time | |
AND submission_timestamp < @end_time), | |
-- | |
-- Order documents by assigning a row number. | |
numbered_duplicates AS ( | |
SELECT | |
*, | |
ROW_NUMBER() OVER (PARTITION BY document_id) AS _n | |
FROM | |
base) | |
-- | |
-- Retain only one document for each ID. | |
SELECT | |
* EXCEPT(_n) | |
FROM | |
numbered_duplicates | |
WHERE | |
_n = 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment