Skip to content

Instantly share code, notes, and snippets.

@jklukas
Last active March 19, 2021 20:10
Show Gist options
  • Save jklukas/f83bc64963712bec34e0b8dc0f546646 to your computer and use it in GitHub Desktop.
Save jklukas/f83bc64963712bec34e0b8dc0f546646 to your computer and use it in GitHub Desktop.
Prototype of ios baseline+metrics ETL
/*
CREATE OR REPLACE TABLE `mozdata.analysis.klukas_ios_cls_joined`
PARTITION BY
submission_date
CLUSTER BY
sample_id
AS
WITH b AS (
SELECT *
FROM `moz-fx-data-shared-prod.org_mozilla_ios_firefox_derived.baseline_clients_last_seen_v1`
WHERE submission_date = '2021-01-01'
),
m AS (
SELECT * FROM `mozdata.analysis.klukas_ios_metrics_last_seen`
WHERE submission_date = '2021-01-02'
)
SELECT b.submission_date, * EXCEPT(submission_date) FROM
b LEFT JOIN m
USING (client_id, sample_id)
*/
WITH b AS (
SELECT *
FROM `moz-fx-data-shared-prod.org_mozilla_ios_firefox_derived.baseline_clients_last_seen_v1`
WHERE submission_date = @submission_date
),
m AS (
SELECT * FROM `mozdata.analysis.klukas_ios_metrics_last_seen`
WHERE submission_date = DATE_ADD(@submission_date, INTERVAL 1 DAY)
)
SELECT b.submission_date, * EXCEPT(submission_date) FROM
b LEFT JOIN m
USING (client_id, sample_id)
/*
DROP TABLE
mozdata.analysis.klukas_ios_metrics_last_seen;
CREATE TABLE
mozdata.analysis.klukas_ios_metrics_last_seen (submission_date DATE,
client_id STRING,
sample_id INT64,
n_metrics_ping INT64,
days_sent_metrics_ping_bits INT64,
search_default_engine STRING,
sum_app_opened_as_default_browser INT64,
days_app_opened_as_default_browser_bits INT64,
sum_search_counts ARRAY<STRUCT<key STRING,
value INT64>> )
PARTITION BY
submission_date
CLUSTER BY
sample_id ;
*/
WITH
_previous AS (
SELECT
* EXCEPT(submission_date)
FROM
mozdata.analysis.klukas_ios_metrics_last_seen
WHERE
submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
AND udf.shift_28_bits_one_day(days_sent_metrics_ping_bits) > 0
),
base AS (
SELECT
client_info.client_id,
sample_id,
metrics.labeled_counter.search_counts,
metrics.string.search_default_engine,
metrics.counter.app_opened_as_default_browser,
FROM
`moz-fx-data-shared-prod.org_mozilla_ios_firefox_stable.metrics_v1` AS m
WHERE
DATE(submission_timestamp) = @submission_date
),
_current_pre AS (
SELECT
client_id,
sample_id,
COUNT(*) AS n_metrics_ping,
1 AS days_sent_metrics_ping_bits,
mozfun.stats.mode_last(ARRAY_AGG(search_default_engine)) AS search_default_engine,
SUM(app_opened_as_default_browser) AS sum_app_opened_as_default_browser,
CAST(SUM(app_opened_as_default_browser) > 0 AS INT64) AS days_app_opened_as_default_browser_bits,
ARRAY_CONCAT_AGG(search_counts) AS all_search_counts,
FROM
base
GROUP BY
client_id,
sample_id ),
_current AS (
SELECT
* EXCEPT(all_search_counts),
ARRAY((
SELECT
AS STRUCT key,
SUM(value) AS value
FROM
UNNEST(all_search_counts)
GROUP BY
key)) AS sum_search_counts
FROM
_current_pre )
SELECT
DATE(@submission_date) AS submission_date,
client_id,
sample_id,
_current.n_metrics_ping,
udf.combine_adjacent_days_28_bits(_previous.days_sent_metrics_ping_bits,
_current.days_sent_metrics_ping_bits) AS days_sent_metrics_ping_bits,
COALESCE(_current.search_default_engine,
_previous.search_default_engine ) AS search_default_engine,
_current.sum_app_opened_as_default_browser,
udf.combine_adjacent_days_28_bits(_previous.days_app_opened_as_default_browser_bits,
_current.days_app_opened_as_default_browser_bits) AS days_app_opened_as_default_browser_bits,
_current.sum_search_counts,
FROM
_previous
FULL JOIN
_current
USING
(client_id,
sample_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment