Skip to content

Instantly share code, notes, and snippets.

@tomhanlon
Created April 5, 2022 18:50
Show Gist options
  • Save tomhanlon/326fc47fa6344343e50bb9d7fca52162 to your computer and use it in GitHub Desktop.
Save tomhanlon/326fc47fa6344343e50bb9d7fca52162 to your computer and use it in GitHub Desktop.
-- www - Primary Header CTA Clicked (count)
WITH __rawExperiment as (
SELECT
user_id,
anonymous_id,
received_at as timestamp,
experiment_id,
variation_id
FROM
`datastax-datalake.www.tracks_view`
),
__experiment as (
-- Viewed Experiment
SELECT
e.anonymous_id as user_id,
cast(e.variation_id as string) as variation,
CAST(e.timestamp as DATETIME) as conversion_start,
DATETIME_ADD(CAST(e.timestamp as DATETIME), INTERVAL 72 HOUR) as conversion_end
FROM
__rawExperiment e
WHERE
e.experiment_id = 'header-aa-v10'
AND CAST(e.timestamp as DATETIME) >= DATETIME "2022-04-04 19:44:00"
),
__metric as (
-- Metric (www - Primary Header CTA Clicked)
SELECT
m.anonymous_id as user_id,
m.value as value,
CAST(m.timestamp as DATETIME) as conversion_start,
CAST(m.timestamp as DATETIME) as conversion_end
FROM
(
SELECT
1 as value,
user_id as user_id,
anonymous_id as anonymous_id,
timestamp as timestamp,
FROM
`www.www_primary_header_cta_click`
) m
WHERE
CAST(m.timestamp as DATETIME) >= DATETIME "2022-04-04 19:44:00"
),
__distinctUsers as (
-- One row per user/dimension
SELECT
e.user_id,
'All' as dimension,
(
CASE
WHEN count(distinct e.variation) > 1 THEN '__multiple__'
ELSE max(e.variation) END
) as variation,
MIN(e.conversion_start) as conversion_start,
MIN(e.conversion_end) as conversion_end
FROM
__experiment e
GROUP BY
dimension,
e.user_id
),
__userMetric as (
-- Add in the aggregate metric value for each user
SELECT
d.variation,
d.dimension,
SUM(value) as value
FROM
__distinctUsers d
JOIN __metric m ON (m.user_id = d.user_id)
WHERE
m.conversion_start >= d.conversion_start
AND m.conversion_start <= d.conversion_end
GROUP BY
variation,
dimension,
d.user_id
),
__overallUsers as (
-- Number of users in each variation
SELECT
variation,
dimension,
COUNT(*) as users
FROM
__distinctUsers
GROUP BY
variation,
dimension
),
__stats as (
-- Sum all user metrics together to get a total per variation/dimension
SELECT
variation,
dimension,
COUNT(*) as count,
AVG(value) as mean,
STDDEV(value) as stddev
FROM
__userMetric
GROUP BY
variation,
dimension
)
SELECT
s.variation,
s.dimension,
s.count,
s.mean,
s.stddev,
u.users
FROM
__stats s
JOIN __overallUsers u ON (
s.variation = u.variation
AND s.dimension = u.dimension
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment