Last active
November 5, 2021 00:41
-
-
Save markrittman/ab15ae27adbcae879fe68ab637026248 to your computer and use it in GitHub Desktop.
First, Last, Even-Click and Time-Decay Multi-Touch Attribution Model for Snowplow (BigQuery Standard SQL)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH | |
events AS ( | |
SELECT | |
events_true_tstamp_time as event_ts, | |
events_user_fingerprint AS user_pseudo_id, | |
events_user_id as user_id, | |
mkt_network as channel, | |
events_mkt_medium as medium, | |
events_mkt_source as source, | |
events_mkt_campaign as campaign, | |
events_event_name as event_name, | |
domain_sessionid AS session_id, | |
row_number() over (partition by events_user_fingerprint order by events_true_tstamp_time) AS session_number, | |
row_number() over (partition by domain_sessionid order by events_true_tstamp_time) AS event_in_session_number, | |
events_page_referrer AS referrer, | |
events_page_urlpath AS page_path, | |
events_page_title AS page_title, | |
geo_country as country, | |
geo_region as region, | |
geo_city as city, | |
useragent as user_agent | |
FROM | |
`ra-development.snowplow.all_events` -- modify to your project | |
), | |
id_stitching as ( | |
SELECT | |
DISTINCT user_pseudo_id as user_pseudo_id, | |
LAST_VALUE(user_id ignore nulls) OVER ( | |
partition by user_pseudo_id | |
order by event_ts | |
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING | |
) AS user_id, | |
MIN(event_ts) OVER ( | |
PARTITION BY user_pseudo_id | |
) AS first_seen_at, | |
MAX(event_ts) OVER ( | |
PARTITION BY user_pseudo_id | |
) AS last_seen_at | |
FROM | |
events), | |
sessions AS ( | |
SELECT | |
* | |
FROM | |
events | |
WHERE | |
event_in_session_number = 1 ), | |
user_stitched_sessions as ( | |
SELECT | |
sessions.*, | |
coalesce(id_stitching.user_id, sessions.user_pseudo_id) as blended_user_id | |
FROM sessions | |
LEFT JOIN id_stitching using (user_pseudo_id) | |
), | |
sessions_with_start_end_times AS ( | |
SELECT | |
* EXCEPT (event_ts), | |
event_ts AS session_start_ts, | |
CAST(LEAD(event_ts,1) OVER (PARTITION BY CONCAT(blended_user_id) | |
ORDER BY | |
event_ts) AS timestamp) AS session_end_ts | |
FROM | |
user_stitched_sessions ), | |
converting_events AS ( | |
SELECT | |
coalesce(id_stitching.user_id, events.user_pseudo_id) as blended_user_id, | |
FIRST_VALUE(CASE WHEN event_name in ('link_click') THEN session_id END) OVER (PARTITION BY coalesce(id_stitching.user_id, events.user_pseudo_id) ORDER BY event_ts ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as session_id, | |
1 AS count_conversions, | |
event_name, | |
CAST((MIN(CASE WHEN event_name in ('link_click') THEN event_ts END ) OVER (PARTITION BY coalesce(id_stitching.user_id, events.user_pseudo_id))) as timestamp) AS converted_ts, | |
FROM | |
events | |
LEFT JOIN id_stitching using (user_pseudo_id) | |
WHERE | |
event_name in ('link_click')), | |
converting_sessions AS ( | |
SELECT | |
* | |
FROM | |
converting_events | |
GROUP BY | |
1, | |
2, | |
3, | |
4, | |
5 ), | |
converting_sessions_deduped AS ( | |
SELECT | |
blended_user_id AS blended_user_id, | |
MAX(CASE | |
WHEN event_name = 'link_click' THEN session_id | |
END | |
) AS session_id, | |
MIN(converted_ts) AS converted_ts | |
FROM | |
converting_sessions | |
GROUP BY | |
1 ), | |
converting_sessions_deduped_labelled AS ( | |
SELECT | |
c.blended_user_id, | |
s.session_start_ts, | |
s.session_end_ts, | |
c.converted_ts, | |
s.session_id AS session_id, | |
s.session_number AS session_seq, | |
CASE | |
WHEN (c.converted_ts BETWEEN s.session_start_ts AND coalesce(s.session_end_ts, current_timestamp)) THEN TRUE | |
ELSE | |
FALSE | |
END | |
AS conversion_session, | |
CASE | |
WHEN (c.converted_ts BETWEEN s.session_start_ts AND coalesce(s.session_end_ts, current_timestamp)) THEN 1 | |
ELSE | |
0 | |
END | |
AS event, | |
source, | |
medium, | |
channel, | |
referrer, | |
page_path, | |
user_agent, | |
session_number, | |
country, | |
region, | |
city | |
FROM | |
sessions_with_start_end_times s | |
JOIN | |
converting_sessions_deduped c | |
ON | |
c.blended_user_id = s.blended_user_id | |
WHERE | |
c.converted_ts >= s.session_start_ts | |
ORDER BY | |
c.blended_user_id, | |
s.session_start_ts), | |
session_attrib_pct AS ( | |
SELECT | |
*, | |
CASE | |
WHEN session_id = LAST_VALUE(session_id) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1 | |
ELSE | |
0 | |
END | |
AS LAST_click_attrib_pct, | |
CASE | |
WHEN session_id = FIRST_VALUE(session_id) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1 | |
ELSE | |
0 | |
END | |
AS first_click_attrib_pct, | |
1/COUNT(session_id) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS even_click_attrib_pct, | |
CASE | |
WHEN session_start_ts = FIRST_VALUE(session_start_ts) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AND MAX(event) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) = 1 THEN 1.1-ROW_NUMBER() OVER (PARTITION BY blended_user_id) | |
WHEN session_start_ts > LAG(session_start_ts) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts) | |
AND MAX(event) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) = 1 THEN ROUND(1.1-1/ROW_NUMBER() OVER (PARTITION BY blended_user_id), 2) | |
ELSE | |
NULL | |
END | |
AS weights | |
FROM | |
converting_sessions_deduped_labelled), | |
session_attrib_pct_with_time_decay AS ( | |
SELECT | |
*, | |
ROUND(CASE | |
WHEN (weights=0 OR SUM(weights) OVER (PARTITION BY blended_user_id)=0) THEN 0 | |
ELSE | |
weights/SUM(weights) OVER (PARTITION BY blended_user_id) | |
END | |
, 2) AS time_decay_attrib_pct | |
FROM | |
session_attrib_pct) | |
SELECT | |
* EXCEPT (event, | |
weights, | |
converted_ts) | |
FROM | |
session_attrib_pct_with_time_decay | |
ORDER BY | |
blended_user_id, | |
session_start_ts |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment