Created
February 8, 2020 22:56
-
-
Save markrittman/2e7f0d530338e167f8255b5c083dbc76 to your computer and use it in GitHub Desktop.
Multi-Channel Attribution Standard SQL query for Google BigQuery sourcing session data from Segment and attributed value from operational system
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH session_attributiON AS | |
(SELECT *, | |
CASE WHEN session_id = LAST_VALUE(session_id) | |
OVER (partitiON by user_id order by session_start_tstamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1 | |
ELSE 0 | |
END AS LAST_click_attrib_pct, | |
CASE WHEN session_id = FIRST_VALUE(session_id) | |
OVER (partitiON by user_id order by session_start_tstamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1 | |
ELSE 0 | |
END AS first_click_attrib_pct, | |
1/count(session_id) OVER (partitiON by worker_id) AS even_click_attrib_pct, | |
CASE WHEN session_start_tstamp <= timestamp_sub(created_at, interval 7 day) THEN 1 | |
WHEN session_start_tstamp > timestamp_sub(created_at, interval 14 day) THEN .5 | |
WHEN session_start_tstamp > timestamp_sub(created_at, interval 14 day) AND session_start_tstamp > timestamp_sub(created_at, interval 21 day) THEN .25 | |
WHEN session_start_tstamp > timestamp_sub(created_at, interval 21 day) AND session_start_tstamp > timestamp_sub(created_at, interval 28 day) THEN .125 | |
ELSE 0 | |
END AS time_decay_attrib_pct | |
FROM | |
( | |
SELECT w.full_name, | |
s.session_start_tstamp, | |
s.session_end_tstamp, | |
w.created_at, | |
w.id AS user_id, | |
s.session_id, | |
row_number() over | |
(partitiON by w.id order by s.session_start_tstamp) AS session_seq, | |
CASE WHEN w.created_at between s.session_start_tstamp and s.session_end_tstamp then true | |
else false | |
end AS conversion_session, | |
CASE WHEN w.created_at < s.session_start_tstamp then true | |
else false | |
end AS prospect_session, | |
coalesce(s.utm_source,'Direct') utm_source, | |
coalesce(s.utm_content,'Direct') utm_content, | |
coalesce(s.utm_medium,'Direct') utm_medium, | |
coalesce(s.utm_campaign,'Direct') utm_campaign, | |
s.first_page_url_path AS entrance_url_path, | |
s.last_page_url_path AS exit_url_path, | |
referrer, | |
duration_in_s, | |
page_views | |
FROM `ra-development.analytics.segment_web_sessions__stitched` s | |
JOIN `ra-development.analytics.users` w | |
ON cast(w.id AS string) = s.blended_user_id | |
WHERE w.created_at >= s.session_start_tstamp | |
ORDER BY w.id, s.session_start_tstamp) | |
), | |
user_transactions AS | |
( | |
SELECT user_id AS user_id, | |
SUM(transactions) AS total_user_transactions | |
FROM `ra-development.analytics.user_transactions` | |
WHERE is_approved | |
GROUP BY 1 | |
) | |
SELECT session_attribution.*, | |
total_user_transactions * first_click_attrib_pct AS first_click_attrib_total_transactions, | |
total_user_transactions * last_click_attrib_pct AS last_click_attrib_total_transactions, | |
total_user_transactions * even_click_attrib_pct AS even_click_attrib_total_transactions, | |
total_user_transactions * time_decay_attrib_pct AS time_decay_attrib_total_transactions, | |
1 * first_click_attrib_pct AS first_click_attrib_registrations, | |
1 * last_click_attrib_pct AS last_click_attrib_registrations, | |
1 * even_click_attrib_pct AS even_click_attrib_registrations, | |
1 * time_decay_attrib_pct AS time_decay_attrib_registrations | |
FROM session_attribution | |
JOIN user_transactions | |
ON session_attribution.user_id = user_transactions.user_id |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Centralizing data, creating good-enough attribution model, establishing a pipepline with DBT and presenting with Looker : and make this happen in short time frame with the modern analytics engineering. This is nice and elegant. Kudos to Rittman team!