Skip to content

Instantly share code, notes, and snippets.

@markrittman
Last active November 5, 2021 00:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markrittman/ab15ae27adbcae879fe68ab637026248 to your computer and use it in GitHub Desktop.
Save markrittman/ab15ae27adbcae879fe68ab637026248 to your computer and use it in GitHub Desktop.
First, Last, Even-Click and Time-Decay Multi-Touch Attribution Model for Snowplow (BigQuery Standard SQL)
WITH
events AS (
SELECT
events_true_tstamp_time as event_ts,
events_user_fingerprint AS user_pseudo_id,
events_user_id as user_id,
mkt_network as channel,
events_mkt_medium as medium,
events_mkt_source as source,
events_mkt_campaign as campaign,
events_event_name as event_name,
domain_sessionid AS session_id,
row_number() over (partition by events_user_fingerprint order by events_true_tstamp_time) AS session_number,
row_number() over (partition by domain_sessionid order by events_true_tstamp_time) AS event_in_session_number,
events_page_referrer AS referrer,
events_page_urlpath AS page_path,
events_page_title AS page_title,
geo_country as country,
geo_region as region,
geo_city as city,
useragent as user_agent
FROM
`ra-development.snowplow.all_events` -- modify to your project
),
id_stitching as (
SELECT
DISTINCT user_pseudo_id as user_pseudo_id,
LAST_VALUE(user_id ignore nulls) OVER (
partition by user_pseudo_id
order by event_ts
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS user_id,
MIN(event_ts) OVER (
PARTITION BY user_pseudo_id
) AS first_seen_at,
MAX(event_ts) OVER (
PARTITION BY user_pseudo_id
) AS last_seen_at
FROM
events),
sessions AS (
SELECT
*
FROM
events
WHERE
event_in_session_number = 1 ),
user_stitched_sessions as (
SELECT
sessions.*,
coalesce(id_stitching.user_id, sessions.user_pseudo_id) as blended_user_id
FROM sessions
LEFT JOIN id_stitching using (user_pseudo_id)
),
sessions_with_start_end_times AS (
SELECT
* EXCEPT (event_ts),
event_ts AS session_start_ts,
CAST(LEAD(event_ts,1) OVER (PARTITION BY CONCAT(blended_user_id)
ORDER BY
event_ts) AS timestamp) AS session_end_ts
FROM
user_stitched_sessions ),
converting_events AS (
SELECT
coalesce(id_stitching.user_id, events.user_pseudo_id) as blended_user_id,
FIRST_VALUE(CASE WHEN event_name in ('link_click') THEN session_id END) OVER (PARTITION BY coalesce(id_stitching.user_id, events.user_pseudo_id) ORDER BY event_ts ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as session_id,
1 AS count_conversions,
event_name,
CAST((MIN(CASE WHEN event_name in ('link_click') THEN event_ts END ) OVER (PARTITION BY coalesce(id_stitching.user_id, events.user_pseudo_id))) as timestamp) AS converted_ts,
FROM
events
LEFT JOIN id_stitching using (user_pseudo_id)
WHERE
event_name in ('link_click')),
converting_sessions AS (
SELECT
*
FROM
converting_events
GROUP BY
1,
2,
3,
4,
5 ),
converting_sessions_deduped AS (
SELECT
blended_user_id AS blended_user_id,
MAX(CASE
WHEN event_name = 'link_click' THEN session_id
END
) AS session_id,
MIN(converted_ts) AS converted_ts
FROM
converting_sessions
GROUP BY
1 ),
converting_sessions_deduped_labelled AS (
SELECT
c.blended_user_id,
s.session_start_ts,
s.session_end_ts,
c.converted_ts,
s.session_id AS session_id,
s.session_number AS session_seq,
CASE
WHEN (c.converted_ts BETWEEN s.session_start_ts AND coalesce(s.session_end_ts, current_timestamp)) THEN TRUE
ELSE
FALSE
END
AS conversion_session,
CASE
WHEN (c.converted_ts BETWEEN s.session_start_ts AND coalesce(s.session_end_ts, current_timestamp)) THEN 1
ELSE
0
END
AS event,
source,
medium,
channel,
referrer,
page_path,
user_agent,
session_number,
country,
region,
city
FROM
sessions_with_start_end_times s
JOIN
converting_sessions_deduped c
ON
c.blended_user_id = s.blended_user_id
WHERE
c.converted_ts >= s.session_start_ts
ORDER BY
c.blended_user_id,
s.session_start_ts),
session_attrib_pct AS (
SELECT
*,
CASE
WHEN session_id = LAST_VALUE(session_id) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1
ELSE
0
END
AS LAST_click_attrib_pct,
CASE
WHEN session_id = FIRST_VALUE(session_id) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1
ELSE
0
END
AS first_click_attrib_pct,
1/COUNT(session_id) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS even_click_attrib_pct,
CASE
WHEN session_start_ts = FIRST_VALUE(session_start_ts) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AND MAX(event) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) = 1 THEN 1.1-ROW_NUMBER() OVER (PARTITION BY blended_user_id)
WHEN session_start_ts > LAG(session_start_ts) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts)
AND MAX(event) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) = 1 THEN ROUND(1.1-1/ROW_NUMBER() OVER (PARTITION BY blended_user_id), 2)
ELSE
NULL
END
AS weights
FROM
converting_sessions_deduped_labelled),
session_attrib_pct_with_time_decay AS (
SELECT
*,
ROUND(CASE
WHEN (weights=0 OR SUM(weights) OVER (PARTITION BY blended_user_id)=0) THEN 0
ELSE
weights/SUM(weights) OVER (PARTITION BY blended_user_id)
END
, 2) AS time_decay_attrib_pct
FROM
session_attrib_pct)
SELECT
* EXCEPT (event,
weights,
converted_ts)
FROM
session_attrib_pct_with_time_decay
ORDER BY
blended_user_id,
session_start_ts
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment