Skip to content

Instantly share code, notes, and snippets.

@markrittman
Last active February 2, 2023 17:54
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markrittman/5fa843ab279743771d71eab12b2d854c to your computer and use it in GitHub Desktop.
Save markrittman/5fa843ab279743771d71eab12b2d854c to your computer and use it in GitHub Desktop.
First-Click, Last-Click, Even-Click and Time Decay Attribution for GA4
/*
First-Click, Last-Click, Even-Click and Time Decay Attribution for GA4
Replace "event_name in ('Contact Us','Contact Us Clicked','CTA Pressed','Form Submitted')" with the name(s) of your conversion event in lines 54, 57, 61 and 76
*/
WITH
events AS (
SELECT
event_timestamp as event_ts,
user_pseudo_id AS user_pseudo_id,
user_id,
traffic_source.name as channel,
traffic_source.medium as medium,
traffic_source.source as source,
event_name,
(SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS session_id,
(SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_number') AS session_number,
(SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_referrer') AS referrer,
(SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location') AS page_path,
(SELECT value.string_value FROM UNNEST(event_params) WHERE event_name = 'page_view' AND key = 'page_title') AS page_title,
device.category as device_category,
device.operating_system,
device.language,
device.is_limited_ad_tracking,
device.web_info.browser,
device.web_info.hostname,
geo.continent,
geo.country,
geo.region,
geo.city
FROM
`ra-development.analytics_277223877.events_*` -- modify to your project
),
id_stitching as (
SELECT
DISTINCT user_pseudo_id as user_pseudo_id,
LAST_VALUE(user_id ignore nulls) OVER (
partition by user_pseudo_id
order by event_ts
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS user_id,
MIN(event_ts) OVER (
PARTITION BY user_pseudo_id
) AS first_seen_at,
MAX(event_ts) OVER (
PARTITION BY user_pseudo_id
) AS last_seen_at
FROM
events),
sessions AS (
SELECT
*
FROM
events
WHERE
event_name = 'session_start' ),
user_stitched_sessions as (
SELECT
sessions.*,
coalesce(id_stitching.user_id, sessions.user_pseudo_id) as blended_user_id
FROM sessions
LEFT JOIN id_stitching using (user_pseudo_id)
),
sessions_with_start_end_times AS (
SELECT
* EXCEPT (event_ts),
TIMESTAMP_MICROS(event_ts) AS session_start_ts,
CAST(LEAD(TIMESTAMP_MICROS(event_ts),1) OVER (PARTITION BY CONCAT(blended_user_id)
ORDER BY
event_ts) AS timestamp) AS session_end_ts
FROM
user_stitched_sessions ),
converting_events AS (
SELECT
coalesce(id_stitching.user_id, events.user_pseudo_id) as blended_user_id,
FIRST_VALUE(CASE WHEN event_name in ('Contact Us','Contact Us Clicked','CTA Pressed','Form Submitted') THEN session_id END) OVER (PARTITION BY coalesce(id_stitching.user_id, events.user_pseudo_id) ORDER BY event_ts ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as session_id,
1 AS count_conversions,
event_name,
CAST(TIMESTAMP_MICROS(MIN(CASE WHEN event_name in ('Contact Us','Contact Us Clicked','CTA Pressed','Form Submitted') THEN event_ts END ) OVER (PARTITION BY coalesce(id_stitching.user_id, events.user_pseudo_id))) as timestamp) AS converted_ts,
FROM
events
LEFT JOIN id_stitching using (user_pseudo_id)
WHERE
event_name in ('Contact Us','Contact Us Clicked','CTA Pressed','Form Submitted')),
converting_sessions AS (
SELECT
*
FROM
converting_events
GROUP BY
1,
2,
3,
4,
5 ),
converting_sessions_deduped AS (
SELECT
blended_user_id AS blended_user_id,
MAX(CASE
WHEN event_name in ('Contact Us','Contact Us Clicked','CTA Pressed','Form Submitted') THEN session_id
END
) AS session_id,
MIN(converted_ts) AS converted_ts
FROM
converting_sessions
GROUP BY
1 ),
converting_sessions_deduped_labelled AS (
SELECT
c.blended_user_id,
s.session_start_ts,
s.session_end_ts,
c.converted_ts,
s.session_id AS session_id,
s.session_number AS session_seq,
CASE
WHEN (c.converted_ts BETWEEN s.session_start_ts AND coalesce(s.session_end_ts, current_timestamp)) THEN TRUE
ELSE
FALSE
END
AS conversion_session,
CASE
WHEN (c.converted_ts BETWEEN s.session_start_ts AND coalesce(s.session_end_ts, current_timestamp)) THEN 1
ELSE
0
END
AS event,
source,
medium,
channel,
referrer,
page_path,
device_category,
operating_system,
LANGUAGE,
is_limited_ad_tracking,
browser,
hostname,
continent,
country,
region,
city
FROM
sessions_with_start_end_times s
JOIN
converting_sessions_deduped c
ON
c.blended_user_id = s.blended_user_id
WHERE
c.converted_ts >= s.session_start_ts
ORDER BY
c.blended_user_id,
s.session_start_ts),
session_attrib_pct AS (
SELECT
*,
CASE
WHEN session_id = LAST_VALUE(session_id) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1
ELSE
0
END
AS LAST_click_attrib_pct,
CASE
WHEN session_id = FIRST_VALUE(session_id) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1
ELSE
0
END
AS first_click_attrib_pct,
1/COUNT(session_id) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS even_click_attrib_pct,
CASE
WHEN session_start_ts = FIRST_VALUE(session_start_ts) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AND MAX(event) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) = 1 THEN 1.1-ROW_NUMBER() OVER (PARTITION BY blended_user_id)
WHEN session_start_ts > LAG(session_start_ts) OVER (PARTITION BY blended_user_id ORDER BY session_start_ts)
AND MAX(event) OVER (PARTITION BY blended_user_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) = 1 THEN ROUND(1.1-1/ROW_NUMBER() OVER (PARTITION BY blended_user_id), 2)
ELSE
NULL
END
AS weights
FROM
converting_sessions_deduped_labelled),
session_attrib_pct_with_time_decay AS (
SELECT
*,
ROUND(CASE
WHEN (weights=0 OR SUM(weights) OVER (PARTITION BY blended_user_id)=0) THEN 0
ELSE
weights/SUM(weights) OVER (PARTITION BY blended_user_id)
END
, 2) AS time_decay_attrib_pct
FROM
session_attrib_pct)
SELECT
* EXCEPT (event,
weights,
converted_ts)
FROM
session_attrib_pct_with_time_decay
ORDER BY
blended_user_id,
session_start_ts
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment