Skip to content

Instantly share code, notes, and snippets.

View markrittman's full-sized avatar

Mark Rittman markrittman

View GitHub Profile
@markrittman
markrittman / ga4_attribution.sql
Created May 9, 2023 10:37
GA4 Multi-Step, Multi-Cycle Marketing Attribution example using the BigQuery GA4 Sample Dataset at https://developers.google.com/analytics/bigquery/web-ecommerce-demo-dataset
WITH
events AS (
SELECT
TIMESTAMP_MICROS(event_timestamp) AS event_ts,
CONCAT(user_pseudo_id,'-',event_name,'-',CAST(event_timestamp AS STRING)) AS event_id,
user_pseudo_id AS user_pseudo_id,
user_id,
traffic_source.name AS utm_channel,
traffic_source.medium AS utm_medium,
traffic_source.source AS utm_source,
@markrittman
markrittman / ten_next_pageviews_after_landing_page.sql
Created May 29, 2022 22:43
Return first ten page views after landing on a particular page using Segment
with events as (
select
anonymous_id,
user_id,
cast(null as string) as name,
cast(null as string) as email,
timestamp,
'page_view' as event_type,
concat(split(context_ip,'.')[safe_offset(0)],'.***.***.',split(context_ip,'.')[safe_offset(3)]) as context_ip,
context_page_path,
@markrittman
markrittman / ga4_visitor_journey.sql
Last active July 11, 2023 10:01
User Page View Journey for GA4
SELECT
user_pseudo_id,
geo.city,
TIMESTAMP_MICROS(MIN(event_timestamp)) AS session_start_ts,
traffic_source.name AS channel,
traffic_source.medium AS medium,
traffic_source.source AS source,
(
SELECT
value.int_value
@markrittman
markrittman / compare_segment_rudderstack_page_views_by_day.sql
Created May 21, 2023 18:13
Script to compare page view counts for Segment, Rudderstack and GA4 tracking, grouped by day
with segment_pageviews as (
select
timestamp_trunc(p.received_at,day) as day_day,
context_page_title as page_title,
count(distinct id) as page_views
from
`ra-development.company_website.pages_view` p
where
date(timestamp_trunc(p.received_at,DAY)) > '2023-05-01'
and context_page_url not like '%info.rittmananalytics.com%'
@markrittman
markrittman / compare_segment_rudderstack_ga4_page_views_by_page_title.sql
Last active May 21, 2023 18:10
Script to show total page views for all pages on our site as recorded by Segment, Rudderstack and GA4
with segment_pageviews as (
select
timestamp_trunc(p.received_at,day) as day_day,
context_page_title as page_title,
count(distinct id) as page_views
from
`ra-development.company_website.pages_view` p
where
date(timestamp_trunc(p.received_at,DAY)) > '2023-05-01'
and context_page_url not like '%info.rittmananalytics.com%'
@markrittman
markrittman / compare_rudderstack_segment_page_events_for_given_page_title.sql
Created May 21, 2023 15:12
Script to compare the page views logged by Segment and Rudderstack for the same page, used when the page view counts for the two sources differ
with segment_pageviews as (
select
'segment' as source, id, timestamp_trunc(timestamp,HOUR) as timestamp_minute, path, context_page_title, anonymous_id, user_id, context_ip
from
`ra-development.company_website.pages_view` p
where
date(timestamp_trunc(p.received_at,DAY)) > '2023-05-10'
and context_page_title = 'Adding Looker Regression Tests to the dbtCloud CI/CD Pipeline using Spectacles — Rittman Analytics'
@markrittman
markrittman / multi_channel_attribution.sql
Created February 8, 2020 22:56
Multi-Channel Attribution Standard SQL query for Google BigQuery sourcing session data from Segment and attributed value from operational system
WITH session_attributiON AS
(SELECT *,
CASE WHEN session_id = LAST_VALUE(session_id)
OVER (partitiON by user_id order by session_start_tstamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1
ELSE 0
END AS LAST_click_attrib_pct,
CASE WHEN session_id = FIRST_VALUE(session_id)
OVER (partitiON by user_id order by session_start_tstamp ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) THEN 1
ELSE 0
END AS first_click_attrib_pct,
@markrittman
markrittman / ga4_sessions.sql
Created July 18, 2021 16:14
SQL Query to list out all sessions in GA4
SELECT
user_pseudo_id,
TIMESTAMP_MICROS(event_timestamp) AS session_start_ts,
CAST(LEAD(TIMESTAMP_MICROS(event_timestamp),1) OVER (PARTITION BY CONCAT(user_pseudo_id)
ORDER BY
event_timestamp) AS timestamp) AS session_end_ts,
(SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS session_id,
(SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_number') AS session_number,
(SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_referrer') AS referrer,
(SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location') AS landing_page_path,
@markrittman
markrittman / ga4_attribution_model.sql
Last active February 2, 2023 17:54
First-Click, Last-Click, Even-Click and Time Decay Attribution for GA4
/*
First-Click, Last-Click, Even-Click and Time Decay Attribution for GA4
Replace "event_name in ('Contact Us','Contact Us Clicked','CTA Pressed','Form Submitted')" with the name(s) of your conversion event in lines 54, 57, 61 and 76
*/
WITH
events AS (
SELECT
event_timestamp as event_ts,
user_pseudo_id AS user_pseudo_id,
user_id,
@markrittman
markrittman / landing_page_user_paths.sql
Last active December 30, 2022 10:23
Most common user paths from a given landing page
with events as (
select
anonymous_id,
user_id,
cast(null as string) as name,
cast(null as string) as email,
timestamp,
'page_view' as event_type,
concat(split(context_ip,'.')[safe_offset(0)],'.***.***.',split(context_ip,'.')[safe_offset(3)]) as context_ip,
context_page_path,