Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save khunreus/05fa134e408ad5bd6bdbfac4c8cb0675 to your computer and use it in GitHub Desktop.
Save khunreus/05fa134e408ad5bd6bdbfac4c8cb0675 to your computer and use it in GitHub Desktop.
The query pulls page views on Google Merchandise Store from the GA4 public datasource. Pages are categorised as PLP, PDPs and the rest of pages to understand User Journeys without going into too much detail.
-- pulling user page views from GA4 events
WITH base_table AS (
-- pulls relevant columns from relevant dates to decrease the size of data scanned
SELECT
event_name,
event_date,
event_timestamp,
user_pseudo_id,
user_id,
device,
geo,
traffic_source,
event_params,
user_properties
FROM
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE
_table_suffix >= '20210101'
AND event_name IN ('page_view')
)
, unnested_events AS (
-- unnests event parameters to get to relevant keys and values
SELECT
event_date AS date,
event_timestamp AS event_timestamp_microseconds,
user_pseudo_id,
MAX(CASE WHEN c.key = 'ga_session_id' THEN c.value.int_value END) AS visitID,
MAX(CASE WHEN c.key = 'ga_session_number' THEN c.value.int_value END) AS visitNumber,
MAX(CASE WHEN c.key = 'page_title' THEN c.value.string_value END) AS page_title,
MAX(CASE WHEN c.key = 'page_location' THEN c.value.string_value END) AS page_location
FROM
base_table,
UNNEST (event_params) c
GROUP BY 1,2,3
)
, unnested_events_categorised AS (
-- categorizing Page Titles into PDPs and PLPs
SELECT
*,
CASE WHEN ARRAY_LENGTH(SPLIT(page_location, '/')) >= 5
AND
CONTAINS_SUBSTR(ARRAY_REVERSE(SPLIT(page_location, '/'))[SAFE_OFFSET(0)], '+')
AND (LOWER(SPLIT(page_location, '/')[SAFE_OFFSET(4)]) IN
('accessories','apparel','brands','campus+collection','drinkware',
'electronics','google+redesign',
'lifestyle','nest','new+2015+logo','notebooks+journals',
'office','shop+by+brand','small+goods','stationery','wearables'
)
OR
LOWER(SPLIT(page_location, '/')[SAFE_OFFSET(3)]) IN
('accessories','apparel','brands','campus+collection','drinkware',
'electronics','google+redesign',
'lifestyle','nest','new+2015+logo','notebooks+journals',
'office','shop+by+brand','small+goods','stationery','wearables'
)
)
THEN 'PDP'
WHEN NOT(CONTAINS_SUBSTR(ARRAY_REVERSE(SPLIT(page_location, '/'))[SAFE_OFFSET(0)], '+'))
AND (LOWER(SPLIT(page_location, '/')[SAFE_OFFSET(4)]) IN
('accessories','apparel','brands','campus+collection','drinkware',
'electronics','google+redesign',
'lifestyle','nest','new+2015+logo','notebooks+journals',
'office','shop+by+brand','small+goods','stationery','wearables'
)
OR
LOWER(SPLIT(page_location, '/')[SAFE_OFFSET(3)]) IN
('accessories','apparel','brands','campus+collection','drinkware',
'electronics','google+redesign',
'lifestyle','nest','new+2015+logo','notebooks+journals',
'office','shop+by+brand','small+goods','stationery','wearables'
)
)
THEN 'PLP'
ELSE page_title
END AS page_title_adjusted
FROM
unnested_events
)
, ranked_screens AS (
-- prepares additional data points for analytics to understand transitions between the previous, current and following pages
SELECT
*,
DENSE_RANK() OVER (PARTITION BY user_pseudo_id, visitID ORDER BY event_timestamp_microseconds ASC) page_rank,
LAG(page_title_adjusted,1) OVER (PARTITION BY user_pseudo_id, visitID ORDER BY event_timestamp_microseconds ASC) previous_page,
LEAD(page_title_adjusted,1) OVER (PARTITION BY user_pseudo_id, visitID ORDER BY event_timestamp_microseconds ASC) next_page
FROM
unnested_events_categorised
)
, screen_summary AS (
-- another layer of analytics: check the last page number viewed on a session
-- aggregate all screens per session, which will be helpful in identifying power users
SELECT
*,
MAX(page_rank) OVER (PARTITION BY user_pseudo_id, visitID) last_page_rank,
ARRAY_AGG(page_title_adjusted) OVER (PARTITION BY user_pseudo_id, visitID) pages_on_a_visit
FROM
ranked_screens
)
, screen_summary_agg AS (
-- cleans up pages_on_a_visit field
SELECT * EXCEPT(pages_on_a_visit),
ARRAY_TO_STRING(ARRAY(SELECT DISTINCT * FROM UNNEST(pages_on_a_visit) ORDER BY 1 ASC), '>>') AS screens_on_a_visit
FROM
screen_summary
)
SELECT
-- final analytical point: the first time a page is viewed in a session
*,
MIN(page_rank) OVER (PARTITION BY user_pseudo_id, visitID, page_title) AS page_rank_session_first_seen
FROM
screen_summary_agg
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment