Skip to content

Instantly share code, notes, and snippets.

@brunomertins
Forked from jhnvdw/aw_traffic.sql
Created March 20, 2023 12:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brunomertins/e7085c2d9334dbd639a2609f273f4529 to your computer and use it in GitHub Desktop.
Save brunomertins/e7085c2d9334dbd639a2609f273f4529 to your computer and use it in GitHub Desktop.
WITH
-- Subquery to set the start and end date once for the whole query
period AS (
SELECT
'20200220' AS start_date,
FORMAT_DATE('%Y%m%d',DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)) AS end_date),
-- Subquery to prepare and calculate traffic source data based on user and session id
traffic AS (
SELECT
user_pseudo_id,
session_id,
session_number,
CONCAT(source,' / ',medium) AS source_medium_session,
CASE
WHEN campaign_session IS NULL THEN '(direct)'
ELSE
campaign_session
END
AS campaign_session,
full_referrer,
-- Definitions of the channel grouping based on the source / medium of every session
CASE
WHEN source = '(direct)' AND (medium = '(not set)' OR medium = '(none)') THEN 'Direct'
WHEN medium = 'organic' THEN 'Organic Search'
WHEN REGEXP_CONTAINS(medium, r'^(social|social-network|social-media|sm|social network|social media)$') THEN 'Social'
WHEN medium = 'email' THEN 'Email'
WHEN medium = 'affiliate' THEN 'Affiliates'
WHEN medium = 'referral' THEN 'Referral'
WHEN REGEXP_CONTAINS(medium, r'^(cpc|ppc|paidsearch)$') THEN 'Paid Search'
WHEN REGEXP_CONTAINS(medium, r' ^(cpv|cpa|cpp|content-text)$') THEN 'Other Advertising'
WHEN REGEXP_CONTAINS(medium, r'^(display|cpm|banner)$') THEN 'Display'
ELSE
'(Other)'
END
AS default_channel_grouping_session,
source_medium_user,
campaign_user,
default_channel_grouping_user
FROM (
SELECT
user_pseudo_id,
(
SELECT
value.int_value
FROM
UNNEST(event_params)
WHERE
event_name = 'page_view'
AND key = 'ga_session_id') AS session_id,
(
SELECT
value.int_value
FROM
UNNEST(event_params)
WHERE
event_name = 'page_view'
AND key = 'ga_session_number') AS session_number,
events.event_timestamp,
RANK() OVER (PARTITION BY user_pseudo_id, (SELECT value.int_value FROM UNNEST(event_params)
WHERE
event_name = 'page_view'
AND key = 'ga_session_id')
ORDER BY
event_timestamp) AS rank,
CASE
WHEN ( SELECT value.string_value FROM UNNEST(event_params) WHERE event_name = 'page_view' AND key = 'source') IS NULL THEN '(direct)'
ELSE
(
SELECT
value.string_value
FROM
UNNEST(event_params)
WHERE
event_name = 'page_view'
AND key = 'source')
END
AS source,
CASE
WHEN ( SELECT value.string_value FROM UNNEST(event_params) WHERE event_name = 'page_view' AND key = 'medium') IS NULL THEN '(none)'
ELSE
(
SELECT
value.string_value
FROM
UNNEST(event_params)
WHERE
event_name = 'page_view'
AND key = 'medium')
END
AS medium,
(
SELECT
value.string_value
FROM
UNNEST(event_params)
WHERE
event_name = 'page_view'
AND key = 'campaign') AS campaign_session,
(
SELECT
value.string_value
FROM
UNNEST(event_params)
WHERE
event_name = 'page_view'
AND key = 'page_referrer') AS full_referrer,
CONCAT(traffic_source.source,' / ',traffic_source.medium) AS source_medium_user,
traffic_source.name AS campaign_user,
-- Definitions of the channel grouping based on the source / medium of a user's first session
CASE
WHEN traffic_Source.source = '(direct)' AND (traffic_Source.medium = '(not set)' OR traffic_Source.medium = '(none)') THEN 'Direct'
WHEN traffic_Source.medium = 'organic' THEN 'Organic Search'
WHEN REGEXP_CONTAINS(traffic_Source.medium, r'^(social|social-network|social-media|sm|social network|social media)$') THEN 'Social'
WHEN traffic_Source.medium = 'email' THEN 'Email'
WHEN traffic_Source.medium = 'affiliate' THEN 'Affiliates'
WHEN traffic_Source.medium = 'referral' THEN 'Referral'
WHEN REGEXP_CONTAINS(traffic_Source.medium, r'^(cpc|ppc|paidsearch)$') THEN 'Paid Search'
WHEN REGEXP_CONTAINS(traffic_Source.medium, r' ^(cpv|cpa|cpp|content-text)$') THEN 'Other Advertising'
WHEN REGEXP_CONTAINS(traffic_Source.medium, r'^(display|cpm|banner)$') THEN 'Display'
ELSE
'(Other)'
END
AS default_channel_grouping_user
FROM
-- Change this to your Google Analytics 4 BigQuery export location
`ga4.analytics_1234567890.events_*`,
period
WHERE
_table_suffix BETWEEN period.start_date
AND period.end_date
AND (
SELECT
value.int_value
FROM
UNNEST(event_params)
WHERE
event_name = 'page_view'
AND key = 'ga_session_id') IS NOT NULL)
WHERE
rank = 1)
-- Main query
SELECT
-- User Default Channel Grouping (dimension | the Channel Group associated with an user's first session)
default_channel_grouping_user,
-- User Source / Medium (dimension | the referral source and type associated with an user's first session)
source_medium_user,
-- User Campaign (dimension | the value of a campaign associated with an user's first session)
campaign_user,
-- Session Default Channel Grouping (dimension | the Channel Group associated with a session)
default_channel_grouping_session,
-- Session Source / Medium (dimension | the referral source and type associated with a session)
source_medium_session,
-- Session Campaign (dimension | the value of a campaign associated with a session)
campaign_session,
-- Session Full Referrer (dimension | the full referring URL of a session, including the hostname and path)
full_referrer
FROM
traffic
GROUP BY
1,
2,
3,
4,
5,
6,
7
LIMIT
10
@brunomertins
Copy link
Author

Query for Channel Grouping

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment