Skip to content

Instantly share code, notes, and snippets.

@prabapro
Last active July 10, 2022 05:18
Show Gist options
  • Save prabapro/a3b3744faa9fa5ba7a284e53d5673f3b to your computer and use it in GitHub Desktop.
Save prabapro/a3b3744faa9fa5ba7a284e53d5673f3b to your computer and use it in GitHub Desktop.
-- Top 10 events for the past 30 days (excluding today)
-- This query would return the event names & its count for the past 30 days excluding today.
-- To see the top 10 events, we are limiting the results using the limit clause.
select
event_name,
count(*) as number_of_events
from
`<projectId>.<datasetId>.events_*`
where
_table_suffix between format_date('%Y%m%d',date_sub(current_date(), interval 30 day))
and format_date('%Y%m%d',date_sub(current_date(), interval 1 day))
group by
event_name
order by
number_of_events desc
limit
10
-- Top 10 events for the past 30 days (including today)
-- This query would return the event names & its count for the past 30 days including today.
-- We are using regular expression in the where clause to query the both daily & intraday tables.
select
event_name,
count(*) as number_of_events
from
`<projectId>.<datasetId>.events_*`
where
regexp_extract(_table_suffix, '[0-9]+') between format_date('%Y%m%d',date_sub(current_date(), interval 30 day))
and format_date('%Y%m%d',current_date())
group by
event_name
order by
number_of_events desc
limit
10
-- Unnesting event_params to see the pages viewed in a date range.
-- In this query, we would unnest the repeated field event_params.
-- This is a must learn technique to access the values inside the nested columns such as event_params, user_properties & items.
-- This query would return the url & the page title viewed by the users during a selected date range.
select
event_date,
(select value.string_value from unnest(event_params) where key = 'page_location') as page_location,
(select value.string_value from unnest(event_params) where key = 'page_title') as page_title,
count(*) as number_of_events
from
`<projectId>.<datasetId>.events_*`
where
_table_suffix between '20220601' and '20220610'
and event_name = 'page_view'
group by
1,2,3
order by
1
-- Removing the query strings from the URL
-- In GA4, the page_location parameter contains the query strings.
-- In this query, we would use regexp_replace function to make the URLs clean.
select
event_date,
regexp_replace((select value.string_value from unnest(event_params) where key = 'page_location'), r'\?.*', '') as page_location,
(select value.string_value from unnest(event_params) where key = 'page_title') as page_title,
count(*) as number_of_events
from
`<projectId>.<datasetId>.events_*`
where
_table_suffix between '20220601' and '20220610'
and event_name = 'page_view'
group by
1,2,3
order by
1
-- Count Users & New Users
-- This query would return the users count using the user_pseudo_id column.
-- user_pseudo_id comes from the browser cookie ID (web) & device ID (apps).
-- Also the GA4 sends first_visit event for the new users. We use countif function to calculate the new users.
select
count(distinct user_pseudo_id) as users,
countif(event_name = 'first_visit') as new_users
from
`<projectId>.<datasetId>.events_*`
where
_table_suffix between '20220601' and '20220610'
-- Count users by acquisition campaign
-- This query would return the users counts by the acquisition campaign, source & medium.
select
traffic_source.name as campaign_name,
traffic_source.source,
traffic_source.medium,
count(distinct user_pseudo_id) as users,
from
`<projectId>.<datasetId>.events_*`
where
_table_suffix between '20220601' and '20220610'
group by
1,2,3
order by
4 desc
-- Count users by Browsers & Browser versions
-- This query would return the users counts by the browsers & browser versions.
-- For this we use the nested field device.
select
device.web_info.browser,
device.web_info.browser_version,
count(distinct user_pseudo_id) as users
from
`<projectId>.<datasetId>.events_*`
where
_table_suffix between '20220601' and '20220610'
group by
1,2
order by
3 desc
-- User properties
-- In this query, we are unnesting another repeated field user_properies &
-- get the user_id & the imaginary user property loyalty_tier.
-- We are also filtering out the null values from the user_id column.
select
(select value.int_value from unnest(user_properties) where key = 'user_id') as user_id,
(select value.string_value from unnest(user_properties) where key = 'loyalty_tier') as loyalty_tier
from
`<projectId>.<datasetId>.events_*`
where
_table_suffix between '20220601' and '20220610'
and user_id is not null
group by
1,2
order by
user_id
-- Landing Pages
-- The first page viewed in a session would be considered as the landing page.
-- Since GA4 BigQuery export doesn’t have a column to get this metric out of the box,
-- we need to use a CASE statement to define the landing pages.
-- Similar to the Query #04, we are cleansing the query strings from the page_location.
with base_table as
(select
user_pseudo_id,
(select value.int_value from unnest(event_params) where event_name = 'page_view' and key = 'ga_session_id') as session_id,
regexp_replace((select value.string_value from unnest(event_params) where event_name = 'page_view' and key = 'page_location'), r'\?.*', '') as page_location ,
case when
(select value.int_value from unnest(event_params) where event_name = 'page_view' and key = 'entrances') = 1
then true else false
end as landing_page
from
`<projectId>.<datasetId>.events_*`
where
_table_suffix between '20220601' and '20220610'
and event_name = 'page_view')
select
case when landing_page is true then page_location else null end as landing_page,
count(distinct concat(user_pseudo_id,session_id)) as number_of_visits
from base_table
group by 1
having landing_page is not null
order by 2 desc
-- Ecommerce Items & Revenue
-- This is one of the mostly used query especially for the e-commerce businesses.
-- In this query, we are unnesting the repeated field items & analyse which products were sold with each transaction.
-- We are also using format_datetime function to convert the event_timestamp in to the New York time.
select
timestamp_micros(event_timestamp) as timestamp_utc,
format_datetime("%Y-%d-%m %H-%M-%S %Z", (timestamp_micros(event_timestamp)), 'America/New_York') as timestamp_newyork,
ecommerce.transaction_id as transaction_id,
ecommerce.purchase_revenue as revenue,
(select value.string_value from unnest(event_params) where key = 'currency') as currency,
(select value.string_value from unnest (event_params) where key = 'page_title') as page_title,
(select value.string_value from unnest (event_params) where key = 'page_location') as url,
geo.country as country,
array_agg(items.item_name) as items_bought
from
`<projectId>.<datasetId>.events_*`,
unnest(items) as items
where
event_name = 'purchase'
and _table_suffix between '20220601' and '20220610'
group by
1,2,3,4,5,6,7,8
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment