/0_bqml_wildcard_tables.sql Secret
Last active
December 6, 2023 14:41
BQML #1 - Train your first model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--count unique users number in January | |
SELECT | |
COUNT(DISTINCT user_pseudo_id) AS users | |
FROM | |
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*` | |
WHERE | |
_TABLE_SUFFIX BETWEEN '20210101' AND '20210131' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- PLEASE REPLACE WITH YOUR DESIRED DATE RANGE | |
-- AND _TABLE_SUFFIX BETWEEN '20180501' AND '20240131'` | |
CREATE TABLE bqmlhackathon.ga4.ga4_sample_ecommerce AS | |
-- sessions enhanced by users data | |
SELECT s.fullVisitorId, s.visitStartTime, s.date, s.deviceCategory, s.isMobile, s.operatingSystem, s.browser, | |
s.country, s.city, s.trafficSource, s.trafficMedium, s.trafficCampaign, s.isFirstVisit, s.productPagesViewed, s.addedToCart, | |
u.totalHits, u.totalPageviews, u_visits.totalVisits, u_engagement.totalTimeOnSite | |
FROM ( | |
-- sessions | |
SELECT fullVisitorId, ga_session_id, | |
MAX(visitStartTime) AS visitStartTime, | |
date, deviceCategory, isMobile, operatingSystem, browser, | |
country, city, trafficSource, trafficMedium, trafficCampaign, | |
MAX(isFirstVisit) AS isFirstVisit, | |
MAX(productPagesViewed) AS productPagesViewed, | |
MAX(addedToCart) AS addedToCart | |
FROM ( | |
-- raw events | |
SELECT | |
user_pseudo_id AS fullVisitorId, | |
ep.value.int_value AS ga_session_id, | |
CASE event_name WHEN 'session_start' THEN event_timestamp ELSE NULL END AS visitStartTime, | |
event_date AS date, | |
device.category AS deviceCategory, | |
CASE device.category WHEN 'mobile' THEN 1 ELSE 0 END AS isMobile, | |
device.operating_system AS operatingSystem, | |
device.web_info.browser AS browser, | |
geo.country AS country, | |
geo.city AS city, | |
traffic_source.source AS trafficSource, | |
traffic_source.medium AS trafficMedium, | |
CASE traffic_source.medium WHEN 'cpc' THEN traffic_source.name ELSE NULL END AS trafficCampaign, | |
CASE event_name WHEN 'first_visit' THEN 1 ELSE 0 END AS isFirstVisit, | |
CASE event_name WHEN 'view_item' THEN 1 ELSE 0 END AS productPagesViewed, | |
CASE event_name WHEN 'add_to_cart' THEN 1 ELSE 0 END AS addedToCart | |
FROM | |
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_202101*` e | |
CROSS JOIN | |
UNNEST(e.event_params) ep | |
WHERE ep.key = 'ga_session_id' | |
) events | |
GROUP BY fullVisitorId, ga_session_id, date, deviceCategory, isMobile, | |
operatingSystem, browser, country, city, trafficSource, trafficMedium, trafficCampaign | |
) s | |
LEFT JOIN ( | |
-- totalHits, totalPageviews | |
SELECT user_pseudo_id AS fullVisitorId, | |
COUNT(user_pseudo_id) AS totalHits, | |
COUNTIF(event_name = 'page_view') AS totalPageviews, | |
FROM | |
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_202101*` e | |
GROUP BY user_pseudo_id | |
) u USING(fullVisitorId) | |
LEFT JOIN ( | |
-- totalVisits | |
-- count unique `ga_session_id` | |
SELECT user_pseudo_id AS fullVisitorId, | |
COUNT(DISTINCT ep.value.int_value) AS totalVisits | |
FROM | |
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_202101*` e | |
CROSS JOIN | |
UNNEST(e.event_params) ep | |
WHERE ep.key = 'ga_session_id' | |
GROUP BY user_pseudo_id | |
) u_visits USING(fullVisitorId) | |
LEFT JOIN ( | |
-- totalTimeOnSite (divide by 1000 to get seconds) | |
SELECT user_pseudo_id AS fullVisitorId, | |
MAX(engagement_time_msec) AS totalTimeOnSite | |
FROM ( | |
SELECT | |
user_pseudo_id, | |
value.int_value AS engagement_time_msec | |
FROM | |
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_202101*` e | |
CROSS JOIN | |
UNNEST(e.event_params) ep WHERE key = 'engagement_time_msec' | |
) s | |
GROUP BY user_pseudo_id | |
) u_engagement USING(fullVisitorId) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE MODEL | |
bqmlhackathon.ga4.init_model_logistic_reg | |
TRANSFORM( | |
* EXCEPT( | |
fullVisitorId, --don't train on user_id | |
isFirstVisit, --we'll change the feature type below | |
date --don't train on date | |
) | |
,CAST(isFirstVisit AS bool) AS isFirstVisit | |
--change feature type to bool | |
) | |
OPTIONS(MODEL_TYPE = 'LOGISTIC_REG', | |
INPUT_LABEL_COLS = ['addedToCart'] | |
) | |
AS | |
SELECT | |
* | |
FROM | |
`bqmlhackathon.ga4.ga4_sample_ecommerce` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#standardSQL | |
SELECT * | |
FROM ML.WEIGHTS(MODEL `bqmlhackathon.ga4.init_model_logistic_reg`) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
sessionId, | |
prob as addedToCart | |
FROM | |
ML.PREDICT(MODEL `bqmlhackathon.ga4.init_model_logistic_reg`, | |
( | |
SELECT *,CONCAT(fullVisitorId, CAST(visitStartTime as string)) as sessionId | |
FROM `bqmlhackathon.ga4.ga4_sample_ecommerce_20201231`) | |
), | |
UNNEST(predicted_addedToCart_probs) | |
WHERE label = 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment